Merge tag 'kvmarm-fixes-5.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Paolo Bonzini <pbonzini@redhat.com>
Fri, 8 Jan 2021 10:02:40 +0000 (05:02 -0500)
committer Paolo Bonzini <pbonzini@redhat.com>
Fri, 8 Jan 2021 10:02:40 +0000 (05:02 -0500)
KVM/arm64 fixes for 5.11, take #1

- VM init cleanups
- PSCI relay cleanups
- Kill CONFIG_KVM_ARM_PMU
- Fixup __init annotations
- Fixup reg_to_encoding()
- Fix spurious PMCR_EL0 access

1197 files changed:
.mailmap
CREDITS
Documentation/ABI/testing/sysfs-bus-iio-timer-stm32
Documentation/admin-guide/bootconfig.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/dev-tools/kunit/faq.rst
Documentation/dev-tools/kunit/style.rst
Documentation/dev-tools/kunit/usage.rst
Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml
Documentation/devicetree/bindings/net/can/tcan4x5x.txt
Documentation/devicetree/bindings/net/nfc/nxp-nci.txt
Documentation/devicetree/bindings/net/nfc/pn544.txt
Documentation/devicetree/bindings/sound/rt1015.txt
Documentation/driver-api/media/drivers/vidtv.rst
Documentation/kbuild/llvm.rst
Documentation/networking/netdev-FAQ.rst
Documentation/virt/kvm/api.rst
Documentation/virt/kvm/mmu.rst
Documentation/xtensa/mmu.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/kernel/process.c
arch/arc/include/asm/bitops.h
arch/arc/include/asm/pgtable.h
arch/arc/kernel/stacktrace.c
arch/arc/mm/tlb.c
arch/arm/Kconfig
arch/arm/Makefile
arch/arm/boot/compressed/Makefile
arch/arm/boot/compressed/head.S
arch/arm/boot/dts/am437x-l4.dtsi
arch/arm/boot/dts/dra76x.dtsi
arch/arm/boot/dts/exynos4412-odroid-common.dtsi
arch/arm/boot/dts/imx50-evk.dts
arch/arm/boot/dts/imx6q-prti6q.dts
arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi
arch/arm/boot/dts/imx6qdl-udoo.dtsi
arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi
arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts
arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi
arch/arm/boot/dts/sun6i-a31-hummingbird.dts
arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts
arch/arm/boot/dts/sun7i-a20-bananapi.dts
arch/arm/boot/dts/sun7i-a20-cubietruck.dts
arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts
arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts
arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts
arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts
arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts
arch/arm/boot/dts/sun8i-s3-pinecube.dts
arch/arm/boot/dts/sun8i-v3s.dtsi
arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts
arch/arm/boot/dts/sun9i-a80-cubieboard4.dts
arch/arm/boot/dts/sun9i-a80-optimus.dts
arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi
arch/arm/boot/dts/vf610-zii-dev-rev-b.dts
arch/arm/configs/omap2plus_defconfig
arch/arm/include/asm/pgtable-2level.h
arch/arm/include/asm/pgtable-3level.h
arch/arm/kernel/process.c
arch/arm/mach-imx/anatop.c
arch/arm/mach-keystone/memory.h
arch/arm/mach-omap1/board-osk.c
arch/arm/mach-omap2/Kconfig
arch/arm/mach-omap2/cpuidle44xx.c
arch/arm/mach-sunxi/sunxi.c
arch/arm64/Kconfig
arch/arm64/Makefile
arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts
arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts
arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts
arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts
arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts
arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts
arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi
arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
arch/arm64/boot/dts/freescale/imx8mm.dtsi
arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
arch/arm64/boot/dts/freescale/imx8mn-evk.dts
arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
arch/arm64/boot/dts/freescale/imx8mn.dtsi
arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi
arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts
arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi
arch/arm64/boot/dts/nvidia/tegra194.dtsi
arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts
arch/arm64/boot/dts/qcom/ipq6018.dtsi
arch/arm64/boot/dts/renesas/r8a774e1.dtsi
arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts
arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts
arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi
arch/arm64/boot/dts/rockchip/rk3399.dtsi
arch/arm64/include/asm/daifflags.h
arch/arm64/include/asm/esr.h
arch/arm64/include/asm/exception.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/probes.h
arch/arm64/include/asm/ptrace.h
arch/arm64/include/asm/sysreg.h
arch/arm64/kernel/entry-common.c
arch/arm64/kernel/entry.S
arch/arm64/kernel/irq.c
arch/arm64/kernel/process.c
arch/arm64/kernel/sdei.c
arch/arm64/kernel/syscall.c
arch/arm64/kernel/traps.c
arch/arm64/kvm/hyp/nvhe/hyp.lds.S
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/vgic/vgic-mmio-v3.c
arch/arm64/mm/fault.c
arch/csky/kernel/process.c
arch/h8300/kernel/process.c
arch/hexagon/kernel/process.c
arch/ia64/include/asm/sparsemem.h
arch/ia64/kernel/process.c
arch/microblaze/kernel/process.c
arch/mips/alchemy/common/clock.c
arch/mips/include/asm/pgtable-32.h
arch/mips/kernel/idle.c
arch/mips/kernel/setup.c
arch/mips/mm/tlb-r4k.c
arch/nios2/kernel/process.c
arch/openrisc/kernel/process.c
arch/parisc/kernel/process.c
arch/powerpc/Kconfig
arch/powerpc/Makefile
arch/powerpc/include/asm/book3s/32/pgtable.h
arch/powerpc/include/asm/book3s/64/kup-radix.h
arch/powerpc/include/asm/book3s/64/mmu.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/feature-fixups.h
arch/powerpc/include/asm/kup.h
arch/powerpc/include/asm/mmzone.h
arch/powerpc/include/asm/nohash/32/pgtable.h
arch/powerpc/include/asm/security_features.h
arch/powerpc/include/asm/setup.h
arch/powerpc/include/asm/sparsemem.h
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/head_book3s_32.S
arch/powerpc/kernel/idle.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/syscall_64.c
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/kvm/book3s_xive_native.c
arch/powerpc/lib/feature-fixups.c
arch/powerpc/mm/Makefile
arch/powerpc/mm/book3s64/hash_native.c
arch/powerpc/mm/book3s64/mmu_context.c
arch/powerpc/mm/maccess.c [new file with mode: 0644]
arch/powerpc/mm/mem.c
arch/powerpc/mm/numa.c
arch/powerpc/platforms/powermac/smp.c
arch/powerpc/platforms/powernv/setup.c
arch/powerpc/platforms/powernv/smp.c
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/powerpc/platforms/pseries/mobility.c
arch/powerpc/platforms/pseries/msi.c
arch/powerpc/platforms/pseries/pseries.h
arch/powerpc/platforms/pseries/setup.c
arch/riscv/include/asm/pgtable-32.h
arch/riscv/include/asm/timex.h
arch/riscv/include/asm/vdso/processor.h
arch/riscv/kernel/process.c
arch/riscv/kernel/setup.c
arch/riscv/kernel/vdso/Makefile
arch/s390/configs/debug_defconfig
arch/s390/include/asm/kvm_host.h
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/entry.S
arch/s390/kernel/idle.c
arch/s390/kernel/perf_cpum_sf.c
arch/s390/kernel/uv.c
arch/s390/kvm/guestdbg.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/priv.c
arch/s390/kvm/pv.c
arch/s390/kvm/vsie.c
arch/s390/lib/delay.c
arch/s390/mm/gmap.c
arch/s390/pci/pci_irq.c
arch/sh/kernel/idle.c
arch/sparc/kernel/leon_pmc.c
arch/sparc/kernel/process_32.c
arch/sparc/kernel/process_64.c
arch/sparc/lib/csum_copy.S
arch/um/kernel/process.c
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/boot/compressed/Makefile
arch/x86/boot/compressed/sev-es.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/ds.c
arch/x86/events/intel/uncore.c
arch/x86/events/intel/uncore.h
arch/x86/events/rapl.c
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/insn.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/mwait.h
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/sparsemem.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/sync_core.h
arch/x86/include/asm/vmx.h
arch/x86/include/uapi/asm/kvm.h
arch/x86/include/uapi/asm/svm.h
arch/x86/include/uapi/asm/vmx.h
arch/x86/kernel/apic/vector.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/cpuid-deps.c
arch/x86/kernel/cpu/mce/core.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/resctrl/core.c
arch/x86/kernel/cpu/resctrl/internal.h
arch/x86/kernel/cpu/resctrl/monitor.c
arch/x86/kernel/cpu/resctrl/rdtgroup.c
arch/x86/kernel/cpu/scattered.c
arch/x86/kernel/cpu/vmware.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/kprobes/opt.c
arch/x86/kernel/kvmclock.c
arch/x86/kernel/process.c
arch/x86/kernel/tboot.c
arch/x86/kernel/uprobes.c
arch/x86/kvm/Kconfig
arch/x86/kvm/Makefile
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/hyperv.c
arch/x86/kvm/hyperv.h
arch/x86/kvm/irq.c
arch/x86/kvm/kvm_cache_regs.h
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmutrace.h
arch/x86/kvm/mmu/spte.c
arch/x86/kvm/mmu/spte.h
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/mmu/tdp_mmu.h
arch/x86/kvm/mtrr.c
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/svm/vmenter.S
arch/x86/kvm/trace.h
arch/x86/kvm/vmx/evmcs.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmenter.S
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/lib/insn-eval.c
arch/x86/mm/mem_encrypt_identity.c
arch/x86/mm/numa.c
arch/x86/mm/tlb.c
arch/x86/platform/efi/efi_64.c
arch/x86/xen/spinlock.c
arch/xtensa/include/asm/pgtable.h
arch/xtensa/include/asm/uaccess.h
arch/xtensa/mm/cache.c
block/blk-cgroup.c
block/blk-flush.c
block/blk-merge.c
block/blk-settings.c
block/keyslot-manager.c
drivers/Makefile
drivers/accessibility/speakup/spk_ttyio.c
drivers/acpi/apei/apei-base.c
drivers/acpi/arm64/iort.c
drivers/acpi/fan.c
drivers/atm/nicstar.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c
drivers/bus/ti-sysc.c
drivers/clk/imx/Kconfig
drivers/clk/renesas/r9a06g032-clocks.c
drivers/counter/ti-eqep.c
drivers/cpufreq/scmi-cpufreq.c
drivers/cpufreq/tegra186-cpufreq.c
drivers/cpuidle/cpuidle-tegra.c
drivers/dax/Kconfig
drivers/dma/dmaengine.c
drivers/dma/idxd/device.c
drivers/dma/idxd/idxd.h
drivers/dma/idxd/init.c
drivers/dma/idxd/registers.h
drivers/dma/idxd/submit.c
drivers/dma/ioat/dca.c
drivers/dma/pl330.c
drivers/dma/ti/k3-udma-private.c
drivers/dma/ti/omap-dma.c
drivers/dma/xilinx/xilinx_dma.c
drivers/firmware/efi/Kconfig
drivers/firmware/efi/efi.c
drivers/firmware/xilinx/zynqmp.c
drivers/fpga/Kconfig
drivers/gpio/gpio-arizona.c
drivers/gpio/gpio-dwapb.c
drivers/gpio/gpio-eic-sprd.c
drivers/gpio/gpio-mvebu.c
drivers/gpio/gpio-zynq.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
drivers/gpu/drm/amd/pm/inc/smu10.h
drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
drivers/gpu/drm/ast/ast_mode.c
drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
drivers/gpu/drm/drm_gem_vram_helper.c
drivers/gpu/drm/exynos/Kconfig
drivers/gpu/drm/i915/display/intel_display.c
drivers/gpu/drm/i915/display/intel_dp.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
drivers/gpu/drm/i915/gt/intel_context.c
drivers/gpu/drm/i915/gt/intel_context_types.h
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_mocs.c
drivers/gpu/drm/i915/gt/intel_rc6.c
drivers/gpu/drm/i915/gt/intel_rps.c
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/gt/shmem_utils.c
drivers/gpu/drm/i915/gvt/display.c
drivers/gpu/drm/i915/gvt/gvt.h
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/gpu/drm/i915/gvt/vgpu.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_request.h
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/selftests/i915_gem.c
drivers/gpu/drm/i915/selftests/i915_request.c
drivers/gpu/drm/mediatek/mtk_dpi.c
drivers/gpu/drm/mediatek/mtk_dsi.c
drivers/gpu/drm/mxsfb/mxsfb_kms.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_gem.c
drivers/gpu/drm/omapdrm/dss/sdi.c
drivers/gpu/drm/panel/panel-sony-acx565akm.c
drivers/gpu/drm/rockchip/rockchip_lvds.c
drivers/gpu/drm/sun4i/sun4i_backend.c
drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c
drivers/gpu/drm/tegra/drm.c
drivers/gpu/drm/tegra/output.c
drivers/gpu/drm/tegra/sor.c
drivers/gpu/drm/vc4/vc4_drv.h
drivers/gpu/drm/vc4/vc4_hdmi.c
drivers/gpu/drm/vc4/vc4_hdmi.h
drivers/gpu/drm/vc4/vc4_kms.c
drivers/hid/hid-cypress.c
drivers/hid/hid-ids.h
drivers/hid/hid-input.c
drivers/hid/hid-ite.c
drivers/hid/hid-logitech-dj.c
drivers/hid/hid-logitech-hidpp.c
drivers/hid/hid-mcp2221.c
drivers/hid/hid-quirks.c
drivers/hid/hid-sensor-hub.c
drivers/hid/hid-uclogic-core.c
drivers/hid/hid-uclogic-params.c
drivers/hid/i2c-hid/i2c-hid-core.c
drivers/hv/hv.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-mlxbf.c
drivers/i2c/busses/i2c-qcom-cci.c
drivers/i2c/busses/i2c-qup.c
drivers/idle/intel_idle.c
drivers/iio/accel/kxcjk-1013.c
drivers/iio/adc/ingenic-adc.c
drivers/iio/adc/mt6577_auxadc.c
drivers/iio/adc/stm32-adc-core.c
drivers/iio/adc/stm32-adc.c
drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c
drivers/iio/light/Kconfig
drivers/infiniband/Kconfig
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/hw/efa/efa_verbs.c
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/mmu_rb.c
drivers/infiniband/hw/hfi1/mmu_rb.h
drivers/infiniband/hw/hfi1/user_exp_rcv.c
drivers/infiniband/hw/hfi1/user_exp_rcv.h
drivers/infiniband/hw/hfi1/user_sdma.c
drivers/infiniband/hw/hfi1/user_sdma.h
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
drivers/infiniband/hw/i40iw/i40iw_main.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mthca/mthca_cq.c
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
drivers/infiniband/sw/rdmavt/Kconfig
drivers/infiniband/sw/rxe/Kconfig
drivers/infiniband/sw/siw/Kconfig
drivers/input/joystick/xpad.c
drivers/input/keyboard/cros_ec_keyb.c
drivers/input/keyboard/sunkbd.c
drivers/input/misc/adxl34x.c
drivers/input/misc/cm109.c
drivers/input/misc/soc_button_array.c
drivers/input/mouse/elan_i2c.h
drivers/input/mouse/elan_i2c_core.c
drivers/input/mouse/elan_i2c_i2c.c
drivers/input/mouse/elan_i2c_smbus.c
drivers/input/serio/i8042-x86ia64io.h
drivers/input/serio/i8042.c
drivers/input/touchscreen/Kconfig
drivers/input/touchscreen/atmel_mxt_ts.c
drivers/input/touchscreen/goodix.c
drivers/input/touchscreen/raydium_i2c_ts.c
drivers/interconnect/core.c
drivers/interconnect/qcom/msm8916.c
drivers/interconnect/qcom/msm8974.c
drivers/interconnect/qcom/qcs404.c
drivers/iommu/amd/amd_iommu_types.h
drivers/iommu/amd/init.c
drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
drivers/iommu/intel/dmar.c
drivers/iommu/intel/iommu.c
drivers/iommu/iommu.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-sni-exiu.c
drivers/md/dm-cache-target.c
drivers/md/dm-integrity.c
drivers/md/dm-raid.c
drivers/md/dm-table.c
drivers/md/dm-writecache.c
drivers/md/dm.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid0.c
drivers/md/raid10.c
drivers/md/raid10.h
drivers/media/cec/usb/pulse8/pulse8-cec.c
drivers/media/common/videobuf2/videobuf2-core.c
drivers/media/platform/Kconfig
drivers/media/platform/marvell-ccic/mmp-driver.c
drivers/media/platform/mtk-vcodec/Makefile
drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c
drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
drivers/media/platform/mtk-vcodec/mtk_vcodec_fw.c
drivers/media/platform/mtk-vcodec/mtk_vcodec_fw.h
drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_priv.h [new file with mode: 0644]
drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_scp.c [new file with mode: 0644]
drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c [new file with mode: 0644]
drivers/media/platform/qcom/venus/core.h
drivers/media/platform/qcom/venus/pm_helpers.c
drivers/media/platform/qcom/venus/venc.c
drivers/media/platform/qcom/venus/venc_ctrls.c
drivers/media/rc/mtk-cir.c
drivers/media/test-drivers/vidtv/vidtv_bridge.c
drivers/media/test-drivers/vidtv/vidtv_bridge.h
drivers/media/test-drivers/vidtv/vidtv_channel.c
drivers/media/test-drivers/vidtv/vidtv_channel.h
drivers/media/test-drivers/vidtv/vidtv_common.h
drivers/media/test-drivers/vidtv/vidtv_demod.c
drivers/media/test-drivers/vidtv/vidtv_demod.h
drivers/media/test-drivers/vidtv/vidtv_encoder.h
drivers/media/test-drivers/vidtv/vidtv_mux.c
drivers/media/test-drivers/vidtv/vidtv_mux.h
drivers/media/test-drivers/vidtv/vidtv_pes.c
drivers/media/test-drivers/vidtv/vidtv_pes.h
drivers/media/test-drivers/vidtv/vidtv_psi.c
drivers/media/test-drivers/vidtv/vidtv_psi.h
drivers/media/test-drivers/vidtv/vidtv_s302m.c
drivers/media/test-drivers/vidtv/vidtv_s302m.h
drivers/media/test-drivers/vidtv/vidtv_ts.c
drivers/media/test-drivers/vidtv/vidtv_ts.h
drivers/media/test-drivers/vidtv/vidtv_tuner.c
drivers/media/test-drivers/vidtv/vidtv_tuner.h
drivers/misc/eeprom/at24.c
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/memory.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/mei/Kconfig
drivers/misc/mei/Makefile
drivers/misc/mei/hw-virtio.c [deleted file]
drivers/mmc/core/block.c
drivers/mmc/host/mtk-sd.c
drivers/mmc/host/sdhci-of-arasan.c
drivers/mmc/host/sdhci-pci-core.c
drivers/mmc/host/tmio_mmc_core.c
drivers/mtd/nand/raw/ams-delta.c
drivers/mtd/nand/raw/au1550nd.c
drivers/mtd/nand/raw/cs553x_nand.c
drivers/mtd/nand/raw/davinci_nand.c
drivers/mtd/nand/raw/diskonchip.c
drivers/mtd/nand/raw/fsmc_nand.c
drivers/mtd/nand/raw/gpio.c
drivers/mtd/nand/raw/lpc32xx_mlc.c
drivers/mtd/nand/raw/lpc32xx_slc.c
drivers/mtd/nand/raw/mpc5121_nfc.c
drivers/mtd/nand/raw/orion_nand.c
drivers/mtd/nand/raw/pasemi_nand.c
drivers/mtd/nand/raw/plat_nand.c
drivers/mtd/nand/raw/r852.c
drivers/mtd/nand/raw/r852.h
drivers/mtd/nand/raw/sharpsl.c
drivers/mtd/nand/raw/socrates_nand.c
drivers/mtd/nand/raw/tmio_nand.c
drivers/mtd/nand/raw/txx9ndfmc.c
drivers/mtd/nand/raw/xway_nand.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_options.c
drivers/net/bonding/bond_sysfs_slave.c
drivers/net/can/c_can/c_can.c
drivers/net/can/dev.c
drivers/net/can/flexcan.c
drivers/net/can/kvaser_pciefd.c
drivers/net/can/m_can/Kconfig
drivers/net/can/m_can/m_can.c
drivers/net/can/m_can/m_can.h
drivers/net/can/m_can/m_can_platform.c
drivers/net/can/m_can/tcan4x5x.c
drivers/net/can/sja1000/sja1000.c
drivers/net/can/softing/softing_main.c
drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
drivers/net/can/sun4i_can.c
drivers/net/can/ti_hecc.c
drivers/net/can/usb/gs_usb.c
drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
drivers/net/can/usb/mcba_usb.c
drivers/net/can/usb/peak_usb/pcan_usb_core.c
drivers/net/dsa/lantiq_gswip.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/global1.c
drivers/net/dsa/mv88e6xxx/global1.h
drivers/net/dsa/mv88e6xxx/global1_vtu.c
drivers/net/dsa/ocelot/felix.c
drivers/net/dsa/ocelot/felix_vsc9959.c
drivers/net/dsa/ocelot/seville_vsc9953.c
drivers/net/ethernet/agere/Kconfig
drivers/net/ethernet/amazon/ena/ena_eth_com.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
drivers/net/ethernet/atheros/atl1c/atl1c_main.c
drivers/net/ethernet/atheros/atl1e/atl1e_main.c
drivers/net/ethernet/broadcom/Kconfig
drivers/net/ethernet/broadcom/b44.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/cadence/Kconfig
drivers/net/ethernet/chelsio/Kconfig
drivers/net/ethernet/chelsio/cxgb3/sge.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c
drivers/net/ethernet/faraday/Kconfig
drivers/net/ethernet/faraday/ftgmac100.c
drivers/net/ethernet/freescale/Kconfig
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/dpaa2/Kconfig
drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
drivers/net/ethernet/freescale/enetc/Kconfig
drivers/net/ethernet/freescale/enetc/enetc.c
drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
drivers/net/ethernet/freescale/enetc/enetc_hw.h
drivers/net/ethernet/freescale/enetc/enetc_mdio.c
drivers/net/ethernet/freescale/enetc/enetc_qos.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fman/Kconfig
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/ice/ice_txrx.c
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/prestera/prestera_main.c
drivers/net/ethernet/marvell/prestera/prestera_pci.c
drivers/net/ethernet/mediatek/mtk_star_emac.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/fw.h
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx5/core/Kconfig
drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlxsw/Kconfig
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/microchip/Kconfig
drivers/net/ethernet/microchip/lan743x_main.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/mscc/ocelot_vsc7514.c
drivers/net/ethernet/netronome/Kconfig
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/nxp/Kconfig
drivers/net/ethernet/pasemi/pasemi_mac.c
drivers/net/ethernet/qlogic/qed/qed_cxt.c
drivers/net/ethernet/qlogic/qed/qed_cxt.h
drivers/net/ethernet/qlogic/qed/qed_iwarp.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
drivers/net/ethernet/rocker/Kconfig
drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/ti/am65-cpts.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/cpsw_new.c
drivers/net/ethernet/ti/cpsw_priv.c
drivers/net/ethernet/xilinx/ll_temac_main.c
drivers/net/geneve.c
drivers/net/ipa/gsi_trans.c
drivers/net/netdevsim/bpf.c
drivers/net/netdevsim/dev.c
drivers/net/netdevsim/health.c
drivers/net/netdevsim/netdevsim.h
drivers/net/netdevsim/udp_tunnels.c
drivers/net/phy/mscc/mscc_macsec.c
drivers/net/phy/smsc.c
drivers/net/tun.c
drivers/net/usb/cx82310_eth.c
drivers/net/usb/ipheth.c
drivers/net/usb/qmi_wwan.c
drivers/net/vrf.c
drivers/net/vxlan.c
drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h
drivers/net/wireless/intel/iwlwifi/iwl-config.h
drivers/net/wireless/intel/iwlwifi/iwl-csr.h
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
drivers/net/wireless/intel/iwlwifi/mvm/sta.c
drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
drivers/net/wireless/intel/iwlwifi/pcie/drv.c
drivers/net/wireless/intel/iwlwifi/pcie/trans.c
drivers/net/wireless/mediatek/mt76/usb.c
drivers/net/wireless/realtek/rtw88/debug.c
drivers/net/wireless/realtek/rtw88/fw.c
drivers/nfc/s3fwrn5/i2c.c
drivers/nvme/host/core.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
drivers/phy/intel/Kconfig
drivers/phy/mediatek/Kconfig
drivers/phy/motorola/phy-cpcap-usb.c
drivers/phy/qualcomm/Kconfig
drivers/phy/qualcomm/phy-qcom-qmp.c
drivers/phy/tegra/xusb.c
drivers/pinctrl/aspeed/pinctrl-aspeed.c
drivers/pinctrl/aspeed/pinmux-aspeed.h
drivers/pinctrl/intel/pinctrl-baytrail.c
drivers/pinctrl/intel/pinctrl-intel.c
drivers/pinctrl/intel/pinctrl-jasperlake.c
drivers/pinctrl/intel/pinctrl-merrifield.c
drivers/pinctrl/pinctrl-amd.c
drivers/platform/x86/acer-wmi.c
drivers/platform/x86/intel-vbtn.c
drivers/platform/x86/thinkpad_acpi.c
drivers/platform/x86/toshiba_acpi.c
drivers/platform/x86/touchscreen_dmi.c
drivers/ptp/ptp_clockmatrix.c
drivers/pwm/pwm-sl28cpld.c
drivers/regulator/core.c
drivers/regulator/pfuze100-regulator.c
drivers/regulator/ti-abb-regulator.c
drivers/s390/block/dasd.c
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_l2_main.c
drivers/scsi/be2iscsi/be_main.c
drivers/scsi/bnx2i/Kconfig
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
drivers/scsi/libiscsi.c
drivers/scsi/megaraid/megaraid_sas_base.c
drivers/scsi/megaraid/megaraid_sas_fusion.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_ctl.c
drivers/scsi/scsi_lib.c
drivers/scsi/storvsc_drv.c
drivers/scsi/ufs/ufshcd.c
drivers/soc/fsl/dpio/dpio-driver.c
drivers/spi/spi-bcm-qspi.c
drivers/spi/spi-bcm2835.c
drivers/spi/spi-bcm2835aux.c
drivers/spi/spi-cadence-quadspi.c
drivers/spi/spi-dw-core.c
drivers/spi/spi-fsi.c
drivers/spi/spi-fsl-lpspi.c
drivers/spi/spi-imx.c
drivers/spi/spi-npcm-fiu.c
drivers/spi/spi-nxp-fspi.c
drivers/spi/spi.c
drivers/staging/media/sunxi/cedrus/cedrus_h264.c
drivers/staging/mt7621-pci/pci-mt7621.c
drivers/staging/ralink-gdma/Kconfig
drivers/staging/rtl8723bs/os_dep/sdio_intf.c
drivers/target/iscsi/iscsi_target.c
drivers/tee/amdtee/amdtee_private.h
drivers/tee/amdtee/core.c
drivers/tee/optee/call.c
drivers/thermal/ti-soc-thermal/ti-bandgap.c
drivers/thunderbolt/icm.c
drivers/tty/serial/ar933x_uart.c
drivers/tty/serial/imx.c
drivers/tty/tty_io.c
drivers/tty/tty_jobctrl.c
drivers/usb/cdns3/core.c
drivers/usb/cdns3/gadget.c
drivers/usb/core/devio.c
drivers/usb/core/quirks.c
drivers/usb/gadget/function/f_fs.c
drivers/usb/gadget/function/f_midi.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/host/ohci-omap.c
drivers/usb/serial/ch341.c
drivers/usb/serial/kl5kusb105.c
drivers/usb/serial/option.c
drivers/usb/storage/scsiglue.c
drivers/usb/storage/uas.c
drivers/usb/storage/usb.c
drivers/usb/typec/Kconfig
drivers/usb/typec/stusb160x.c
drivers/vdpa/Kconfig
drivers/vfio/virqfd.c
drivers/vhost/scsi.c
drivers/vhost/vdpa.c
drivers/vhost/vhost.c
drivers/vhost/vhost.h
drivers/vhost/vringh.c
drivers/video/fbdev/hyperv_fb.c
drivers/xen/grant-table.c
drivers/xen/unpopulated-alloc.c
drivers/xen/xen-scsiback.c
fs/9p/vfs_file.c
fs/afs/dir.c
fs/afs/inode.c
fs/afs/internal.h
fs/afs/super.c
fs/btrfs/ctree.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/qgroup.c
fs/btrfs/tests/inode-tests.c
fs/btrfs/tree-checker.c
fs/btrfs/volumes.c
fs/cifs/cifsacl.c
fs/cifs/connect.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/transport.c
fs/coredump.c
fs/efivarfs/inode.c
fs/eventfd.c
fs/ext4/ext4.h
fs/ext4/super.c
fs/gfs2/glock.c
fs/gfs2/glops.c
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/rgrp.c
fs/io_uring.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
fs/libfs.c
fs/nfs/Kconfig
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/nfs42proc.c
fs/nfs/nfs42xdr.c
fs/nfs/nfs4file.c
fs/nfs/nfs4proc.c
fs/nfs/pagelist.c
fs/notify/fsnotify.c
fs/proc/self.c
fs/proc/task_mmu.c
fs/seq_file.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_rmap_btree.c
fs/xfs/scrub/bmap.c
fs/xfs/scrub/btree.c
fs/xfs/scrub/dir.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iwalk.c
fs/xfs/xfs_mount.c
fs/zonefs/super.c
include/kunit/test.h
include/linux/blkdev.h
include/linux/bootconfig.h
include/linux/build_bug.h
include/linux/compiler-clang.h
include/linux/elfcore.h
include/linux/eventfd.h
include/linux/firmware/xlnx-zynqmp.h
include/linux/intel-iommu.h
include/linux/irqdomain.h
include/linux/jbd2.h
include/linux/kvm_dirty_ring.h [new file with mode: 0644]
include/linux/kvm_host.h
include/linux/memcontrol.h
include/linux/memory_hotplug.h
include/linux/mlx5/mlx5_ifc.h
include/linux/netdevice.h
include/linux/netfilter/x_tables.h
include/linux/nfs_page.h
include/linux/numa.h
include/linux/pagemap.h
include/linux/pgtable.h
include/linux/platform_data/ti-sysc.h
include/linux/pm_runtime.h
include/linux/sched.h
include/linux/security.h
include/linux/spi/spi.h
include/linux/stmmac.h
include/linux/swiotlb.h
include/linux/tty.h
include/linux/wait.h
include/linux/zsmalloc.h
include/net/bonding.h
include/net/inet_ecn.h
include/net/inet_hashtables.h
include/net/ip_tunnels.h
include/net/ipv6_frag.h
include/net/neighbour.h
include/net/netfilter/nf_tables.h
include/net/netfilter/nf_tables_offload.h
include/net/tls.h
include/net/xdp.h
include/net/xdp_sock.h
include/scsi/libiscsi.h
include/soc/mscc/ocelot.h
include/sound/rt1015.h [new file with mode: 0644]
include/trace/events/kvm.h
include/trace/events/sunrpc.h
include/trace/events/writeback.h
include/uapi/linux/bpf.h
include/uapi/linux/devlink.h
include/uapi/linux/kvm.h
include/uapi/linux/openvswitch.h
include/uapi/linux/stat.h
include/xen/grant_table.h
init/Kconfig
init/initramfs.c
init/main.c
kernel/Makefile
kernel/bpf/helpers.c
kernel/bpf/verifier.c
kernel/cpu.c
kernel/elfcore.c [deleted file]
kernel/fail_function.c
kernel/irq/irqdomain.c
kernel/locking/lockdep.c
kernel/printk/printk.c
kernel/printk/printk_ringbuffer.c
kernel/ptrace.c
kernel/rcu/tree_stall.h
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/sched/membarrier.c
kernel/sched/wait.c
kernel/seccomp.c
kernel/trace/Kconfig
kernel/trace/bpf_trace.c
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace_hwlat.c
lib/Makefile
lib/strncpy_from_user.c
lib/syscall.c
lib/zlib_dfltcc/dfltcc_inflate.c
mm/Kconfig
mm/filemap.c
mm/huge_memory.c
mm/hugetlb.c
mm/hugetlb_cgroup.c
mm/kasan/quarantine.c
mm/list_lru.c
mm/madvise.c
mm/memcontrol.c
mm/memory_hotplug.c
mm/mmap.c
mm/page-writeback.c
mm/page_alloc.c
mm/slab.h
mm/swapfile.c
mm/zsmalloc.c
net/batman-adv/fragmentation.c
net/batman-adv/hard-interface.c
net/batman-adv/log.c
net/bridge/br_device.c
net/bridge/br_multicast.c
net/bridge/br_netfilter_hooks.c
net/bridge/br_private.h
net/bridge/br_vlan.c
net/can/af_can.c
net/can/isotp.c
net/core/dev.c
net/core/devlink.c
net/core/flow_offload.c
net/core/gro_cells.c
net/core/lwt_bpf.c
net/core/neighbour.c
net/core/netpoll.c
net/core/skbuff.c
net/core/skmsg.c
net/core/xdp.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/ethtool/bitset.c
net/ipv4/arp.c
net/ipv4/fib_frontend.c
net/ipv4/inet_connection_sock.c
net/ipv4/inet_diag.c
net/ipv4/inet_hashtables.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/route.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_bpf.c
net/ipv4/tcp_cong.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_output.c
net/ipv4/udp.c
net/ipv6/addrconf.c
net/ipv6/addrlabel.c
net/ipv6/ah6.c
net/ipv6/ip6_gre.c
net/ipv6/ndisc.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/reassembly.c
net/ipv6/tcp_ipv6.c
net/iucv/af_iucv.c
net/mac80211/iface.c
net/mac80211/mesh_pathtbl.c
net/mac80211/rc80211_minstrel.c
net/mac80211/rc80211_minstrel.h
net/mac80211/sta_info.c
net/mac80211/status.c
net/mac80211/util.c
net/mptcp/mib.c
net/mptcp/subflow.c
net/ncsi/ncsi-manage.c
net/ncsi/ncsi-netlink.c
net/ncsi/ncsi-netlink.h
net/netfilter/ipset/ip_set_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_offload.c
net/netfilter/nft_cmp.c
net/netfilter/nft_ct.c
net/netfilter/nft_dynset.c
net/netfilter/nft_meta.c
net/netfilter/nft_payload.c
net/netfilter/x_tables.c
net/netlabel/netlabel_unlabeled.c
net/openvswitch/actions.c
net/openvswitch/flow_netlink.c
net/packet/af_packet.c
net/rfkill/core.c
net/rose/rose_loopback.c
net/sched/act_mpls.c
net/sched/cls_flower.c
net/sched/sch_fq_pie.c
net/sctp/input.c
net/sctp/sm_sideeffect.c
net/sctp/transport.c
net/smc/af_smc.c
net/smc/smc_core.c
net/smc/smc_ib.c
net/tipc/node.c
net/tls/tls_device.c
net/tls/tls_sw.c
net/vmw_vsock/af_vsock.c
net/vmw_vsock/virtio_transport_common.c
net/wireless/nl80211.c
net/x25/af_x25.c
net/xdp/xdp_umem.c
net/xdp/xdp_umem.h
net/xdp/xsk.c
net/xdp/xsk_buff_pool.c
net/xdp/xsk_queue.h
net/xfrm/xfrm_compat.c
net/xfrm/xfrm_state.c
samples/ftrace/ftrace-direct-modify.c
samples/ftrace/ftrace-direct-too.c
samples/ftrace/ftrace-direct.c
scripts/Makefile.build
scripts/Makefile.extrawarn
scripts/lld-version.sh [new file with mode: 0755]
scripts/package/builddeb
sound/core/control.c
sound/firewire/fireworks/fireworks_transaction.c
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_generic.h
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_ca0132.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/pci/mixart/mixart_core.c
sound/soc/codecs/rt1015.c
sound/soc/codecs/rt1015.h
sound/soc/codecs/rt5682.c
sound/soc/codecs/wm_adsp.c
sound/soc/intel/boards/bytcr_rt5640.c
sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
sound/soc/intel/catpt/pcm.c
sound/soc/intel/keembay/kmb_platform.c
sound/soc/qcom/lpass-cpu.c
sound/soc/qcom/lpass-lpaif-reg.h
sound/soc/qcom/lpass-platform.c
sound/soc/qcom/lpass.h
sound/usb/card.c
sound/usb/mixer_maps.c
sound/usb/mixer_us16x08.c
sound/usb/quirks.c
tools/arch/x86/include/asm/insn.h
tools/arch/x86/lib/memcpy_64.S
tools/arch/x86/lib/memset_64.S
tools/bootconfig/main.c
tools/bootconfig/test-bootconfig.sh
tools/bpf/bpftool/btf.c
tools/bpf/bpftool/net.c
tools/bpf/bpftool/pids.c
tools/include/uapi/linux/bpf.h
tools/kvm/kvm_stat/kvm_stat
tools/lib/bpf/Makefile
tools/lib/bpf/libbpf.c
tools/lib/bpf/ringbuf.c
tools/perf/arch/x86/tests/dwarf-unwind.c
tools/perf/bench/mem-memcpy-x86-64-asm.S
tools/perf/bench/mem-memset-x86-64-asm.S
tools/perf/builtin-diff.c
tools/perf/builtin-inject.c
tools/perf/builtin-lock.c
tools/perf/tests/shell/test_arm_coresight.sh
tools/perf/util/dwarf-aux.c
tools/perf/util/hashmap.h
tools/perf/util/include/linux/linkage.h
tools/perf/util/probe-finder.c
tools/perf/util/stat-display.c
tools/perf/util/synthetic-events.c
tools/testing/ktest/ktest.pl
tools/testing/kunit/.gitattributes [deleted file]
tools/testing/kunit/kunit.py
tools/testing/kunit/kunit_kernel.py
tools/testing/kunit/kunit_parser.py
tools/testing/kunit/kunit_tool_test.py
tools/testing/scatterlist/linux/mm.h
tools/testing/scatterlist/main.c
tools/testing/selftests/bpf/prog_tests/align.c
tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/ringbuf.c
tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
tools/testing/selftests/bpf/prog_tests/subprogs.c
tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
tools/testing/selftests/bpf/progs/test_global_func8.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_probe_read_user_str.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_subprogs_unused.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_offload.py
tools/testing/selftests/bpf/verifier/array_access.c
tools/testing/selftests/bpf/verifier/bounds.c
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/demand_paging_test.c
tools/testing/selftests/kvm/dirty_log_perf_test.c
tools/testing/selftests/kvm/dirty_log_test.c
tools/testing/selftests/kvm/include/guest_modes.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/include/perf_test_util.h
tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/include/x86_64/vmx.h
tools/testing/selftests/kvm/lib/aarch64/processor.c
tools/testing/selftests/kvm/lib/guest_modes.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/kvm_util_internal.h
tools/testing/selftests/kvm/lib/perf_test_util.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390x/processor.c
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/s390x/sync_regs_test.c
tools/testing/selftests/kvm/set_memory_region_test.c
tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
tools/testing/selftests/kvm/x86_64/debug_regs.c
tools/testing/selftests/kvm/x86_64/evmcs_test.c
tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
tools/testing/selftests/kvm/x86_64/set_sregs_test.c
tools/testing/selftests/kvm/x86_64/smm_test.c
tools/testing/selftests/kvm/x86_64/state_test.c
tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
tools/testing/selftests/kvm/x86_64/user_msr_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
tools/testing/selftests/net/fcnal-test.sh
tools/testing/selftests/net/udpgso_bench_rx.c
tools/testing/selftests/powerpc/include/utils.h
tools/testing/selftests/powerpc/security/.gitignore
tools/testing/selftests/powerpc/security/Makefile
tools/testing/selftests/powerpc/security/entry_flush.c [new file with mode: 0644]
tools/testing/selftests/powerpc/security/flush_utils.c [new file with mode: 0644]
tools/testing/selftests/powerpc/security/flush_utils.h [new file with mode: 0644]
tools/testing/selftests/powerpc/security/rfi_flush.c
tools/testing/selftests/seccomp/seccomp_bpf.c
tools/testing/selftests/tc-testing/config
tools/testing/selftests/vm/Makefile
tools/testing/selftests/vm/userfaultfd.c
virt/kvm/dirty_ring.c [new file with mode: 0644]
virt/kvm/eventfd.c
virt/kvm/kvm_main.c

index 505b3d7..225546c 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -290,6 +290,7 @@ Santosh Shilimkar <ssantosh@kernel.org>
 Sarangdhar Joshi <spjoshi@codeaurora.org>
 Sascha Hauer <s.hauer@pengutronix.de>
 S.Çağlar Onur <caglar@pardus.org.tr>
+Sean Christopherson <seanjc@google.com> <sean.j.christopherson@intel.com>
 Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>
 Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
 Sebastian Reichel <sre@kernel.org> <sre@debian.org>
@@ -321,6 +322,8 @@ TripleX Chung <xxx.phy@gmail.com> <zhongyu@18mail.cn>
 Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com>
 Tycho Andersen <tycho@tycho.pizza> <tycho@tycho.ws>
 Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
+Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Uwe Kleine-König <ukleinek@strlen.de>
 Uwe Kleine-König <ukl@pengutronix.de>
 Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
 Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
diff --git a/CREDITS b/CREDITS
index 8592e45..e88d1a7 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -98,7 +98,7 @@ N: Erik Andersen
 E: andersen@codepoet.org
 W: https://www.codepoet.org/
 P: 1024D/30D39057 1BC4 2742 E885 E4DE 9301  0C82 5F9B 643E 30D3 9057
-D: Maintainer of ide-cd and Uniform CD-ROM driver, 
+D: Maintainer of ide-cd and Uniform CD-ROM driver,
 D: ATAPI CD-Changer support, Major 2.1.x CD-ROM update.
 S: 352 North 525 East
 S: Springville, Utah 84663
@@ -263,7 +263,7 @@ N: Paul Barton-Davis
 E: pbd@op.net
 D: Driver for WaveFront soundcards (Turtle Beach Maui, Tropez, Tropez+)
 D: Various bugfixes and changes to sound drivers
-S: USA 
+S: USA
 
 N: Carlos Henrique Bauer
 E: chbauer@acm.org
@@ -740,6 +740,11 @@ S: (ask for current address)
 S: Portland, Oregon
 S: USA
 
+N: Jason Cooper
+D: ARM/Marvell SOC co-maintainer
+D: irqchip co-maintainer
+D: MVEBU PCI DRIVER co-maintainer
+
 N: Robin Cornelius
 E: robincornelius@users.sourceforge.net
 D: Ralink rt2x00 WLAN driver
@@ -849,6 +854,12 @@ D: trivial hack to add variable address length routing to Rose.
 D: AX25-HOWTO, HAM-HOWTO, IPX-HOWTO, NET-2-HOWTO
 D: ax25-utils maintainer.
 
+N: Kamil Debski
+E: kamil@wypas.org
+D: Samsung S5P 2D graphics acceleration and Multi Format Codec drivers
+D: Samsung USB2 phy drivers
+D: PWM fan driver
+
 N: Helge Deller
 E: deller@gmx.de
 W: http://www.parisc-linux.org/
@@ -1199,7 +1210,7 @@ N: Daniel J. Frasnelli
 E: dfrasnel@alphalinux.org
 W: http://www.alphalinux.org/
 P: 1024/3EF87611 B9 F1 44 50 D3 E8 C2 80  DA E5 55 AA 56 7C 42 DA
-D: DEC Alpha hacker 
+D: DEC Alpha hacker
 D: Miscellaneous bug squisher
 
 N: Jim Freeman
@@ -1299,7 +1310,7 @@ S: P.O. Box 76, Epping
 S: New South Wales, 2121
 S: Australia
 
-N: Carlos E. Gorges 
+N: Carlos E. Gorges
 E: carlos@techlinux.com.br
 D: fix smp support on cmpci driver
 P: 2048G/EA3C4B19 FF31 33A6 0362 4915 B7EB  E541 17D0 0379 EA3C 4B19
@@ -1340,7 +1351,7 @@ E: wgreathouse@smva.com
 E: wgreathouse@myfavoritei.com
 D: Current Belkin USB Serial Adapter F5U103 hacker
 D: Kernel hacker, embedded systems
-S: 7802 Fitzwater Road   
+S: 7802 Fitzwater Road
 S: Brecksville, OH  44141-1334
 S: USA
 
@@ -1381,7 +1392,7 @@ N: Grant Guenther
 E: grant@torque.net
 W: http://www.torque.net/linux-pp.html
 D: original author of ppa driver for parallel port ZIP drive
-D: original architect of the parallel-port sharing scheme 
+D: original architect of the parallel-port sharing scheme
 D: PARIDE subsystem: drivers for parallel port IDE & ATAPI devices
 S: 44 St. Joseph Street, Suite 506
 S: Toronto, Ontario, M4Y 2W4
@@ -1523,7 +1534,7 @@ N: Benjamin Herrenschmidt
 E: benh@kernel.crashing.org
 D: Various parts of PPC/PPC64 & PowerMac
 S: 312/107 Canberra Avenue
-S: Griffith, ACT 2603 
+S: Griffith, ACT 2603
 S: Australia
 
 N: Andreas Herrmann
@@ -1825,7 +1836,7 @@ S: Hungary
 N: Bernhard Kaindl
 E: bkaindl@netway.at
 E: edv@bartelt.via.at
-D: Author of a menu based configuration tool, kmenu, which 
+D: Author of a menu based configuration tool, kmenu, which
 D: is the predecessor of 'make menuconfig' and 'make xconfig'.
 D: digiboard driver update(modularisation work and 2.1.x upd)
 S: Tallak 95
@@ -2016,7 +2027,7 @@ W: http://www.xos.nl/
 D: IP transparent proxy support
 S: X/OS Experts in Open Systems BV
 S: Kruislaan 419
-S: 1098 VA Amsterdam 
+S: 1098 VA Amsterdam
 S: The Netherlands
 
 N: Goran Koruga
@@ -2088,7 +2099,7 @@ S: Germany
 
 N: Andrzej M. Krzysztofowicz
 E: ankry@mif.pg.gda.pl
-D: Some 8-bit XT disk driver and devfs hacking 
+D: Some 8-bit XT disk driver and devfs hacking
 D: Aladdin 1533/1543(C) chipset IDE
 D: PIIX chipset IDE
 S: ul. Matemblewska 1B/10
@@ -2463,7 +2474,7 @@ E: mge@EZ-Darmstadt.Telekom.de
 D: Logical Volume Manager
 S: Bartningstr. 12
 S: 64289 Darmstadt
-S: Germany 
+S: Germany
 
 N: Mark W. McClelland
 E: mmcclell@bigfoot.com
@@ -2547,7 +2558,7 @@ E: meskes@debian.org
 P: 1024/04B6E8F5 6C 77 33 CA CC D6 22 03  AB AB 15 A3 AE AD 39 7D
 D: Kernel hacker. PostgreSQL hacker. Software watchdog daemon.
 D: Maintainer of several Debian packages
-S: Th.-Heuss-Str. 61 
+S: Th.-Heuss-Str. 61
 S: D-41812 Erkelenz
 S: Germany
 
@@ -2785,7 +2796,7 @@ E: neuffer@goofy.zdv.uni-mainz.de
 W: http://www.i-Connect.Net/~mike/
 D: Developer and maintainer of the EATA-DMA SCSI driver
 D: Co-developer EATA-PIO SCSI driver
-D: /proc/scsi and assorted other snippets 
+D: /proc/scsi and assorted other snippets
 S: Zum Schiersteiner Grund 2
 S: 55127 Mainz
 S: Germany
@@ -2852,6 +2863,10 @@ D: IPX development and support
 N: Venkatesh Pallipadi (Venki)
 D: x86/HPET
 
+N: Kyungmin Park
+E: kyungmin.park@samsung.com
+D: Samsung S5Pv210 and Exynos4210 mobile platforms
+
 N: David Parsons
 E: orc@pell.chi.il.us
 D: improved memory detection code.
@@ -3019,7 +3034,7 @@ D: Embedded PowerPC 4xx/6xx/7xx/74xx support
 S: Chandler, Arizona 85249
 S: USA
 
-N: Frederic Potter 
+N: Frederic Potter
 E: fpotter@cirpack.com
 D: Some PCI kernel support
 
@@ -3452,21 +3467,21 @@ S: Klosterweg 28 / i309
 S: 76131 Karlsruhe
 S: Germany
 
-N: James Simmons 
+N: James Simmons
 E: jsimmons@infradead.org
-E: jsimmons@users.sf.net 
+E: jsimmons@users.sf.net
 D: Frame buffer device maintainer
 D: input layer development
 D: tty/console layer
-D: various mipsel devices 
-S: 115 Carmel Avenue 
+D: various mipsel devices
+S: 115 Carmel Avenue
 S: El Cerrito CA 94530
-S: USA 
+S: USA
 
 N: Jaspreet Singh
 E: jaspreet@sangoma.com
 W: www.sangoma.com
-D: WANPIPE drivers & API Support for Sangoma S508/FT1 cards 
+D: WANPIPE drivers & API Support for Sangoma S508/FT1 cards
 S: Sangoma Technologies Inc.,
 S: 1001 Denison Street
 S: Suite 101
@@ -3490,7 +3505,7 @@ N: Craig Small
 E: csmall@triode.apana.org.au
 E: vk2xlz@gonzo.vk2xlz.ampr.org (packet radio)
 D: Gracilis PackeTwin device driver
-D: RSPF daemon 
+D: RSPF daemon
 S: 10 Stockalls Place
 S: Minto, NSW, 2566
 S: Australia
@@ -3700,7 +3715,7 @@ N: Tsu-Sheng Tsao
 E: tsusheng@scf.usc.edu
 D: IGMP(Internet Group Management Protocol) version 2
 S: 2F 14 ALY 31 LN 166 SEC 1 SHIH-PEI RD
-S: Taipei 
+S: Taipei
 S: Taiwan 112
 S: Republic of China
 S: 24335 Delta Drive
@@ -3861,7 +3876,7 @@ D: Produced the Slackware distribution, updated the SVGAlib
 D: patches for ghostscript, worked on color 'ls', etc.
 S: 301 15th Street S.
 S: Moorhead, Minnesota 56560
-S: USA 
+S: USA
 
 N: Jos Vos
 E: jos@xos.nl
@@ -3869,7 +3884,7 @@ W: http://www.xos.nl/
 D: Various IP firewall updates, ipfwadm
 S: X/OS Experts in Open Systems BV
 S: Kruislaan 419
-S: 1098 VA Amsterdam 
+S: 1098 VA Amsterdam
 S: The Netherlands
 
 N: Jeroen Vreeken
@@ -4107,7 +4122,7 @@ S: People's Republic of China
 N: Victor Yodaiken
 E: yodaiken@fsmlabs.com
 D: RTLinux (RealTime Linux)
-S: POB 1822 
+S: POB 1822
 S: Socorro NM, 87801
 S: USA
 
@@ -4205,7 +4220,7 @@ D: EISA/sysfs subsystem
 S: France
 
 # Don't add your name here, unless you really _are_ after Marc
-# alphabetically. Leonard used to be very proud of being the 
+# alphabetically. Leonard used to be very proud of being the
 # last entry, and he'll get positively pissed if he can't even
 # be second-to-last.  (and this file really _is_ supposed to be
 # in alphabetic order)
index a10a4de..c4a4497 100644 (file)
@@ -109,30 +109,6 @@ Description:
                When counting down, the counter starts from the preset value
                and fires an event when it reaches 0.
 
-What:          /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available
-KernelVersion: 4.12
-Contact:       benjamin.gaignard@st.com
-Description:
-               Reading returns the list possible quadrature modes.
-
-What:          /sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode
-KernelVersion: 4.12
-Contact:       benjamin.gaignard@st.com
-Description:
-               Configure the device counter quadrature modes:
-
-               channel_A:
-                       Encoder A input servers as the count input and B as
-                       the UP/DOWN direction control input.
-
-               channel_B:
-                       Encoder B input serves as the count input and A as
-                       the UP/DOWN direction control input.
-
-               quadrature:
-                       Encoder A and B inputs are mixed to get direction
-                       and count with a scale of 0.25.
-
 What:          /sys/bus/iio/devices/iio:deviceX/in_count_enable_mode_available
 KernelVersion: 4.12
 Contact:       benjamin.gaignard@st.com
index a22024f..9b90efc 100644 (file)
@@ -137,15 +137,24 @@ Boot Kernel With a Boot Config
 ==============================
 
 Since the boot configuration file is loaded with initrd, it will be added
-to the end of the initrd (initramfs) image file with size, checksum and
-12-byte magic word as below.
+to the end of the initrd (initramfs) image file with padding, size,
+checksum and 12-byte magic word as below.
 
-[initrd][bootconfig][size(u32)][checksum(u32)][#BOOTCONFIG\n]
+[initrd][bootconfig][padding][size(le32)][checksum(le32)][#BOOTCONFIG\n]
+
+The size and checksum fields are unsigned 32-bit little-endian values.
+
+When the boot configuration is added to the initrd image, the total
+file size is aligned to 4 bytes. To fill the gap, null characters
+(``\0``) will be added. Thus the ``size`` is the length of the bootconfig
+file + padding bytes.
 
 The Linux kernel decodes the last part of the initrd image in memory to
 get the boot configuration data.
 Because of this "piggyback" method, there is no need to change or
-update the boot loader and the kernel image itself.
+update the boot loader and the kernel image itself as long as the boot
+loader passes the correct initrd file size. If, by any chance, the boot
+loader passes a longer size, the kernel fails to find the bootconfig data.
 
 To do this operation, Linux kernel provides "bootconfig" command under
 tools/bootconfig, which allows admin to apply or delete the config file
@@ -176,7 +185,8 @@ up to 512 key-value pairs. If keys contain 3 words on average, it can
 contain 256 key-value pairs. In most cases, the number of config items
 will be under 100 entries and smaller than 8KB, so it would be enough.
 If the node number exceeds 1024, parser returns an error even if the file
-size is smaller than 32KB.
+size is smaller than 32KB. (Note that this maximum size does not include
+the padding null characters.)
 Anyway, since bootconfig command verifies it when appending a boot config
 to initrd image, user can notice it before boot.
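
The footer layout described above is easy to reproduce. Below is a minimal
user-space sketch, not the actual tools/bootconfig implementation; in
particular, the plain additive byte-sum checksum is an assumption here and
should be checked against tools/bootconfig before relying on it.

.. code-block:: c

	#include <stdint.h>
	#include <stdio.h>

	#define BOOTCONFIG_MAGIC	"#BOOTCONFIG\n"	/* 12-byte magic word */

	/* Assumed checksum: a plain 32-bit sum of the data bytes. */
	static uint32_t byte_sum(const uint8_t *buf, uint32_t len)
	{
		uint32_t sum = 0;

		while (len--)
			sum += *buf++;
		return sum;
	}

	static void put_le32(FILE *out, uint32_t v)
	{
		uint8_t b[4] = { v & 0xff, (v >> 8) & 0xff,
				 (v >> 16) & 0xff, (v >> 24) & 0xff };

		fwrite(b, 1, 4, out);
	}

	/*
	 * Append @len bytes of bootconfig @data to @initrd (opened in
	 * append mode, currently @initrd_size bytes long), laying out
	 * [bootconfig][padding][size(le32)][checksum(le32)][magic].
	 */
	static void append_bootconfig(FILE *initrd, long initrd_size,
				      const uint8_t *data, uint32_t len)
	{
		static const uint8_t zeros[4];
		uint32_t pad = (4 - (uint32_t)((initrd_size + len) % 4)) % 4;

		fwrite(data, 1, len, initrd);
		fwrite(zeros, 1, pad, initrd);	/* align total to 4 bytes */
		put_le32(initrd, len + pad);	/* size includes the padding */
		/* padding is all NUL bytes, so it does not affect the sum */
		put_le32(initrd, byte_sum(data, len));
		fwrite(BOOTCONFIG_MAGIC, 1, 12, initrd);
	}

The kernel then only has to read the 12-byte magic word and the two fields
at the very end of the initrd to locate the bootconfig data.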
 
index ee9f137..71b8b89 100644 (file)
                                               mds=off [X86]
                                               tsx_async_abort=off [X86]
                                               kvm.nx_huge_pages=off [X86]
+                                              no_entry_flush [PPC]
+                                              no_uaccess_flush [PPC]
 
                                Exceptions:
                                               This does not have any effect on
 
        noefi           Disable EFI runtime services support.
 
+       no_entry_flush  [PPC] Don't flush the L1-D cache when entering the kernel.
+
        noexec          [IA-64]
 
        noexec          [X86]
        nospec_store_bypass_disable
                        [HW] Disable all mitigations for the Speculative Store Bypass vulnerability
 
+       no_uaccess_flush
+                       [PPC] Don't flush the L1-D cache after accessing user data.
+
        noxsave         [BUGS=X86] Disables x86 extended register state save
                        and restore using xsave. The kernel will fallback to
                        enabling legacy floating-point and sse state.
index 1628862..8d5029a 100644 (file)
@@ -90,7 +90,7 @@ things to try.
    re-run kunit_tool.
 5. Try to run ``make ARCH=um defconfig`` before running ``kunit.py run``. This
    may help clean up any residual config items which could be causing problems.
-6. Finally, try running KUnit outside UML. KUnit and KUnit tests can run be
+6. Finally, try running KUnit outside UML. KUnit and KUnit tests can be
    built into any kernel, or can be built as a module and loaded at runtime.
    Doing so should allow you to determine if UML is causing the issue you're
    seeing. When tests are built-in, they will execute when the kernel boots, and
index da1d6f0..8dbcdc5 100644 (file)
@@ -175,17 +175,17 @@ An example Kconfig entry:
 
 .. code-block:: none
 
-        config FOO_KUNIT_TEST
-                tristate "KUnit test for foo" if !KUNIT_ALL_TESTS
-                depends on KUNIT
-                default KUNIT_ALL_TESTS
-                help
-                    This builds unit tests for foo.
+       config FOO_KUNIT_TEST
+               tristate "KUnit test for foo" if !KUNIT_ALL_TESTS
+               depends on KUNIT
+               default KUNIT_ALL_TESTS
+               help
+                 This builds unit tests for foo.
 
-                    For more information on KUnit and unit tests in general, please refer
-                    to the KUnit documentation in Documentation/dev-tools/kunit
+                 For more information on KUnit and unit tests in general, please refer
+                 to the KUnit documentation in Documentation/dev-tools/kunit/.
 
-                    If unsure, say N
+                 If unsure, say N.
 
 
 Test File and Module Names
index 62142a4..9c28c51 100644 (file)
@@ -92,7 +92,7 @@ behavior of a function called ``add``; the first parameter is always of type
 the second parameter, in this case, is what the value is expected to be; the
 last value is what the value actually is. If ``add`` passes all of these
 expectations, the test case, ``add_test_basic`` will pass; if any one of these
-expectations fail, the test case will fail.
+expectations fails, the test case will fail.
 
 It is important to understand that a test case *fails* when any expectation is
 violated; however, the test will continue running, potentially trying other
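
For reference, the ``add_test_basic`` case that this paragraph describes
looks roughly as follows; the trivial ``add()`` below is a stand-in for the
real function under test.

.. code-block:: c

	#include <kunit/test.h>

	/* Stand-in for the function being tested. */
	static int add(int left, int right)
	{
		return left + right;
	}

	static void add_test_basic(struct kunit *test)
	{
		/* Arguments: (test context, expected value, actual value). */
		KUNIT_EXPECT_EQ(test, 1, add(1, 0));
		KUNIT_EXPECT_EQ(test, 2, add(1, 1));
		KUNIT_EXPECT_EQ(test, 0, add(-1, 1));
	}

Each violated expectation is reported individually, which is why the test
case keeps running after its first failure.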
@@ -202,7 +202,7 @@ Example:
        kunit_test_suite(example_test_suite);
 
 In the above example the test suite, ``example_test_suite``, would run the test
-cases ``example_test_foo``, ``example_test_bar``, and ``example_test_baz``,
+cases ``example_test_foo``, ``example_test_bar``, and ``example_test_baz``;
 each would have ``example_test_init`` called immediately before it and would
 have ``example_test_exit`` called immediately after it.
 ``kunit_test_suite(example_test_suite)`` registers the test suite with the
@@ -229,7 +229,7 @@ through some sort of indirection where a function is exposed as part of an API
 such that the definition of that function can be changed without affecting the
 rest of the code base. In the kernel this primarily comes from two constructs,
 classes, structs that contain function pointers that are provided by the
-implementer, and architecture specific functions which have definitions selected
+implementer, and architecture-specific functions which have definitions selected
 at compile time.
 
 Classes
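
As a quick sketch of that first construct: a kernel "class" is typically a
struct of function pointers filled in by the implementer. All names below
are hypothetical and only illustrate the shape of the idiom.

.. code-block:: c

	struct shape_ops {
		int (*area)(const void *priv);
		void (*destroy)(void *priv);
	};

	struct shape {
		const struct shape_ops *ops;
		void *priv;
	};

	/*
	 * Callers reach the behaviour only through the ops table, so a
	 * test can install a fake struct shape_ops without touching any
	 * call sites.
	 */
	static inline int shape_area(const struct shape *s)
	{
		return s->ops->area(s->priv);
	}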
@@ -459,7 +459,7 @@ KUnit on non-UML architectures
 By default KUnit uses UML as a way to provide dependencies for code under test.
 Under most circumstances KUnit's usage of UML should be treated as an
 implementation detail of how KUnit works under the hood. Nevertheless, there
-are instances where being able to run architecture specific code or test
+are instances where being able to run architecture-specific code or test
 against real hardware is desirable. For these reasons KUnit supports running on
 other architectures.
 
@@ -599,7 +599,7 @@ writing normal KUnit tests. One special caveat is that you have to reset
 hardware state in between test cases; if this is not possible, you may only be
 able to run one test case per invocation.
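
One hedged way to arrange that reset, assuming a hypothetical
``mydev_hw_reset()`` helper, is the suite's per-case ``init`` callback:

.. code-block:: c

    static int mydev_test_init(struct kunit *test)
    {
            mydev_hw_reset();       /* assumed device reset helper */
            return 0;
    }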
 
-.. TODO(brendanhiggins@google.com): Add an actual example of an architecture
+.. TODO(brendanhiggins@google.com): Add an actual example of an architecture-
    dependent KUnit test.
 
 KUnit debugfs representation
index 03a7672..7ce06f9 100644 (file)
@@ -76,6 +76,12 @@ properties:
   resets:
     maxItems: 1
 
+  wifi-2.4ghz-coexistence:
+    type: boolean
+    description: >
+      Should the pixel frequencies in the WiFi frequency range be
+      avoided?
+
 required:
   - compatible
   - reg
index 3613c2c..0968b40 100644 (file)
@@ -33,7 +33,7 @@ tcan4x5x: tcan4x5x@0 {
                spi-max-frequency = <10000000>;
                bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>;
                interrupt-parent = <&gpio1>;
-               interrupts = <14 GPIO_ACTIVE_LOW>;
+               interrupts = <14 IRQ_TYPE_LEVEL_LOW>;
                device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
                device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>;
                reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>;
index cfaf889..9e4dc51 100644 (file)
@@ -25,7 +25,7 @@ Example (for ARM-based BeagleBone with NPC100 NFC controller on I2C2):
                clock-frequency = <100000>;
 
                interrupt-parent = <&gpio1>;
-               interrupts = <29 GPIO_ACTIVE_HIGH>;
+               interrupts = <29 IRQ_TYPE_LEVEL_HIGH>;
 
                enable-gpios = <&gpio0 30 GPIO_ACTIVE_HIGH>;
                firmware-gpios = <&gpio0 31 GPIO_ACTIVE_HIGH>;
index 92f399e..2bd8256 100644 (file)
@@ -25,7 +25,7 @@ Example (for ARM-based BeagleBone with PN544 on I2C2):
                clock-frequency = <400000>;
 
                interrupt-parent = <&gpio1>;
-               interrupts = <17 GPIO_ACTIVE_HIGH>;
+               interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
 
                enable-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
                firmware-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
index fcfd02d..e498966 100644 (file)
@@ -8,10 +8,16 @@ Required properties:
 
 - reg : The I2C address of the device.
 
+Optional properties:
+
+- realtek,power-up-delay-ms
+  Set a delay time for the flush work to complete;
+  this value is adjustable depending on the platform.
 
 Example:
 
 rt1015: codec@28 {
        compatible = "realtek,rt1015";
        reg = <0x28>;
+       realtek,power-up-delay-ms = <50>;
 };
index 6511544..673bdff 100644 (file)
@@ -149,11 +149,11 @@ vidtv_psi.[ch]
        Because the generator is implemented in a separate file, it can be
        reused elsewhere in the media subsystem.
 
-       Currently vidtv supports working with 3 PSI tables: PAT, PMT and
-       SDT.
+       Currently vidtv supports working with 5 PSI tables: PAT, PMT,
+       SDT, NIT and EIT.
 
        The specification for PAT and PMT can be found in *ISO 13818-1:
-       Systems*, while the specification for the SDT can be found in *ETSI
+       Systems*, while the specification for the SDT, NIT, and EIT can be found in *ETSI
        EN 300 468: Specification for Service Information (SI) in DVB
        systems*.
 
@@ -197,6 +197,8 @@ vidtv_channel.[ch]
 
        #. Their programs will be concatenated to populate the PAT
 
+       #. Their events will be concatenated to populate the EIT
+
        #. For each program in the PAT, a PMT section will be created
 
        #. The PMT section for a channel will be assigned its streams.
@@ -256,6 +258,42 @@ Using dvb-fe-tool
 The first step to check whether the demod loaded successfully is to run::
 
        $ dvb-fe-tool
+       Device Dummy demod for DVB-T/T2/C/S/S2 (/dev/dvb/adapter0/frontend0) capabilities:
+           CAN_FEC_1_2
+           CAN_FEC_2_3
+           CAN_FEC_3_4
+           CAN_FEC_4_5
+           CAN_FEC_5_6
+           CAN_FEC_6_7
+           CAN_FEC_7_8
+           CAN_FEC_8_9
+           CAN_FEC_AUTO
+           CAN_GUARD_INTERVAL_AUTO
+           CAN_HIERARCHY_AUTO
+           CAN_INVERSION_AUTO
+           CAN_QAM_16
+           CAN_QAM_32
+           CAN_QAM_64
+           CAN_QAM_128
+           CAN_QAM_256
+           CAN_QAM_AUTO
+           CAN_QPSK
+           CAN_TRANSMISSION_MODE_AUTO
+       DVB API Version 5.11, Current v5 delivery system: DVBC/ANNEX_A
+       Supported delivery systems:
+           DVBT
+           DVBT2
+           [DVBC/ANNEX_A]
+           DVBS
+           DVBS2
+       Frequency range for the current standard:
+       From:            51.0 MHz
+       To:              2.15 GHz
+       Step:            62.5 kHz
+       Tolerance:       29.5 MHz
+       Symbol rate ranges for the current standard:
+       From:            1.00 MBauds
+       To:              45.0 MBauds
 
 This should return what is currently set up at the demod struct, i.e.::
 
@@ -314,7 +352,7 @@ For this, one should provide a configuration file known as a 'scan file',
 here's an example::
 
        [Channel]
-       FREQUENCY = 330000000
+       FREQUENCY = 474000000
        MODULATION = QAM/AUTO
        SYMBOL_RATE = 6940000
        INNER_FEC = AUTO
@@ -335,6 +373,14 @@ You can browse scan tables online here: `dvb-scan-tables
 Assuming this channel is named 'channel.conf', you can then run::
 
        $ dvbv5-scan channel.conf
+       ERROR    command BANDWIDTH_HZ (5) not found during retrieve
+       Cannot calc frequency shift. Either bandwidth/symbol-rate is unavailable (yet).
+       Scanning frequency #1 330000000
+           (0x00) Signal= -68.00dBm
+       Scanning frequency #2 474000000
+       Lock   (0x1f) Signal= -34.45dBm C/N= 33.74dB UCB= 0
+       Service Beethoven, provider LinuxTV.org: digital television
 
 For more information on dvb-scan, check its documentation online here:
 `dvb-scan Documentation <https://www.linuxtv.org/wiki/index.php/Dvbscan>`_.
@@ -344,23 +390,38 @@ Using dvb-zap
 
 dvbv5-zap is a command line tool that can be used to record MPEG-TS to disk. The
 typical use is to tune into a channel and put it into record mode. The example
-below - which is taken from the documentation - illustrates that::
+below - which is taken from the documentation - illustrates that\ [1]_::
 
-       $ dvbv5-zap -c dvb_channel.conf "trilhas sonoras" -r
-       using demux '/dev/dvb/adapter0/demux0'
+       $ dvbv5-zap -c dvb_channel.conf "beethoven" -o music.ts -P -t 10
+       using demux 'dvb0.demux0'
        reading channels from file 'dvb_channel.conf'
-       service has pid type 05:  204
-       tuning to 573000000 Hz
-       audio pid 104
-         dvb_set_pesfilter 104
-       Lock   (0x1f) Quality= Good Signal= 100.00% C/N= -13.80dB UCB= 70 postBER= 3.14x10^-3 PER= 0
-       DVR interface '/dev/dvb/adapter0/dvr0' can now be opened
+       tuning to 474000000 Hz
+       pass all PID's to TS
+       dvb_set_pesfilter 8192
+       dvb_dev_set_bufsize: buffer set to 6160384
+       Lock   (0x1f) Quality= Good Signal= -34.66dBm C/N= 33.41dB UCB= 0 postBER= 0 preBER= 1.05x10^-3 PER= 0
+       Lock   (0x1f) Quality= Good Signal= -34.57dBm C/N= 33.46dB UCB= 0 postBER= 0 preBER= 1.05x10^-3 PER= 0
+       Record to file 'music.ts' started
+       received 24587768 bytes (2401 Kbytes/sec)
+       Lock   (0x1f) Quality= Good Signal= -34.42dBm C/N= 33.89dB UCB= 0 postBER= 0 preBER= 2.44x10^-3 PER= 0
+
+.. [1] In this example, dvbv5-zap records 10 seconds with all program IDs
+       stored in the music.ts file.
+
 
-The channel can be watched by playing the contents of the DVR interface, with
-some player that recognizes the MPEG-TS format, such as *mplayer* or *vlc*.
+The channel can be watched by playing the contents of the stream with some
+player that recognizes the MPEG-TS format, such as ``mplayer`` or ``vlc``.
 
 By playing the contents of the stream one can visually inspect the workings of
-vidtv, e.g.::
+vidtv, e.g., to play a recorded TS file with::
+
+       $ mplayer music.ts
+
+or, alternatively, by running this command on one terminal::
+
+       $ dvbv5-zap -c dvb_channel.conf "beethoven" -P -r &
+
+and, on a second terminal, playing the contents from the DVR interface with::
 
        $ mplayer /dev/dvb/adapter0/dvr0
 
@@ -423,3 +484,30 @@ A nice addition is to simulate some noise when the signal quality is bad by:
 - Updating the error statistics accordingly (e.g. BER, etc).
 
 - Simulating some noise in the encoded data.
+
+Functions and structs used within vidtv
+---------------------------------------
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_bridge.h
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_channel.h
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_demod.h
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_encoder.h
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_mux.h
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_pes.h
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_psi.h
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_s302m.h
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_ts.h
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_tuner.h
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_common.c
+
+.. kernel-doc:: drivers/media/test-drivers/vidtv/vidtv_tuner.c
index cf3ca23..21c8478 100644 (file)
@@ -57,9 +57,8 @@ to enable them. ::
 They can be enabled individually. The full list of the parameters: ::
 
        make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \
-         OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump OBJSIZE=llvm-size \
-         READELF=llvm-readelf HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar \
-         HOSTLD=ld.lld
+         OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump READELF=llvm-readelf \
+         HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar HOSTLD=ld.lld
 
 Currently, the integrated assembler is disabled by default. You can pass
 ``LLVM_IAS=1`` to enable it.
index 2153776..4b9ed58 100644 (file)
@@ -254,6 +254,32 @@ you will have done run-time testing specific to your change, but at a
 minimum, your changes should survive an ``allyesconfig`` and an
 ``allmodconfig`` build without new warnings or failures.
 
+Q: How do I post corresponding changes to user space components?
+----------------------------------------------------------------
+A: User space code exercising kernel features should be posted
+alongside kernel patches. This gives reviewers a chance to see
+how any new interface is used and how well it works.
+
+When user space tools reside in the kernel repo itself, all changes
+should generally come as one series. If the series becomes too large,
+or the user space project is not reviewed on netdev, include a link
+to a public repo where the user space patches can be seen.
+
+In case user space tooling lives in a separate repository but is
+reviewed on netdev (e.g. patches to `iproute2` tools), kernel and
+user space patches should form separate series (threads) when posted
+to the mailing list, e.g.::
+
+  [PATCH net-next 0/3] net: some feature cover letter
+   └─ [PATCH net-next 1/3] net: some feature prep
+   └─ [PATCH net-next 2/3] net: some feature do it
+   └─ [PATCH net-next 3/3] selftest: net: some feature
+
+  [PATCH iproute2-next] ip: add support for some feature
+
+Posting as one thread is discouraged because it confuses patchwork
+(as of patchwork 2.2.2).
+
 Q: Any other tips to help ensure my net/net-next patch gets OK'd?
 -----------------------------------------------------------------
 A: Attention to detail.  Re-read your own work as if you were the
index 4e5316e..c136e25 100644 (file)
@@ -262,6 +262,18 @@ The KVM_RUN ioctl (cf.) communicates with userspace via a shared
 memory region.  This ioctl returns the size of that region.  See the
 KVM_RUN documentation for details.
 
+Besides the size of the KVM_RUN communication region, other areas of
+the VCPU file descriptor can be mmap-ed, including:
+
+- if KVM_CAP_COALESCED_MMIO is available, a page at
+  KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE; for historical reasons,
+  this page is included in the result of KVM_GET_VCPU_MMAP_SIZE.
+  KVM_CAP_COALESCED_MMIO is not documented yet.
+
+- if KVM_CAP_DIRTY_LOG_RING is available, a number of pages at
+  KVM_DIRTY_LOG_PAGE_OFFSET * PAGE_SIZE.  For more information on
+  KVM_CAP_DIRTY_LOG_RING, see section 8.3.
+
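As a sketch (not part of the ABI description above), the dirty ring
pages could be mapped like this, assuming KVM_CAP_DIRTY_LOG_RING was
already enabled and ``ring_bytes`` holds the configured size::

  /* needs <sys/mman.h>, <unistd.h>, <linux/kvm.h> */
  long psz = sysconf(_SC_PAGESIZE);
  struct kvm_dirty_gfn *ring =
          mmap(NULL, ring_bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
               vcpu_fd, KVM_DIRTY_LOG_PAGE_OFFSET * psz);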
 
 4.6 KVM_SET_MEMORY_REGION
 -------------------------
@@ -4460,9 +4472,9 @@ that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present.
 4.118 KVM_GET_SUPPORTED_HV_CPUID
 --------------------------------
 
-:Capability: KVM_CAP_HYPERV_CPUID
+:Capability: KVM_CAP_HYPERV_CPUID (vcpu), KVM_CAP_SYS_HYPERV_CPUID (system)
 :Architectures: x86
-:Type: vcpu ioctl
+:Type: system ioctl, vcpu ioctl
 :Parameters: struct kvm_cpuid2 (in/out)
 :Returns: 0 on success, -1 on error
 
@@ -4507,9 +4519,6 @@ Currently, the following list of CPUID leaves are returned:
  - HYPERV_CPUID_SYNDBG_INTERFACE
  - HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
 
-HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
-enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
-
 Userspace invokes KVM_GET_SUPPORTED_HV_CPUID by passing a kvm_cpuid2 structure
 with the 'nent' field indicating the number of entries in the variable-size
 array 'entries'.  If the number of entries is too low to describe all Hyper-V
@@ -4520,6 +4529,15 @@ number of valid entries in the 'entries' array, which is then filled.
 'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved,
 userspace should not expect to get any particular value there.
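
A minimal usage sketch; the 64-entry buffer is an arbitrary first guess,
and a real caller would retry with a larger 'nent' on E2BIG::

  struct {
          struct kvm_cpuid2 hdr;
          struct kvm_cpuid_entry2 entries[64];
  } hv_cpuid = { .hdr.nent = 64 };

  /* system ioctl variant, issued on the /dev/kvm fd */
  if (ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &hv_cpuid) == 0)
          printf("%u Hyper-V CPUID leaves\n", hv_cpuid.hdr.nent);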
 
+Note, the vcpu version of KVM_GET_SUPPORTED_HV_CPUID is currently
+deprecated. Unlike the system ioctl, which exposes all supported feature
+bits unconditionally, the vcpu version has the following quirks:
+- The HYPERV_CPUID_NESTED_FEATURES leaf and the HV_X64_ENLIGHTENED_VMCS_RECOMMENDED
+  feature bit are only exposed when Enlightened VMCS was previously enabled
+  on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
+- The HV_STIMER_DIRECT_MODE_AVAILABLE bit is only exposed with an in-kernel
+  LAPIC (which presumes KVM_CREATE_IRQCHIP has already been called).
+
 4.119 KVM_ARM_VCPU_FINALIZE
 ---------------------------
 
@@ -6395,3 +6413,91 @@ When enabled, KVM will disable paravirtual features provided to the
 guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
 (0x40000001). Otherwise, a guest may use the paravirtual features
 regardless of what has actually been exposed through the CPUID leaf.
+
+
+8.29 KVM_CAP_DIRTY_LOG_RING
+---------------------------
+
+:Architectures: x86
+:Parameters: args[0] - size of the dirty log ring
+
+KVM is capable of tracking dirty memory using ring buffers that are
+mmaped into userspace; there is one dirty ring per vcpu.
+
+The dirty ring is available to userspace as an array of
+``struct kvm_dirty_gfn``.  Each dirty entry is defined as::
+
+  struct kvm_dirty_gfn {
+          __u32 flags;
+          __u32 slot; /* as_id | slot_id */
+          __u64 offset;
+  };
+
+The following values are defined for the flags field and describe the
+current state of the entry::
+
+  #define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
+  #define KVM_DIRTY_GFN_F_RESET           BIT(1)
+  #define KVM_DIRTY_GFN_F_MASK            0x3
+
+Userspace should call the KVM_ENABLE_CAP ioctl right after the
+KVM_CREATE_VM ioctl to enable this capability for the new guest and to
+set the size of the rings.  Enabling the capability is only allowed
+before creating any vCPU, and the size of the ring must be a power of
+two.  The larger the ring buffer, the less likely it is that the ring
+fills up and forces the VM to exit to userspace.  The optimal size
+depends on the workload, but it is recommended that it be at least
+64 KiB (4096 entries).
+
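A hedged sketch of that call sequence; 65536 bytes corresponds to the
recommended 4096 entries of ``struct kvm_dirty_gfn`` (16 bytes each)::

  struct kvm_enable_cap cap = {
          .cap = KVM_CAP_DIRTY_LOG_RING,
          .args[0] = 65536,       /* ring size in bytes, power of two */
  };
  ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
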
+Just like for dirty page bitmaps, the buffer tracks writes to
+all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was
+set in KVM_SET_USER_MEMORY_REGION.  Once a memory region is registered
+with the flag set, userspace can start harvesting dirty pages from the
+ring buffer.
+
+An entry in the ring buffer can be unused (flag bits ``00``),
+dirty (flag bits ``01``) or harvested (flag bits ``1X``).  The
+state machine for the entry is as follows::
+
+          dirtied         harvested        reset
+     00 -----------> 01 -------------> 1X -------+
+      ^                                          |
+      |                                          |
+      +------------------------------------------+
+
+To harvest the dirty pages, userspace accesses the mmaped ring buffer
+to read the dirty GFNs.  If the flags field has the DIRTY bit set (at
+this stage the RESET bit must be cleared), the entry describes a dirty
+GFN.  Userspace should harvest this GFN and mark the flags from state
+``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set
+to show that this GFN is harvested and waiting for a reset), and move
+on to the next GFN.  Userspace should continue to do this until the
+flags of a GFN have the DIRTY bit cleared, meaning that it has harvested
+all the dirty GFNs that were available.
+
+It's not necessary for userspace to harvest all the dirty GFNs at once.
+However, it must collect the dirty GFNs in sequence, i.e., the userspace
+program cannot skip one dirty GFN to collect the one next to it.
+
+After processing one or more entries in the ring buffer, userspace
+calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about
+it, so that the kernel will reprotect those collected GFNs.
+Therefore, the ioctl must be called *before* reading the content of
+the dirty pages.
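
Putting the harvest and reset steps together, a sketch (the memory
ordering a real VMM needs is omitted; ``collect_page()`` and the
persistent cursor ``fetch`` are hypothetical)::

  for (;;) {
          struct kvm_dirty_gfn *e = &ring[fetch % nent];

          if (!(e->flags & KVM_DIRTY_GFN_F_DIRTY))
                  break;                          /* nothing left */
          collect_page(e->slot, e->offset);       /* app-defined */
          e->flags = KVM_DIRTY_GFN_F_RESET;       /* 01b -> 1Xb */
          fetch++;
  }
  ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0);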
+
+The dirty ring can get full.  When that happens, KVM_RUN on the
+vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL.
+
+The dirty ring interface has one major difference compared to the
+KVM_GET_DIRTY_LOG interface: when reading the dirty ring from
+userspace, it's still possible that the kernel has not yet flushed the
+processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the
+flushing is done by the KVM_GET_DIRTY_LOG ioctl).  To achieve that, one
+needs to kick the vcpu out of KVM_RUN using a signal.  The resulting
+vmexit ensures that all dirty GFNs are flushed to the dirty rings.
+
+NOTE: the capability KVM_CAP_DIRTY_LOG_RING and the corresponding
+ioctl KVM_RESET_DIRTY_RINGS are mutually exclusive with the existing ioctls
+KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG.  After enabling
+KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual
+machine will switch to ring-buffer dirty page tracking and further
+KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail.
index 1c030db..5bfe28b 100644 (file)
@@ -455,7 +455,7 @@ If the generation number of the spte does not equal the global generation
 number, it will ignore the cached MMIO information and handle the page
 fault through the slow path.
 
-Since only 19 bits are used to store generation-number on mmio spte, all
+Since only 18 bits are used to store generation-number on mmio spte, all
 pages are zapped when there is an overflow.
 
 Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
index e52a129..450573a 100644 (file)
@@ -82,7 +82,8 @@ Default MMUv2-compatible layout::
   +------------------+
   | VMALLOC area     |  VMALLOC_START            0xc0000000  128MB - 64KB
   +------------------+  VMALLOC_END
-  | Cache aliasing   |  TLBTEMP_BASE_1           0xc7ff0000  DCACHE_WAY_SIZE
+  +------------------+
+  | Cache aliasing   |  TLBTEMP_BASE_1           0xc8000000  DCACHE_WAY_SIZE
   | remap area 1     |
   +------------------+
   | Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
@@ -124,7 +125,8 @@ Default MMUv2-compatible layout::
   +------------------+
   | VMALLOC area     |  VMALLOC_START            0xa0000000  128MB - 64KB
   +------------------+  VMALLOC_END
-  | Cache aliasing   |  TLBTEMP_BASE_1           0xa7ff0000  DCACHE_WAY_SIZE
+  +------------------+
+  | Cache aliasing   |  TLBTEMP_BASE_1           0xa8000000  DCACHE_WAY_SIZE
   | remap area 1     |
   +------------------+
   | Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
@@ -167,7 +169,8 @@ Default MMUv2-compatible layout::
   +------------------+
   | VMALLOC area     |  VMALLOC_START            0x90000000  128MB - 64KB
   +------------------+  VMALLOC_END
-  | Cache aliasing   |  TLBTEMP_BASE_1           0x97ff0000  DCACHE_WAY_SIZE
+  +------------------+
+  | Cache aliasing   |  TLBTEMP_BASE_1           0x98000000  DCACHE_WAY_SIZE
   | remap area 1     |
   +------------------+
   | Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
index e451dcc..9d8b773 100644 (file)
@@ -1486,10 +1486,20 @@ F:      Documentation/devicetree/bindings/iommu/arm,smmu*
 F:     drivers/iommu/arm/
 F:     drivers/iommu/io-pgtable-arm*
 
+ARM AND ARM64 SoC SUB-ARCHITECTURES (COMMON PARTS)
+M:     Arnd Bergmann <arnd@arndb.de>
+M:     Olof Johansson <olof@lixom.net>
+M:     soc@kernel.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:     Maintained
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git
+F:     arch/arm/boot/dts/Makefile
+F:     arch/arm64/boot/dts/Makefile
+
 ARM SUB-ARCHITECTURES
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git
 F:     arch/arm/mach-*/
 F:     arch/arm/plat-*/
 
@@ -1546,6 +1556,7 @@ F:        drivers/clk/sunxi/
 ARM/Allwinner sunXi SoC support
 M:     Maxime Ripard <mripard@kernel.org>
 M:     Chen-Yu Tsai <wens@csie.org>
+R:     Jernej Skrabec <jernej.skrabec@siol.net>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git
@@ -1723,11 +1734,13 @@ F:      arch/arm/mach-ep93xx/micro9.c
 
 ARM/CORESIGHT FRAMEWORK AND DRIVERS
 M:     Mathieu Poirier <mathieu.poirier@linaro.org>
-R:     Suzuki K Poulose <suzuki.poulose@arm.com>
+M:     Suzuki K Poulose <suzuki.poulose@arm.com>
 R:     Mike Leach <mike.leach@linaro.org>
+R:     Leo Yan <leo.yan@linaro.org>
 L:     coresight@lists.linaro.org (moderated for non-subscribers)
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git
 F:     Documentation/ABI/testing/sysfs-bus-coresight-devices-*
 F:     Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
 F:     Documentation/devicetree/bindings/arm/coresight-cti.yaml
@@ -1994,7 +2007,6 @@ N:        lpc18xx
 
 ARM/LPC32XX SOC SUPPORT
 M:     Vladimir Zapolskiy <vz@mleia.com>
-M:     Sylvain Lemieux <slemieux.tyco@gmail.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 T:     git git://github.com/vzapolskiy/linux-lpc32xx.git
@@ -2012,7 +2024,6 @@ M:        Philipp Zabel <philipp.zabel@gmail.com>
 S:     Maintained
 
 ARM/Marvell Dove/MV78xx0/Orion SOC support
-M:     Jason Cooper <jason@lakedaemon.net>
 M:     Andrew Lunn <andrew@lunn.ch>
 M:     Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
 M:     Gregory Clement <gregory.clement@bootlin.com>
@@ -2029,7 +2040,6 @@ F:        arch/arm/plat-orion/
 F:     drivers/soc/dove/
 
 ARM/Marvell Kirkwood and Armada 370, 375, 38x, 39x, XP, 3700, 7K/8K, CN9130 SOC support
-M:     Jason Cooper <jason@lakedaemon.net>
 M:     Andrew Lunn <andrew@lunn.ch>
 M:     Gregory Clement <gregory.clement@bootlin.com>
 M:     Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
@@ -2374,7 +2384,7 @@ F:        drivers/i2c/busses/i2c-rk3x.c
 F:     sound/soc/rockchip/
 N:     rockchip
 
-ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
+ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES
 M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org
@@ -2403,15 +2413,7 @@ N:       s3c2410
 N:     s3c64xx
 N:     s5pv210
 
-ARM/SAMSUNG MOBILE MACHINE SUPPORT
-M:     Kyungmin Park <kyungmin.park@samsung.com>
-L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:     Maintained
-F:     arch/arm/mach-s5pv210/
-
 ARM/SAMSUNG S5P SERIES 2D GRAPHICS ACCELERATION (G2D) SUPPORT
-M:     Kyungmin Park <kyungmin.park@samsung.com>
-M:     Kamil Debski <kamil@wypas.org>
 M:     Andrzej Hajda <a.hajda@samsung.com>
 L:     linux-arm-kernel@lists.infradead.org
 L:     linux-media@vger.kernel.org
@@ -2436,9 +2438,6 @@ S:        Maintained
 F:     drivers/media/platform/s5p-jpeg/
 
 ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
-M:     Kyungmin Park <kyungmin.park@samsung.com>
-M:     Kamil Debski <kamil@wypas.org>
-M:     Jeongtae Park <jtp.park@samsung.com>
 M:     Andrzej Hajda <a.hajda@samsung.com>
 L:     linux-arm-kernel@lists.infradead.org
 L:     linux-media@vger.kernel.org
@@ -3243,12 +3242,12 @@ F:      drivers/iio/accel/bma400*
 BPF (Safe dynamic programs and tools)
 M:     Alexei Starovoitov <ast@kernel.org>
 M:     Daniel Borkmann <daniel@iogearbox.net>
+M:     Andrii Nakryiko <andrii@kernel.org>
 R:     Martin KaFai Lau <kafai@fb.com>
 R:     Song Liu <songliubraving@fb.com>
 R:     Yonghong Song <yhs@fb.com>
-R:     Andrii Nakryiko <andrii@kernel.org>
 R:     John Fastabend <john.fastabend@gmail.com>
-R:     KP Singh <kpsingh@chromium.org>
+R:     KP Singh <kpsingh@kernel.org>
 L:     netdev@vger.kernel.org
 L:     bpf@vger.kernel.org
 S:     Supported
@@ -3366,6 +3365,17 @@ S:       Supported
 F:     arch/x86/net/
 X:     arch/x86/net/bpf_jit_comp32.c
 
+BPF LSM (Security Audit and Enforcement using BPF)
+M:     KP Singh <kpsingh@kernel.org>
+R:     Florent Revest <revest@chromium.org>
+R:     Brendan Jackman <jackmanb@chromium.org>
+L:     bpf@vger.kernel.org
+S:     Maintained
+F:     Documentation/bpf/bpf_lsm.rst
+F:     include/linux/bpf_lsm.h
+F:     kernel/bpf/bpf_lsm.c
+F:     security/bpf/
+
 BROADCOM B44 10/100 ETHERNET DRIVER
 M:     Michael Chan <michael.chan@broadcom.com>
 L:     netdev@vger.kernel.org
@@ -3538,11 +3548,12 @@ BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER
 M:     Arend van Spriel <arend.vanspriel@broadcom.com>
 M:     Franky Lin <franky.lin@broadcom.com>
 M:     Hante Meuleman <hante.meuleman@broadcom.com>
-M:     Chi-Hsien Lin <chi-hsien.lin@cypress.com>
-M:     Wright Feng <wright.feng@cypress.com>
+M:     Chi-hsien Lin <chi-hsien.lin@infineon.com>
+M:     Wright Feng <wright.feng@infineon.com>
+M:     Chung-hsien Hsu <chung-hsien.hsu@infineon.com>
 L:     linux-wireless@vger.kernel.org
 L:     brcm80211-dev-list.pdl@broadcom.com
-L:     brcm80211-dev-list@cypress.com
+L:     SHA-cyfmac-dev-list@infineon.com
 S:     Supported
 F:     drivers/net/wireless/broadcom/brcm80211/
 
@@ -4284,6 +4295,7 @@ B:        https://github.com/ClangBuiltLinux/linux/issues
 C:     irc://chat.freenode.net/clangbuiltlinux
 F:     Documentation/kbuild/llvm.rst
 F:     scripts/clang-tools/
+F:     scripts/lld-version.sh
 K:     \b(?i:clang|llvm)\b
 
 CLEANCACHE API
@@ -4710,7 +4722,7 @@ T:        git git://linuxtv.org/anttip/media_tree.git
 F:     drivers/media/dvb-frontends/cxd2820r*
 
 CXGB3 ETHERNET DRIVER (CXGB3)
-M:     Vishal Kulkarni <vishal@chelsio.com>
+M:     Raju Rangoju <rajur@chelsio.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://www.chelsio.com
@@ -4742,7 +4754,7 @@ W:        http://www.chelsio.com
 F:     drivers/net/ethernet/chelsio/inline_crypto/
 
 CXGB4 ETHERNET DRIVER (CXGB4)
-M:     Vishal Kulkarni <vishal@chelsio.com>
+M:     Raju Rangoju <rajur@chelsio.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://www.chelsio.com
@@ -4764,7 +4776,7 @@ F:        drivers/infiniband/hw/cxgb4/
 F:     include/uapi/rdma/cxgb4-abi.h
 
 CXGB4VF ETHERNET DRIVER (CXGB4VF)
-M:     Vishal Kulkarni <vishal@gmail.com>
+M:     Raju Rangoju <rajur@chelsio.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://www.chelsio.com
@@ -9077,10 +9089,7 @@ S:       Supported
 F:     drivers/net/wireless/intel/iwlegacy/
 
 INTEL WIRELESS WIFI LINK (iwlwifi)
-M:     Johannes Berg <johannes.berg@intel.com>
-M:     Emmanuel Grumbach <emmanuel.grumbach@intel.com>
 M:     Luca Coelho <luciano.coelho@intel.com>
-M:     Intel Linux Wireless <linuxwifi@intel.com>
 L:     linux-wireless@vger.kernel.org
 S:     Supported
 W:     https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi
@@ -9172,6 +9181,7 @@ F:        include/linux/iomap.h
 
 IOMMU DRIVERS
 M:     Joerg Roedel <joro@8bytes.org>
+M:     Will Deacon <will@kernel.org>
 L:     iommu@lists.linux-foundation.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
@@ -9255,7 +9265,6 @@ F:        kernel/irq/
 
 IRQCHIP DRIVERS
 M:     Thomas Gleixner <tglx@linutronix.de>
-M:     Jason Cooper <jason@lakedaemon.net>
 M:     Marc Zyngier <maz@kernel.org>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
@@ -9655,6 +9664,7 @@ F:        Documentation/virt/kvm/s390*
 F:     arch/s390/include/asm/gmap.h
 F:     arch/s390/include/asm/kvm*
 F:     arch/s390/include/uapi/asm/kvm*
+F:     arch/s390/kernel/uv.c
 F:     arch/s390/kvm/
 F:     arch/s390/mm/gmap.c
 F:     tools/testing/selftests/kvm/*/s390x/
@@ -9662,7 +9672,7 @@ F:        tools/testing/selftests/kvm/s390x/
 
 KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
 M:     Paolo Bonzini <pbonzini@redhat.com>
-R:     Sean Christopherson <sean.j.christopherson@intel.com>
+R:     Sean Christopherson <seanjc@google.com>
 R:     Vitaly Kuznetsov <vkuznets@redhat.com>
 R:     Wanpeng Li <wanpengli@tencent.com>
 R:     Jim Mattson <jmattson@google.com>
@@ -9843,13 +9853,6 @@ S:       Maintained
 F:     arch/mips/lantiq
 F:     drivers/soc/lantiq
 
-LAPB module
-L:     linux-x25@vger.kernel.org
-S:     Orphan
-F:     Documentation/networking/lapb-module.rst
-F:     include/*/lapb.h
-F:     net/lapb/
-
 LASI 53c700 driver for PARISC
 M:     "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
 L:     linux-scsi@vger.kernel.org
@@ -10560,6 +10563,13 @@ S:     Supported
 F:     Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
 F:     drivers/net/ethernet/marvell/octeontx2/af/
 
+MARVELL PRESTERA ETHERNET SWITCH DRIVER
+M:     Vadym Kochan <vkochan@marvell.com>
+M:     Taras Chornyi <tchornyi@marvell.com>
+S:     Supported
+W:     https://github.com/Marvell-switching/switchdev-prestera
+F:     drivers/net/ethernet/marvell/prestera/
+
 MARVELL SOC MMC/SD/SDIO CONTROLLER DRIVER
 M:     Nicolas Pitre <nico@fluxnic.net>
 S:     Odd Fixes
@@ -13177,7 +13187,9 @@ M:      Jesper Dangaard Brouer <hawk@kernel.org>
 M:     Ilias Apalodimas <ilias.apalodimas@linaro.org>
 L:     netdev@vger.kernel.org
 S:     Supported
+F:     Documentation/networking/page_pool.rst
 F:     include/net/page_pool.h
+F:     include/trace/events/page_pool.h
 F:     net/core/page_pool.c
 
 PANASONIC LAPTOP ACPI EXTRAS DRIVER
@@ -13405,7 +13417,6 @@ F:      drivers/pci/controller/mobiveil/pcie-mobiveil*
 
 PCI DRIVER FOR MVEBU (Marvell Armada 370 and Armada XP SOC support)
 M:     Thomas Petazzoni <thomas.petazzoni@bootlin.com>
-M:     Jason Cooper <jason@lakedaemon.net>
 L:     linux-pci@vger.kernel.org
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
@@ -14211,7 +14222,6 @@ F:      drivers/media/usb/pwc/*
 F:     include/trace/events/pwc.h
 
 PWM FAN DRIVER
-M:     Kamil Debski <kamil@wypas.org>
 M:     Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:     linux-hwmon@vger.kernel.org
 S:     Supported
@@ -14820,7 +14830,7 @@ T:      git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.g
 F:     drivers/net/wireless/realtek/rtlwifi/
 
 REALTEK WIRELESS DRIVER (rtw88)
-M:     Yan-Hsuan Chuang <yhchuang@realtek.com>
+M:     Yan-Hsuan Chuang <tony0620emma@gmail.com>
 L:     linux-wireless@vger.kernel.org
 S:     Maintained
 F:     drivers/net/wireless/realtek/rtw88/
@@ -15425,14 +15435,12 @@ F:    Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
 F:     drivers/nfc/s3fwrn5
 
 SAMSUNG S5C73M3 CAMERA DRIVER
-M:     Kyungmin Park <kyungmin.park@samsung.com>
 M:     Andrzej Hajda <a.hajda@samsung.com>
 L:     linux-media@vger.kernel.org
 S:     Supported
 F:     drivers/media/i2c/s5c73m3/*
 
 SAMSUNG S5K5BAF CAMERA DRIVER
-M:     Kyungmin Park <kyungmin.park@samsung.com>
 M:     Andrzej Hajda <a.hajda@samsung.com>
 L:     linux-media@vger.kernel.org
 S:     Supported
@@ -15450,7 +15458,6 @@ F:      Documentation/devicetree/bindings/crypto/samsung-sss.yaml
 F:     drivers/crypto/s5p-sss.c
 
 SAMSUNG S5P/EXYNOS4 SOC SERIES CAMERA SUBSYSTEM DRIVERS
-M:     Kyungmin Park <kyungmin.park@samsung.com>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
 L:     linux-media@vger.kernel.org
 S:     Supported
@@ -15498,7 +15505,6 @@ T:      git https://github.com/lmajewski/linux-samsung-thermal.git
 F:     drivers/thermal/samsung/
 
 SAMSUNG USB2 PHY DRIVER
-M:     Kamil Debski <kamil@wypas.org>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
 L:     linux-kernel@vger.kernel.org
 S:     Supported
@@ -15797,9 +15803,8 @@ F:      drivers/slimbus/
 F:     include/linux/slimbus.h
 
 SFC NETWORK DRIVER
-M:     Solarflare linux maintainers <linux-net-drivers@solarflare.com>
-M:     Edward Cree <ecree@solarflare.com>
-M:     Martin Habets <mhabets@solarflare.com>
+M:     Edward Cree <ecree.xilinx@gmail.com>
+M:     Martin Habets <habetsm.xilinx@gmail.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/sfc/
@@ -19006,12 +19011,18 @@ L:    linux-kernel@vger.kernel.org
 S:     Maintained
 N:     axp[128]
 
-X.25 NETWORK LAYER
-M:     Andrew Hendry <andrew.hendry@gmail.com>
+X.25 STACK
+M:     Martin Schiller <ms@dev.tdt.de>
 L:     linux-x25@vger.kernel.org
-S:     Odd Fixes
+S:     Maintained
+F:     Documentation/networking/lapb-module.rst
 F:     Documentation/networking/x25*
+F:     drivers/net/wan/hdlc_x25.c
+F:     drivers/net/wan/lapbether.c
+F:     include/*/lapb.h
 F:     include/net/x25*
+F:     include/uapi/linux/x25.h
+F:     net/lapb/
 F:     net/x25/
 
 X86 ARCHITECTURE (32-BIT AND 64-BIT)
@@ -19125,12 +19136,17 @@ L:    netdev@vger.kernel.org
 L:     bpf@vger.kernel.org
 S:     Supported
 F:     include/net/xdp.h
+F:     include/net/xdp_priv.h
 F:     include/trace/events/xdp.h
 F:     kernel/bpf/cpumap.c
 F:     kernel/bpf/devmap.c
 F:     net/core/xdp.c
-N:     xdp
-K:     xdp
+F:     samples/bpf/xdp*
+F:     tools/testing/selftests/bpf/*xdp*
+F:     tools/testing/selftests/bpf/*/*xdp*
+F:     drivers/net/ethernet/*/*/*/*/*xdp*
+F:     drivers/net/ethernet/*/*/*xdp*
+K:     (?:\b|_)xdp(?:\b|_)
 
 XDP SOCKETS (AF_XDP)
 M:     Björn Töpel <bjorn.topel@intel.com>
@@ -19139,9 +19155,12 @@ R:     Jonathan Lemon <jonathan.lemon@gmail.com>
 L:     netdev@vger.kernel.org
 L:     bpf@vger.kernel.org
 S:     Maintained
+F:     Documentation/networking/af_xdp.rst
 F:     include/net/xdp_sock*
 F:     include/net/xsk_buff_pool.h
 F:     include/uapi/linux/if_xdp.h
+F:     include/uapi/linux/xdp_diag.h
+F:     include/net/netns/xdp.h
 F:     net/xdp/
 F:     samples/bpf/xdpsock*
 F:     tools/lib/bpf/xsk*
index e2c3f65..e30cf02 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 10
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION =
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*
@@ -433,7 +433,6 @@ NM          = llvm-nm
 OBJCOPY                = llvm-objcopy
 OBJDUMP                = llvm-objdump
 READELF                = llvm-readelf
-OBJSIZE                = llvm-size
 STRIP          = llvm-strip
 else
 CC             = $(CROSS_COMPILE)gcc
@@ -443,7 +442,6 @@ NM          = $(CROSS_COMPILE)nm
 OBJCOPY                = $(CROSS_COMPILE)objcopy
 OBJDUMP                = $(CROSS_COMPILE)objdump
 READELF                = $(CROSS_COMPILE)readelf
-OBJSIZE                = $(CROSS_COMPILE)size
 STRIP          = $(CROSS_COMPILE)strip
 endif
 PAHOLE         = pahole
@@ -509,7 +507,7 @@ KBUILD_LDFLAGS :=
 CLANG_FLAGS :=
 
 export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
+export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
 export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
 export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
 export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
@@ -828,7 +826,9 @@ else
 DEBUG_CFLAGS   += -g
 endif
 
+ifneq ($(LLVM_IAS),1)
 KBUILD_AFLAGS  += -Wa,-gdwarf-2
+endif
 
 ifdef CONFIG_DEBUG_INFO_DWARF4
 DEBUG_CFLAGS   += -gdwarf-4
@@ -946,7 +946,7 @@ KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
 KBUILD_CFLAGS   += $(call cc-option,-Werror=designated-init)
 
 # change __FILE__ to the relative path from the srctree
-KBUILD_CFLAGS  += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
+KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
 
 # ensure -fcf-protection is disabled when using retpoline as it is
 # incompatible with -mindirect-branch=thunk-extern
@@ -984,6 +984,12 @@ ifeq ($(CONFIG_RELR),y)
 LDFLAGS_vmlinux        += --pack-dyn-relocs=relr
 endif
 
+# We never want expected sections to be placed heuristically by the
+# linker. All sections should be explicitly named in the linker script.
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
+
 # Align the bit size of userspace programs with the kernel
 KBUILD_USERCFLAGS  += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS))
 KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS))
index 56b6ccc..ba4e966 100644 (file)
@@ -1028,6 +1028,15 @@ config HAVE_STATIC_CALL_INLINE
        bool
        depends on HAVE_STATIC_CALL
 
+config ARCH_WANT_LD_ORPHAN_WARN
+       bool
+       help
+         An arch should select this symbol once all linker sections are explicitly
+         included, size-asserted, or discarded in the linker scripts. This is
+         important because we never want expected sections to be placed heuristically
+         by the linker, since the locations of such sections can change between linker
+         versions.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
index 7462a79..4c7b041 100644 (file)
@@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off);
 void arch_cpu_idle(void)
 {
        wtint(0);
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 void arch_cpu_idle_dead(void)
index c6606f4..fb98440 100644 (file)
@@ -243,10 +243,8 @@ static inline int constant_fls(unsigned int x)
                x <<= 2;
                r -= 2;
        }
-       if (!(x & 0x80000000u)) {
-               x <<= 1;
+       if (!(x & 0x80000000u))
                r -= 1;
-       }
        return r;
 }
 
index f1ed17e..1636417 100644 (file)
 
 #ifdef CONFIG_ARC_HAS_PAE40
 #define PTE_BITS_NON_RWX_IN_PD1        (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
 #else
 #define PTE_BITS_NON_RWX_IN_PD1        (PAGE_MASK | _PAGE_CACHEABLE)
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
 #endif
 
 /**************************************************************************
index b23986f..f73da20 100644 (file)
 
 #ifdef CONFIG_ARC_DW2_UNWIND
 
-static void seed_unwind_frame_info(struct task_struct *tsk,
-                                  struct pt_regs *regs,
-                                  struct unwind_frame_info *frame_info)
+static int
+seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs,
+                      struct unwind_frame_info *frame_info)
 {
-       /*
-        * synchronous unwinding (e.g. dump_stack)
-        *  - uses current values of SP and friends
-        */
-       if (tsk == NULL && regs == NULL) {
+       if (regs) {
+               /*
+                * Asynchronous unwinding of intr/exception
+                *  - Just uses the pt_regs passed
+                */
+               frame_info->task = tsk;
+
+               frame_info->regs.r27 = regs->fp;
+               frame_info->regs.r28 = regs->sp;
+               frame_info->regs.r31 = regs->blink;
+               frame_info->regs.r63 = regs->ret;
+               frame_info->call_frame = 0;
+       } else if (tsk == NULL || tsk == current) {
+               /*
+                * synchronous unwinding (e.g. dump_stack)
+                *  - uses current values of SP and friends
+                */
                unsigned long fp, sp, blink, ret;
                frame_info->task = current;
 
@@ -63,13 +75,17 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
                frame_info->regs.r31 = blink;
                frame_info->regs.r63 = ret;
                frame_info->call_frame = 0;
-       } else if (regs == NULL) {
+       } else {
                /*
-                * Asynchronous unwinding of sleeping task
-                *  - Gets SP etc from task's pt_regs (saved bottom of kernel
-                *    mode stack of task)
+                * Asynchronous unwinding of a likely sleeping task
+                *  - first ensure it is actually sleeping
+                *  - if so, it will be in __switch_to, kernel mode SP of task
+                *    is safe-kept and BLINK at a well known location in there
                 */
 
+               if (tsk->state == TASK_RUNNING)
+                       return -1;
+
                frame_info->task = tsk;
 
                frame_info->regs.r27 = TSK_K_FP(tsk);
@@ -90,19 +106,8 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
                frame_info->regs.r28 += 60;
                frame_info->call_frame = 0;
 
-       } else {
-               /*
-                * Asynchronous unwinding of intr/exception
-                *  - Just uses the pt_regs passed
-                */
-               frame_info->task = tsk;
-
-               frame_info->regs.r27 = regs->fp;
-               frame_info->regs.r28 = regs->sp;
-               frame_info->regs.r31 = regs->blink;
-               frame_info->regs.r63 = regs->ret;
-               frame_info->call_frame = 0;
        }
+       return 0;
 }
 
 #endif
@@ -116,7 +121,8 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
        unsigned int address;
        struct unwind_frame_info frame_info;
 
-       seed_unwind_frame_info(tsk, regs, &frame_info);
+       if (seed_unwind_frame_info(tsk, regs, &frame_info))
+               return 0;
 
        while (1) {
                address = UNW_PC(&frame_info);
index c340acd..9bb3c24 100644 (file)
  *  -Changes related to MMU v2 (Rel 4.8)
  *
  * Vineetg: Aug 29th 2008
- *  -In TLB Flush operations (Metal Fix MMU) there is a explict command to
+ *  -In TLB Flush operations (Metal Fix MMU) there is an explicit command to
  *    flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,
  *    it fails. Thus need to load it with ANY valid value before invoking
  *    TLBIVUTLB cmd
  *
  * Vineetg: Aug 21th 2008:
  *  -Reduced the duration of IRQ lockouts in TLB Flush routines
- *  -Multiple copies of TLB erase code seperated into a "single" function
+ *  -Multiple copies of TLB erase code separated into a "single" function
  *  -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID
  *       in interrupt-safe region.
  *
@@ -66,7 +66,7 @@
  *
  * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has
  * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways.
- * Given this, the thrasing problem should never happen because once the 3
+ * Given this, the thrashing problem should never happen because once the 3
  * J-TLB entries are created (even though 3rd will knock out one of the prev
  * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy
  *
@@ -127,7 +127,7 @@ static void utlb_invalidate(void)
         * There was however an obscure hardware bug, where uTLB flush would
         * fail when a prior probe for J-TLB (both totally unrelated) would
         * return lkup err - because the entry didn't exist in MMU.
-        * The Workround was to set Index reg with some valid value, prior to
+        * The Workaround was to set Index reg with some valid value, prior to
         * flush. This was fixed in MMU v3
         */
        unsigned int idx;
@@ -272,7 +272,7 @@ noinline void local_flush_tlb_all(void)
 }
 
 /*
- * Flush the entrie MM for userland. The fastest way is to move to Next ASID
+ * Flush the entire MM for userland. The fastest way is to move to Next ASID
  */
 noinline void local_flush_tlb_mm(struct mm_struct *mm)
 {
@@ -303,7 +303,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)
  * Difference between this and Kernel Range Flush is
  *  -Here the fastest way (if range is too large) is to move to next ASID
  *      without doing any explicit Shootdown
- *  -In case of kernel Flush, entry has to be shot down explictly
+ *  -In case of kernel Flush, entry has to be shot down explicitly
  */
 void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                           unsigned long end)
@@ -620,7 +620,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
  * Super Page size is configurable in hardware (4K to 16M), but fixed once
  * RTL builds.
  *
- * The exact THP size a Linx configuration will support is a function of:
+ * The exact THP size a Linux configuration will support is a function of:
  *  - MMU page size (typical 8K, RTL fixed)
  *  - software page walker address split between PGD:PTE:PFN (typical
  *    11:8:13, but can be changed with 1 line)
@@ -698,7 +698,7 @@ void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 #endif
 
-/* Read the Cache Build Confuration Registers, Decode them and save into
+/* Read the Cache Build Configuration Registers, Decode them and save into
  * the cpuinfo structure for later use.
  * No Validation is done here, simply read/convert the BCRs
  */
@@ -803,13 +803,13 @@ void arc_mmu_init(void)
        pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
 
        /*
-        * Can't be done in processor.h due to header include depenedencies
+        * Can't be done in processor.h due to header include dependencies
         */
        BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE));
 
        /*
         * stack top size sanity check,
-        * Can't be done in processor.h due to header include depenedencies
+        * Can't be done in processor.h due to header include dependencies
         */
        BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE));
 
@@ -881,7 +881,7 @@ void arc_mmu_init(void)
  *      the duplicate one.
  * -Knob to be verbose abt it.(TODO: hook them up to debugfs)
  */
-volatile int dup_pd_silent; /* Be slient abt it or complain (default) */
+volatile int dup_pd_silent; /* Be silent abt it or complain (default) */
 
 void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
                          struct pt_regs *regs)
@@ -948,7 +948,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 
 /***********************************************************************
  * Diagnostic Routines
- *  -Called from Low Level TLB Hanlders if things don;t look good
+ *  -Called from Low Level TLB Handlers if things don't look good
  **********************************************************************/
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
index fe2f17e..002e0cf 100644 (file)
@@ -35,6 +35,7 @@ config ARM
        select ARCH_USE_CMPXCHG_LOCKREF
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
        select ARCH_WANT_IPC_PARSE_VERSION
+       select ARCH_WANT_LD_ORPHAN_WARN
        select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
        select BUILDTIME_TABLE_SORT if MMU
        select CLONE_BACKWARDS
index 4d76eab..e15f76c 100644 (file)
@@ -16,10 +16,6 @@ LDFLAGS_vmlinux      += --be8
 KBUILD_LDFLAGS_MODULE  += --be8
 endif
 
-# We never want expected sections to be placed heuristically by the
-# linker. All sections should be explicitly named in the linker script.
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
-
 GZFLAGS                :=-9
 #KBUILD_CFLAGS +=-pipe
 
index 47f001c..e156741 100644 (file)
@@ -129,7 +129,9 @@ LDFLAGS_vmlinux += --no-undefined
 # Delete all temporary local symbols
 LDFLAGS_vmlinux += -X
 # Report orphan sections
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
 # Next argument is a linker script
 LDFLAGS_vmlinux += -T
 
index 2e04ec5..caa2732 100644 (file)
@@ -1472,6 +1472,9 @@ ENTRY(efi_enter_kernel)
                @ issued from HYP mode take us to the correct handler code. We
                @ will disable the MMU before jumping to the kernel proper.
                @
+ ARM(          bic     r1, r1, #(1 << 30)      ) @ clear HSCTLR.TE
+ THUMB(                orr     r1, r1, #(1 << 30)      ) @ set HSCTLR.TE
+               mcr     p15, 4, r1, c1, c0, 0
                adr     r0, __hyp_reentry_vectors
                mcr     p15, 4, r0, c12, c0, 0  @ set HYP vector base (HVBAR)
                isb
index c220dc3..243e35f 100644 (file)
                        ranges = <0x0 0x100000 0x8000>;
 
                        mac_sw: switch@0 {
-                               compatible = "ti,am4372-cpsw","ti,cpsw-switch";
+                               compatible = "ti,am4372-cpsw-switch", "ti,cpsw-switch";
                                reg = <0x0 0x4000>;
                                ranges = <0 0 0x4000>;
                                clocks = <&cpsw_125mhz_gclk>, <&dpll_clksel_mac_clk>;
index b69c7d4..2f32615 100644 (file)
@@ -32,8 +32,8 @@
                                interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>,
                                             <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
                                interrupt-names = "int0", "int1";
-                               clocks = <&mcan_clk>, <&l3_iclk_div>;
-                               clock-names = "cclk", "hclk";
+                               clocks = <&l3_iclk_div>, <&mcan_clk>;
+                               clock-names = "hclk", "cclk";
                                bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>;
                        };
                };
index ab291ce..2983e91 100644 (file)
 };
 
 &clock {
-       clocks = <&clock CLK_XUSBXTI>;
        assigned-clocks = <&clock CLK_FOUT_EPLL>;
        assigned-clock-rates = <45158401>;
 };
index 878e89c..4ea5c23 100644 (file)
@@ -59,7 +59,7 @@
                                MX50_PAD_CSPI_MISO__CSPI_MISO           0x00
                                MX50_PAD_CSPI_MOSI__CSPI_MOSI           0x00
                                MX50_PAD_CSPI_SS0__GPIO4_11             0xc4
-                               MX50_PAD_ECSPI1_MOSI__CSPI_SS1          0xf4
+                               MX50_PAD_ECSPI1_MOSI__GPIO4_13          0x84
                        >;
                };
 
index d112b50..b4605ed 100644 (file)
                #size-cells = <0>;
 
                /* Microchip KSZ9031RNX PHY */
-               rgmii_phy: ethernet-phy@4 {
-                       reg = <4>;
+               rgmii_phy: ethernet-phy@0 {
+                       reg = <0>;
                        interrupts-extended = <&gpio1 28 IRQ_TYPE_LEVEL_LOW>;
                        reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;
                        reset-assert-us = <10000>;
index 265f5f3..24f793c 100644 (file)
 
        pinctrl_i2c3: i2c3grp {
                fsl,pins = <
-                       MX6QDL_PAD_GPIO_3__I2C3_SCL             0x4001b8b1
+                       MX6QDL_PAD_GPIO_5__I2C3_SCL             0x4001b8b1
                        MX6QDL_PAD_GPIO_16__I2C3_SDA            0x4001b8b1
                >;
        };
index 828dd20..d07d8f8 100644 (file)
@@ -98,7 +98,7 @@
 &fec {
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
index 9390979..b9b698f 100644 (file)
                                MX6QDL_PAD_RGMII_RD2__RGMII_RD2         0x1b030
                                MX6QDL_PAD_RGMII_RD3__RGMII_RD3         0x1b030
                                MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL   0x1b030
-                               MX6QDL_PAD_GPIO_6__ENET_IRQ             0x000b1
                        >;
                };
 
index adde62d..342304f 100644 (file)
 };
 
 &ssp3 {
-       /delete-property/ #address-cells;
-       /delete-property/ #size-cells;
+       #address-cells = <0>;
        spi-slave;
        status = "okay";
        ready-gpios = <&gpio 125 GPIO_ACTIVE_HIGH>;
index 5dff24e..8456f17 100644 (file)
                        linux,code = <KEY_A>;
                        gpios = <&gpiof 3 GPIO_ACTIVE_LOW>;
                };
+
+               /*
+                * The EXTi IRQ line 0 is shared with PMIC,
+                * so mark this as polled GPIO key.
+                */
+               button-2 {
+                       label = "TA3-GPIO-C";
+                       linux,code = <KEY_C>;
+                       gpios = <&gpiog 0 GPIO_ACTIVE_LOW>;
+               };
        };
 
        gpio-keys {
                        wakeup-source;
                };
 
-               button-2 {
-                       label = "TA3-GPIO-C";
-                       linux,code = <KEY_C>;
-                       gpios = <&gpioi 11 GPIO_ACTIVE_LOW>;
-                       wakeup-source;
-               };
-
                button-3 {
                        label = "TA4-GPIO-D";
                        linux,code = <KEY_D>;
@@ -79,7 +82,7 @@
 
                led-0 {
                        label = "green:led5";
-                       gpios = <&gpiog 2 GPIO_ACTIVE_HIGH>;
+                       gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>;
                        default-state = "off";
                };
 
index b4b52cf..f796a61 100644 (file)
@@ -68,6 +68,7 @@
                gpio = <&gpiog 3 GPIO_ACTIVE_LOW>;
                regulator-always-on;
                regulator-boot-on;
+               vin-supply = <&vdd>;
        };
 };
 
 
                        vdda: ldo1 {
                                regulator-name = "vdda";
+                               regulator-always-on;
                                regulator-min-microvolt = <2900000>;
                                regulator-max-microvolt = <2900000>;
                                interrupts = <IT_CURLIM_LDO1 0>;
index 04fbb32..803eb8b 100644 (file)
        };
 };
 
+&dts {
+       status = "okay";
+};
+
 &i2c4 {
        pinctrl-names = "default";
        pinctrl-0 = <&i2c4_pins_a>;
index 049e6ab..73de34a 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&gmac_rgmii_pins>;
        phy-handle = <&phy1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
index 32d5d45..8945dbb 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&gmac_rgmii_pins>;
        phy-handle = <&phy1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-supply = <&reg_gmac_3v3>;
        status = "okay";
 };
index bb3987e..0b3d9ae 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&gmac_rgmii_pins>;
        phy-handle = <&phy1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-supply = <&reg_gmac_3v3>;
        status = "okay";
 };
index 8c8dee6..9109ca0 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&gmac_rgmii_pins>;
        phy-handle = <&phy1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
index fce2f7f..bf38c66 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Adam Sampson <ats@offog.org>
+ * Copyright 2015-2020 Adam Sampson <ats@offog.org>
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
        pinctrl-names = "default";
        pinctrl-0 = <&gmac_rgmii_pins>;
        phy-handle = <&phy1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
index 9d34eab..431f702 100644 (file)
        pinctrl-0 = <&emac_rgmii_pins>;
        phy-supply = <&reg_sw>;
        phy-handle = <&rgmii_phy>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        allwinner,rx-delay-ps = <700>;
        allwinner,tx-delay-ps = <700>;
        status = "okay";
index d9be511..d8326a5 100644 (file)
        pinctrl-0 = <&emac_rgmii_pins>;
        phy-supply = <&reg_dldo4>;
        phy-handle = <&rgmii_phy>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
index 71fb732..babf4cf 100644 (file)
        };
 };
 
-&emac {
-       /* LEDs changed to active high on the plus */
-       /delete-property/ allwinner,leds-active-low;
-};
-
 &mmc1 {
        vmmc-supply = <&reg_vcc3v3>;
        bus-width = <4>;
index 6dbf7b2..b6ca45d 100644 (file)
@@ -67,7 +67,7 @@
        pinctrl-0 = <&emac_rgmii_pins>;
        phy-supply = <&reg_gmac_3v3>;
        phy-handle = <&ext_rgmii_phy>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
index 2fc62ef..a6a1087 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&gmac_rgmii_pins>;
        phy-handle = <&phy1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-supply = <&reg_dc1sw>;
        status = "okay";
 };
index 9bab6b7..4aa0ee8 100644 (file)
@@ -10,7 +10,7 @@
 
 / {
        model = "PineCube IP Camera";
-       compatible = "pine64,pinecube", "allwinner,sun8i-s3";
+       compatible = "pine64,pinecube", "sochip,s3", "allwinner,sun8i-v3";
 
        aliases {
                serial0 = &uart2;
index 0c73416..89abd4c 100644 (file)
                gic: interrupt-controller@1c81000 {
                        compatible = "arm,gic-400";
                        reg = <0x01c81000 0x1000>,
-                             <0x01c82000 0x1000>,
+                             <0x01c82000 0x2000>,
                              <0x01c84000 0x2000>,
                              <0x01c86000 0x2000>;
                        interrupt-controller;
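
The GIC-400 CPU interface is an 8 KiB region: GICC_DIR sits in the second 4 KiB page, at offset 0x1000, and is required for split priority-drop/deactivate (EOImode 1). Describing only 4 KiB leaves that register unmapped. Annotated:

	reg = <0x01c81000 0x1000>,	/* GICD */
	      <0x01c82000 0x2000>,	/* GICC: 8 KiB so GICC_DIR at +0x1000 is mapped */
	      <0x01c84000 0x2000>,	/* GICH */
	      <0x01c86000 0x2000>;	/* GICV */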
index 15c22b0..4795455 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&gmac_rgmii_pins>;
        phy-handle = <&phy1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-supply = <&reg_dc1sw>;
        status = "okay";
 };
 };
 
 &reg_dc1sw {
-       regulator-min-microvolt = <3000000>;
-       regulator-max-microvolt = <3000000>;
+       regulator-min-microvolt = <3300000>;
+       regulator-max-microvolt = <3300000>;
        regulator-name = "vcc-gmac-phy";
 };
 
 &reg_dcdc1 {
        regulator-always-on;
-       regulator-min-microvolt = <3000000>;
-       regulator-max-microvolt = <3000000>;
-       regulator-name = "vcc-3v0";
+       regulator-min-microvolt = <3300000>;
+       regulator-max-microvolt = <3300000>;
+       regulator-name = "vcc-3v3";
 };
 
 &reg_dcdc2 {
index d3b337b..484b93d 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&gmac_rgmii_pins>;
        phy-handle = <&phy1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-supply = <&reg_cldo1>;
        status = "okay";
 };
index bbc6335..5c3580d 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&gmac_rgmii_pins>;
        phy-handle = <&phy1>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-supply = <&reg_cldo1>;
        status = "okay";
 };
index 39263e7..8e5cb3b 100644 (file)
        pinctrl-0 = <&emac_rgmii_pins>;
        phy-supply = <&reg_gmac_3v3>;
        phy-handle = <&ext_rgmii_phy>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
 
        status = "okay";
 };
index e500911..6f1e0f0 100644 (file)
        };
 };
 
+&mdio1 {
+       clock-frequency = <5000000>;
+};
 
 &iomuxc {
        pinctrl_gpio_e6185_eeprom_sel: pinctrl-gpio-e6185-eeprom-spi0 {
index 34793aa..58df9fd 100644 (file)
@@ -81,7 +81,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_BINFMT_MISC=y
 CONFIG_CMA=y
 CONFIG_ZSMALLOC=m
-CONFIG_ZSMALLOC_PGTABLE_MAPPING=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
index 3502c2f..baf7d02 100644 (file)
@@ -75,6 +75,8 @@
 #define PTE_HWTABLE_OFF                (PTE_HWTABLE_PTRS * sizeof(pte_t))
 #define PTE_HWTABLE_SIZE       (PTRS_PER_PTE * sizeof(u32))
 
+#define MAX_POSSIBLE_PHYSMEM_BITS      32
+
 /*
  * PMD_SHIFT determines the size of the area a second-level page table can map
  * PGDIR_SHIFT determines what a third-level page table entry can map
index fbb6693..2b85d17 100644 (file)
@@ -25,6 +25,8 @@
 #define PTE_HWTABLE_OFF                (0)
 #define PTE_HWTABLE_SIZE       (PTRS_PER_PTE * sizeof(u64))
 
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
+
 /*
  * PGDIR_SHIFT determines the size a top-level page table entry can map.
  */
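
These two hunks give 32-bit ARM an upper bound on physical address width (32 bits for 2-level paging, 40 bits with LPAE) so that zsmalloc can size the PFN field of its packed object handles; relatedly, the Keystone header below stops carrying its own private MAX_PHYSMEM_BITS/SECTION_SIZE_BITS override. A simplified view of why the bound matters, adapted from mm/zsmalloc.c:

	/* A zsmalloc handle packs <PFN, object index, tag> into one unsigned
	 * long; the PFN field must cover the widest physical address the
	 * architecture can generate, or handles truncate on LPAE systems: */
	#define _PFN_BITS	(MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
	#define OBJ_TAG_BITS	1
	#define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)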
index 8e6ace0..9f199b1 100644 (file)
@@ -71,7 +71,7 @@ void arch_cpu_idle(void)
                arm_pm_idle();
        else
                cpu_do_idle();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 void arch_cpu_idle_prepare(void)
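
The switch to raw_local_irq_enable() is deliberate: the generic idle loop now performs the irq-flag tracing around this arch hook itself, and invoking tracing code from the idle path (where RCU is not watching) confuses lockdep. The traced wrapper being avoided looks like this, simplified from <linux/irqflags.h> under CONFIG_TRACE_IRQFLAGS:

	#define local_irq_enable()		\
		do {				\
			trace_hardirqs_on();	\
			raw_local_irq_enable();	\
		} while (0)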
index d841bed..7bb47eb 100644 (file)
@@ -136,7 +136,7 @@ void __init imx_init_revision_from_anatop(void)
 
                        src_np = of_find_compatible_node(NULL, NULL,
                                                     "fsl,imx6ul-src");
-                       src_base = of_iomap(np, 0);
+                       src_base = of_iomap(src_np, 0);
                        of_node_put(src_np);
                        WARN_ON(!src_base);
                        sbmr2 = readl_relaxed(src_base + SRC_SBMR2);
index 9147565..1b9ed12 100644 (file)
@@ -6,9 +6,6 @@
 #ifndef __MEMORY_H
 #define __MEMORY_H
 
-#define MAX_PHYSMEM_BITS       36
-#define SECTION_SIZE_BITS      34
-
 #define KEYSTONE_LOW_PHYS_START                0x80000000ULL
 #define KEYSTONE_LOW_PHYS_SIZE         0x80000000ULL /* 2G */
 #define KEYSTONE_LOW_PHYS_END          (KEYSTONE_LOW_PHYS_START + \
index 144b9ca..a720259 100644 (file)
@@ -288,7 +288,7 @@ static struct gpiod_lookup_table osk_usb_gpio_table = {
        .dev_id = "ohci",
        .table = {
                /* Power GPIO on the I2C-attached TPS65010 */
-               GPIO_LOOKUP("i2c-tps65010", 1, "power", GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP("tps65010", 0, "power", GPIO_ACTIVE_HIGH),
                GPIO_LOOKUP(OMAP_GPIO_LABEL, 9, "overcurrent",
                            GPIO_ACTIVE_HIGH),
        },
index 3ee7bdf..3f62a0c 100644 (file)
@@ -7,7 +7,6 @@ config ARCH_OMAP2
        depends on ARCH_MULTI_V6
        select ARCH_OMAP2PLUS
        select CPU_V6
-       select PM_GENERIC_DOMAINS if PM
        select SOC_HAS_OMAP2_SDRC
 
 config ARCH_OMAP3
@@ -106,6 +105,8 @@ config ARCH_OMAP2PLUS
        select OMAP_DM_TIMER
        select OMAP_GPMC
        select PINCTRL
+       select PM_GENERIC_DOMAINS if PM
+       select PM_GENERIC_DOMAINS_OF if PM
        select RESET_CONTROLLER
        select SOC_BUS
        select TI_SYSC
index a92d277..c8d317f 100644 (file)
@@ -175,8 +175,11 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
                if (mpuss_can_lose_context) {
                        error = cpu_cluster_pm_enter();
                        if (error) {
-                               omap_set_pwrdm_state(mpu_pd, PWRDM_POWER_ON);
-                               goto cpu_cluster_pm_out;
+                               index = 0;
+                               cx = state_ptr + index;
+                               pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state);
+                               omap_set_pwrdm_state(mpu_pd, cx->mpu_state);
+                               mpuss_can_lose_context = 0;
                        }
                }
        }
@@ -184,7 +187,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
        omap4_enter_lowpower(dev->cpu, cx->cpu_state);
        cpu_done[dev->cpu] = true;
 
-cpu_cluster_pm_out:
        /* Wakeup CPU1 only if it is not offlined */
        if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
 
index 06da274..1963572 100644 (file)
@@ -66,6 +66,7 @@ static const char * const sun8i_board_dt_compat[] = {
        "allwinner,sun8i-h2-plus",
        "allwinner,sun8i-h3",
        "allwinner,sun8i-r40",
+       "allwinner,sun8i-v3",
        "allwinner,sun8i-v3s",
        NULL,
 };
index 8d13b91..39432ac 100644 (file)
@@ -81,6 +81,7 @@ config ARM64
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
        select ARCH_WANT_FRAME_POINTERS
        select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
+       select ARCH_WANT_LD_ORPHAN_WARN
        select ARCH_HAS_UBSAN_SANITIZE_ALL
        select ARM_AMBA
        select ARM_ARCH_TIMER
index 5789c2d..6a87d59 100644 (file)
@@ -28,10 +28,6 @@ LDFLAGS_vmlinux      += --fix-cortex-a53-843419
   endif
 endif
 
-# We never want expected sections to be placed heuristically by the
-# linker. All sections should be explicitly named in the linker script.
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
-
 ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y)
   ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y)
 $(warning LSE atomics not supported by binutils)
index 3ea5182..e5e840b 100644 (file)
 &emac {
        pinctrl-names = "default";
        pinctrl-0 = <&rgmii_pins>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-handle = <&ext_rgmii_phy>;
        phy-supply = <&reg_dc1sw>;
        status = "okay";
index d894ec5..70e3174 100644 (file)
 &emac {
        pinctrl-names = "default";
        pinctrl-0 = <&rgmii_pins>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-handle = <&ext_rgmii_phy>;
        phy-supply = <&reg_gmac_3v3>;
        status = "okay";
index b26181c..b54099b 100644 (file)
@@ -13,7 +13,7 @@
 &emac {
        pinctrl-names = "default";
        pinctrl-0 = <&rgmii_pins>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-txid";
        phy-handle = <&ext_rgmii_phy>;
        status = "okay";
 };
index 3ab0f03..0494bfa 100644 (file)
        status = "okay";
 
        port {
-               #address-cells = <1>;
-               #size-cells = <0>;
-
                csi_ep: endpoint {
                        remote-endpoint = <&ov5640_ep>;
                        bus-width = <8>;
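
#address-cells/#size-cells inside an OF-graph port node are only needed when it holds several endpoints distinguished by unit address; with a single anonymous endpoint, as here, they merely trip dtc's "unnecessary #address-cells" warning. They would be kept in a layout like this (the sink labels are hypothetical):

	port {
		#address-cells = <1>;
		#size-cells = <0>;

		endpoint@0 { reg = <0>; remote-endpoint = <&first_sink>; };
		endpoint@1 { reg = <1>; remote-endpoint = <&second_sink>; };
	};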
index 9ebb9e0..d406974 100644 (file)
@@ -79,7 +79,7 @@
 &emac {
        pinctrl-names = "default";
        pinctrl-0 = <&rgmii_pins>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-handle = <&ext_rgmii_phy>;
        phy-supply = <&reg_dc1sw>;
        status = "okay";
index df1b926..6e30a56 100644 (file)
@@ -36,7 +36,7 @@
        pinctrl-0 = <&emac_rgmii_pins>;
        phy-supply = <&reg_gmac_3v3>;
        phy-handle = <&ext_rgmii_phy>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        /delete-property/ allwinner,leds-active-low;
        status = "okay";
 };
index 4f9ba53..9d93fe1 100644 (file)
@@ -96,7 +96,7 @@
        pinctrl-0 = <&emac_rgmii_pins>;
        phy-supply = <&reg_gmac_3v3>;
        phy-handle = <&ext_rgmii_phy>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
index 7d7aad1..8bf2db9 100644 (file)
        pinctrl-0 = <&emac_rgmii_pins>;
        phy-supply = <&reg_gmac_3v3>;
        phy-handle = <&ext_rgmii_phy>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
index cb44bfa..33ab440 100644 (file)
        pinctrl-0 = <&emac_rgmii_pins>;
        phy-supply = <&reg_gmac_3v3>;
        phy-handle = <&ext_rgmii_phy>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        status = "okay";
 };
 
index 3f7ceeb..7c9dbde 100644 (file)
@@ -97,7 +97,7 @@
 &emac {
        pinctrl-names = "default";
        pinctrl-0 = <&ext_rgmii_pins>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-handle = <&ext_rgmii_phy>;
        phy-supply = <&reg_aldo2>;
        status = "okay";
index fceb298..29a081e 100644 (file)
@@ -27,7 +27,7 @@
 &emac {
        pinctrl-names = "default";
        pinctrl-0 = <&ext_rgmii_pins>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-handle = <&ext_rgmii_phy>;
        phy-supply = <&reg_gmac_3v3>;
        allwinner,rx-delay-ps = <200>;
index af85b20..961732c 100644 (file)
 &emac {
        pinctrl-names = "default";
        pinctrl-0 = <&ext_rgmii_pins>;
-       phy-mode = "rgmii";
+       phy-mode = "rgmii-id";
        phy-handle = <&ext_rgmii_phy>;
        phy-supply = <&reg_gmac_3v3>;
        allwinner,rx-delay-ps = <200>;
index feadd21..46e558a 100644 (file)
        flash@0 {
                #address-cells = <1>;
                #size-cells = <1>;
-               compatible = "n25q00a";
+               compatible = "micron,mt25qu02g", "jedec,spi-nor";
                reg = <0>;
                spi-max-frequency = <100000000>;
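
A bare part number such as "n25q00a" is not a valid compatible: the SPI-NOR binding expects a specific "vendor,part" string followed by the generic "jedec,spi-nor" fallback, which is the entry the driver actually matches (the exact chip is then identified from its JEDEC ID at probe time). The same fix recurs for the other boards below carrying this flash. Pattern:

	flash@0 {
		/* most specific first, generic JEDEC fallback last */
		compatible = "micron,mt25qu02g", "jedec,spi-nor";
	};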
 
index c079667..f9b4a39 100644 (file)
        flash@0 {
                #address-cells = <1>;
                #size-cells = <1>;
-               compatible = "n25q00a";
+               compatible = "micron,mt25qu02g", "jedec,spi-nor";
                reg = <0>;
                spi-max-frequency = <100000000>;
 
index 55259f9..aef8f2b 100644 (file)
@@ -5,20 +5,20 @@
        usb {
                compatible = "simple-bus";
                dma-ranges;
-               #address-cells = <1>;
-               #size-cells = <1>;
-               ranges = <0x0 0x0 0x68500000 0x00400000>;
+               #address-cells = <2>;
+               #size-cells = <2>;
+               ranges = <0x0 0x0 0x0 0x68500000 0x0 0x00400000>;
 
                usbphy0: usb-phy@0 {
                        compatible = "brcm,sr-usb-combo-phy";
-                       reg = <0x00000000 0x100>;
+                       reg = <0x0 0x00000000 0x0 0x100>;
                        #phy-cells = <1>;
                        status = "disabled";
                };
 
                xhci0: usb@1000 {
                        compatible = "generic-xhci";
-                       reg = <0x00001000 0x1000>;
+                       reg = <0x0 0x00001000 0x0 0x1000>;
                        interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>;
                        phys = <&usbphy0 1>, <&usbphy0 0>;
                        phy-names = "phy0", "phy1";
@@ -28,7 +28,7 @@
 
                bdc0: usb@2000 {
                        compatible = "brcm,bdc-v0.16";
-                       reg = <0x00002000 0x1000>;
+                       reg = <0x0 0x00002000 0x0 0x1000>;
                        interrupts = <GIC_SPI 259 IRQ_TYPE_LEVEL_HIGH>;
                        phys = <&usbphy0 0>, <&usbphy0 1>;
                        phy-names = "phy0", "phy1";
 
                usbphy1: usb-phy@10000 {
                        compatible = "brcm,sr-usb-combo-phy";
-                       reg = <0x00010000 0x100>;
+                       reg = <0x0 0x00010000 0x0 0x100>;
                        #phy-cells = <1>;
                        status = "disabled";
                };
 
                usbphy2: usb-phy@20000 {
                        compatible = "brcm,sr-usb-hs-phy";
-                       reg = <0x00020000 0x100>;
+                       reg = <0x0 0x00020000 0x0 0x100>;
                        #phy-cells = <0>;
                        status = "disabled";
                };
 
                xhci1: usb@11000 {
                        compatible = "generic-xhci";
-                       reg = <0x00011000 0x1000>;
+                       reg = <0x0 0x00011000 0x0 0x1000>;
                        interrupts = <GIC_SPI 263 IRQ_TYPE_LEVEL_HIGH>;
                        phys = <&usbphy1 1>, <&usbphy2>, <&usbphy1 0>;
                        phy-names = "phy0", "phy1", "phy2";
@@ -62,7 +62,7 @@
 
                bdc1: usb@21000 {
                        compatible = "brcm,bdc-v0.16";
-                       reg = <0x00021000 0x1000>;
+                       reg = <0x0 0x00021000 0x0 0x1000>;
                        interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>;
                        phys = <&usbphy2>;
                        phy-names = "phy0";
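
Once a bus widens #address-cells/#size-cells to <2> (typically so that addresses above 4 GiB, e.g. in dma-ranges, translate without truncation), every child reg and the ranges property must grow in lock-step: each address and each size becomes a high/low cell pair. The Qualcomm IPQ6018 soc node further down receives the same mechanical conversion. Reading the new ranges entry:

	/*       <child address>   <parent address>   <size>          */
	ranges = <0x0 0x0          0x0 0x68500000     0x0 0x00400000>;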
index 73e4f94..7a6fb7e 100644 (file)
                        compatible = "fsl,ls1028a-rcpm", "fsl,qoriq-rcpm-2.1+";
                        reg = <0x0 0x1e34040 0x0 0x1c>;
                        #fsl,rcpm-wakeup-cells = <7>;
+                       little-endian;
                };
 
                ftm_alarm0: timer@2800000 {
index ff58052..692d8f4 100644 (file)
                        compatible = "fsl,ls1088a-rcpm", "fsl,qoriq-rcpm-2.1+";
                        reg = <0x0 0x1e34040 0x0 0x18>;
                        #fsl,rcpm-wakeup-cells = <6>;
+                       little-endian;
                };
 
                ftm_alarm0: timer@2800000 {
index bf72918..e7abb74 100644 (file)
                        compatible = "fsl,ls208xa-rcpm", "fsl,qoriq-rcpm-2.1+";
                        reg = <0x0 0x1e34040 0x0 0x18>;
                        #fsl,rcpm-wakeup-cells = <6>;
+                       little-endian;
                };
 
                ftm_alarm0: timer@2800000 {
index 6de86a4..b88c3c9 100644 (file)
@@ -72,6 +72,7 @@
        pmic@4b {
                compatible = "rohm,bd71847";
                reg = <0x4b>;
+               pinctrl-names = "default";
                pinctrl-0 = <&pinctrl_pmic>;
                interrupt-parent = <&gpio1>;
                interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
                host-wakeup-gpios = <&gpio2 8 GPIO_ACTIVE_HIGH>;
                device-wakeup-gpios = <&gpio2 7 GPIO_ACTIVE_HIGH>;
                clocks = <&osc_32k>;
+               max-speed = <4000000>;
                clock-names = "extclk";
        };
 };
index f305a53..521eb3a 100644 (file)
        pmic@4b {
                compatible = "rohm,bd71847";
                reg = <0x4b>;
+               pinctrl-names = "default";
                pinctrl-0 = <&pinctrl_pmic>;
                interrupt-parent = <&gpio1>;
                interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
index 4107fe9..4908252 100644 (file)
        pmic@4b {
                compatible = "rohm,bd71847";
                reg = <0x4b>;
+               pinctrl-names = "default";
                pinctrl-0 = <&pinctrl_pmic>;
                interrupt-parent = <&gpio2>;
-               /*
-                * The interrupt is not correct. It should be level low,
-                * however with internal pull up this causes IRQ storm.
-                */
-               interrupts = <8 IRQ_TYPE_EDGE_RISING>;
+               interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
                rohm,reset-snvs-powered;
 
                #clock-cells = <0>;
 
        pinctrl_pmic: pmicirqgrp {
                fsl,pins = <
-                       MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8        0x41
+                       MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8        0x141
                >;
        };
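
Two independent fixes to the PMIC interrupt meet in these hunks. First, pinctrl-0 is inert unless a matching pinctrl-names = "default" binds it to a state, which several of these boards were missing. Second, the pad gains a working internal pull-up (0x141 instead of 0x41 or 0x101; reading the extra bit as this SoC's pull-enable is an assumption based on the change's intent), so the line idles high and the interrupt can be described as the level-low signal the PMIC actually drives, retiring the edge-triggered workaround and its IRQ-storm comment:

	pmic@4b {
		pinctrl-names = "default";		/* required, or pinctrl-0 is ignored */
		pinctrl-0 = <&pinctrl_pmic>;
		interrupts = <8 IRQ_TYPE_LEVEL_LOW>;	/* safe once the pad is pulled up */
	};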
 
index b83f400..05ee062 100644 (file)
 
                opp-1600000000 {
                        opp-hz = /bits/ 64 <1600000000>;
-                       opp-microvolt = <900000>;
+                       opp-microvolt = <950000>;
                        opp-supported-hw = <0xc>, <0x7>;
                        clock-latency-ns = <150000>;
                        opp-suspend;
index 46e76cf..7dfee71 100644 (file)
@@ -53,6 +53,7 @@
        pmic@4b {
                compatible = "rohm,bd71847";
                reg = <0x4b>;
+               pinctrl-names = "default";
                pinctrl-0 = <&pinctrl_pmic>;
                interrupt-parent = <&gpio1>;
                interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
index 707d848..8311b95 100644 (file)
@@ -18,6 +18,7 @@
        pmic: pmic@25 {
                compatible = "nxp,pca9450b";
                reg = <0x25>;
+               pinctrl-names = "default";
                pinctrl-0 = <&pinctrl_pmic>;
                interrupt-parent = <&gpio1>;
                interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
index a2d0190..7f356ed 100644 (file)
        pmic@4b {
                compatible = "rohm,bd71847";
                reg = <0x4b>;
+               pinctrl-names = "default";
                pinctrl-0 = <&pinctrl_pmic>;
                interrupt-parent = <&gpio2>;
-               /*
-                * The interrupt is not correct. It should be level low,
-                * however with internal pull up this causes IRQ storm.
-                */
-               interrupts = <8 IRQ_TYPE_EDGE_RISING>;
+               interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
                rohm,reset-snvs-powered;
 
                regulators {
 
        pinctrl_pmic: pmicirqgrp {
                fsl,pins = <
-                       MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8        0x101
+                       MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8        0x141
                >;
        };
 
index 746faf1..16c7202 100644 (file)
                                #index-cells = <1>;
                                reg = <0x32e40200 0x200>;
                        };
-
-                       usbotg2: usb@32e50000 {
-                               compatible = "fsl,imx8mn-usb", "fsl,imx7d-usb";
-                               reg = <0x32e50000 0x200>;
-                               interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>;
-                               clocks = <&clk IMX8MN_CLK_USB1_CTRL_ROOT>;
-                               clock-names = "usb1_ctrl_root_clk";
-                               assigned-clocks = <&clk IMX8MN_CLK_USB_BUS>,
-                                                 <&clk IMX8MN_CLK_USB_CORE_REF>;
-                               assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_500M>,
-                                                        <&clk IMX8MN_SYS_PLL1_100M>;
-                               fsl,usbphy = <&usbphynop2>;
-                               fsl,usbmisc = <&usbmisc2 0>;
-                               status = "disabled";
-                       };
-
-                       usbmisc2: usbmisc@32e50200 {
-                               compatible = "fsl,imx8mn-usbmisc", "fsl,imx7d-usbmisc";
-                               #index-cells = <1>;
-                               reg = <0x32e50200 0x200>;
-                       };
-
                };
 
                dma_apbh: dma-controller@33000000 {
                assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>;
                clock-names = "main_clk";
        };
-
-       usbphynop2: usbphynop2 {
-               compatible = "usb-nop-xceiv";
-               clocks = <&clk IMX8MN_CLK_USB_PHY_REF>;
-               assigned-clocks = <&clk IMX8MN_CLK_USB_PHY_REF>;
-               assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>;
-               clock-names = "main_clk";
-       };
 };
index 8bc6caa..4338db1 100644 (file)
@@ -19,6 +19,7 @@ fman0: fman@1a00000 {
        clock-names = "fmanclk";
        fsl,qman-channel-range = <0x800 0x10>;
        ptimer-handle = <&ptp_timer0>;
+       dma-coherent;
 
        muram@0 {
                compatible = "fsl,fman-muram";
index 96c50d4..a7a83f2 100644 (file)
        flash@0 {
                #address-cells = <1>;
                #size-cells = <1>;
-               compatible = "mt25qu02g";
+               compatible = "micron,mt25qu02g", "jedec,spi-nor";
                reg = <0>;
                spi-max-frequency = <100000000>;
 
index 381a849..c28d51c 100644 (file)
        model = "NVIDIA Jetson TX2 Developer Kit";
        compatible = "nvidia,p2771-0000", "nvidia,tegra186";
 
-       aconnect {
-               status = "okay";
-
-               dma-controller@2930000 {
-                       status = "okay";
-               };
-
-               interrupt-controller@2a40000 {
-                       status = "okay";
-               };
-       };
-
        i2c@3160000 {
                power-monitor@42 {
                        compatible = "ti,ina3221";
index a2893be..0dc8304 100644 (file)
@@ -54,7 +54,7 @@
                        status = "okay";
                };
 
-               serial@c280000 {
+               serial@3100000 {
                        status = "okay";
                };
 
index e9c90f0..93438d2 100644 (file)
 
                hsp_aon: hsp@c150000 {
                        compatible = "nvidia,tegra194-hsp", "nvidia,tegra186-hsp";
-                       reg = <0x0c150000 0xa0000>;
+                       reg = <0x0c150000 0x90000>;
                        interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
                                     <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
                                     <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
index e18e1a9..a9caaf7 100644 (file)
                vin-supply = <&vdd_5v0_sys>;
        };
 
-       vdd_usb_vbus_otg: regulator@11 {
-               compatible = "regulator-fixed";
-               regulator-name = "USB_VBUS_EN0";
-               regulator-min-microvolt = <5000000>;
-               regulator-max-microvolt = <5000000>;
-               gpio = <&gpio TEGRA_GPIO(CC, 4) GPIO_ACTIVE_HIGH>;
-               enable-active-high;
-               vin-supply = <&vdd_5v0_sys>;
-       };
-
        vdd_hdmi: regulator@10 {
                compatible = "regulator-fixed";
                regulator-name = "VDD_HDMI_5V0";
                enable-active-high;
                vin-supply = <&vdd_3v3_sys>;
        };
+
+       vdd_usb_vbus_otg: regulator@14 {
+               compatible = "regulator-fixed";
+               regulator-name = "USB_VBUS_EN0";
+               regulator-min-microvolt = <5000000>;
+               regulator-max-microvolt = <5000000>;
+               gpio = <&gpio TEGRA_GPIO(CC, 4) GPIO_ACTIVE_HIGH>;
+               enable-active-high;
+               vin-supply = <&vdd_5v0_sys>;
+       };
 };
index f6e6a24..b5d9a55 100644 (file)
@@ -8,7 +8,7 @@
        compatible = "nvidia,tegra234-vdk", "nvidia,tegra234";
 
        aliases {
-               sdhci3 = "/cbb@0/sdhci@3460000";
+               mmc3 = "/bus@0/mmc@3460000";
                serial0 = &uarta;
        };
 
                stdout-path = "serial0:115200n8";
        };
 
-       cbb@0 {
+       bus@0 {
                serial@3100000 {
                        status = "okay";
                };
 
-               sdhci@3460000 {
+               mmc@3460000 {
                        status = "okay";
                        bus-width = <8>;
                        non-removable;
index a94dac7..59e0cbf 100644 (file)
        };
 
        soc: soc {
-               #address-cells = <1>;
-               #size-cells = <1>;
-               ranges = <0 0 0 0xffffffff>;
+               #address-cells = <2>;
+               #size-cells = <2>;
+               ranges = <0 0 0 0 0x0 0xffffffff>;
                dma-ranges;
                compatible = "simple-bus";
 
                prng: qrng@e1000 {
                        compatible = "qcom,prng-ee";
-                       reg = <0xe3000 0x1000>;
+                       reg = <0x0 0xe3000 0x0 0x1000>;
                        clocks = <&gcc GCC_PRNG_AHB_CLK>;
                        clock-names = "core";
                };
 
                cryptobam: dma@704000 {
                        compatible = "qcom,bam-v1.7.0";
-                       reg = <0x00704000 0x20000>;
+                       reg = <0x0 0x00704000 0x0 0x20000>;
                        interrupts = <GIC_SPI 207 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&gcc GCC_CRYPTO_AHB_CLK>;
                        clock-names = "bam_clk";
 
                crypto: crypto@73a000 {
                        compatible = "qcom,crypto-v5.1";
-                       reg = <0x0073a000 0x6000>;
+                       reg = <0x0 0x0073a000 0x0 0x6000>;
                        clocks = <&gcc GCC_CRYPTO_AHB_CLK>,
                                <&gcc GCC_CRYPTO_AXI_CLK>,
                                <&gcc GCC_CRYPTO_CLK>;
 
                tlmm: pinctrl@1000000 {
                        compatible = "qcom,ipq6018-pinctrl";
-                       reg = <0x01000000 0x300000>;
+                       reg = <0x0 0x01000000 0x0 0x300000>;
                        interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
                        gpio-controller;
                        #gpio-cells = <2>;
 
                gcc: gcc@1800000 {
                        compatible = "qcom,gcc-ipq6018";
-                       reg = <0x01800000 0x80000>;
+                       reg = <0x0 0x01800000 0x0 0x80000>;
                        clocks = <&xo>, <&sleep_clk>;
                        clock-names = "xo", "sleep_clk";
                        #clock-cells = <1>;
 
                tcsr_mutex_regs: syscon@1905000 {
                        compatible = "syscon";
-                       reg = <0x01905000 0x8000>;
+                       reg = <0x0 0x01905000 0x0 0x8000>;
                };
 
                tcsr_q6: syscon@1945000 {
                        compatible = "syscon";
-                       reg = <0x01945000 0xe000>;
+                       reg = <0x0 0x01945000 0x0 0xe000>;
                };
 
                blsp_dma: dma@7884000 {
                        compatible = "qcom,bam-v1.7.0";
-                       reg = <0x07884000 0x2b000>;
+                       reg = <0x0 0x07884000 0x0 0x2b000>;
                        interrupts = <GIC_SPI 238 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&gcc GCC_BLSP1_AHB_CLK>;
                        clock-names = "bam_clk";
 
                blsp1_uart3: serial@78b1000 {
                        compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
-                       reg = <0x078b1000 0x200>;
+                       reg = <0x0 0x078b1000 0x0 0x200>;
                        interrupts = <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&gcc GCC_BLSP1_UART3_APPS_CLK>,
                                <&gcc GCC_BLSP1_AHB_CLK>;
                        compatible = "qcom,spi-qup-v2.2.1";
                        #address-cells = <1>;
                        #size-cells = <0>;
-                       reg = <0x078b5000 0x600>;
+                       reg = <0x0 0x078b5000 0x0 0x600>;
                        interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
                        spi-max-frequency = <50000000>;
                        clocks = <&gcc GCC_BLSP1_QUP1_SPI_APPS_CLK>,
                        compatible = "qcom,spi-qup-v2.2.1";
                        #address-cells = <1>;
                        #size-cells = <0>;
-                       reg = <0x078b6000 0x600>;
+                       reg = <0x0 0x078b6000 0x0 0x600>;
                        interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
                        spi-max-frequency = <50000000>;
                        clocks = <&gcc GCC_BLSP1_QUP2_SPI_APPS_CLK>,
                        compatible = "qcom,i2c-qup-v2.2.1";
                        #address-cells = <1>;
                        #size-cells = <0>;
-                       reg = <0x078b6000 0x600>;
+                       reg = <0x0 0x078b6000 0x0 0x600>;
                        interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&gcc GCC_BLSP1_AHB_CLK>,
                                <&gcc GCC_BLSP1_QUP2_I2C_APPS_CLK>;
                        compatible = "qcom,i2c-qup-v2.2.1";
                        #address-cells = <1>;
                        #size-cells = <0>;
-                       reg = <0x078b7000 0x600>;
+                       reg = <0x0 0x078b7000 0x0 0x600>;
                        interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&gcc GCC_BLSP1_AHB_CLK>,
                                <&gcc GCC_BLSP1_QUP3_I2C_APPS_CLK>;
                        compatible = "qcom,msm-qgic2";
                        interrupt-controller;
                        #interrupt-cells = <0x3>;
-                       reg =   <0x0b000000 0x1000>,  /*GICD*/
-                               <0x0b002000 0x1000>,  /*GICC*/
-                               <0x0b001000 0x1000>,  /*GICH*/
-                               <0x0b004000 0x1000>;  /*GICV*/
+                       reg =   <0x0 0x0b000000 0x0 0x1000>,  /*GICD*/
+                               <0x0 0x0b002000 0x0 0x1000>,  /*GICC*/
+                               <0x0 0x0b001000 0x0 0x1000>,  /*GICH*/
+                               <0x0 0x0b004000 0x0 0x1000>;  /*GICV*/
                        interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
                };
 
                watchdog@b017000 {
                        compatible = "qcom,kpss-wdt";
                        interrupts = <GIC_SPI 3 IRQ_TYPE_EDGE_RISING>;
-                       reg = <0x0b017000 0x40>;
+                       reg = <0x0 0x0b017000 0x0 0x40>;
                        clocks = <&sleep_clk>;
                        timeout-sec = <10>;
                };
 
                apcs_glb: mailbox@b111000 {
                        compatible = "qcom,ipq6018-apcs-apps-global";
-                       reg = <0x0b111000 0x1000>;
+                       reg = <0x0 0x0b111000 0x0 0x1000>;
                        #clock-cells = <1>;
                        clocks = <&a53pll>, <&xo>;
                        clock-names = "pll", "xo";
 
                a53pll: clock@b116000 {
                        compatible = "qcom,ipq6018-a53pll";
-                       reg = <0x0b116000 0x40>;
+                       reg = <0x0 0x0b116000 0x0 0x40>;
                        #clock-cells = <0>;
                        clocks = <&xo>;
                        clock-names = "xo";
                };
 
                timer@b120000 {
-                       #address-cells = <1>;
-                       #size-cells = <1>;
+                       #address-cells = <2>;
+                       #size-cells = <2>;
                        ranges;
                        compatible = "arm,armv7-timer-mem";
-                       reg = <0x0b120000 0x1000>;
+                       reg = <0x0 0x0b120000 0x0 0x1000>;
                        clock-frequency = <19200000>;
 
                        frame@b120000 {
                                frame-number = <0>;
                                interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
                                             <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
-                               reg = <0x0b121000 0x1000>,
-                                     <0x0b122000 0x1000>;
+                               reg = <0x0 0x0b121000 0x0 0x1000>,
+                                     <0x0 0x0b122000 0x0 0x1000>;
                        };
 
                        frame@b123000 {
                                frame-number = <1>;
                                interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
-                               reg = <0xb123000 0x1000>;
+                               reg = <0x0 0xb123000 0x0 0x1000>;
                                status = "disabled";
                        };
 
                        frame@b124000 {
                                frame-number = <2>;
                                interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
-                               reg = <0x0b124000 0x1000>;
+                               reg = <0x0 0x0b124000 0x0 0x1000>;
                                status = "disabled";
                        };
 
                        frame@b125000 {
                                frame-number = <3>;
                                interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
-                               reg = <0x0b125000 0x1000>;
+                               reg = <0x0 0x0b125000 0x0 0x1000>;
                                status = "disabled";
                        };
 
                        frame@b126000 {
                                frame-number = <4>;
                                interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
-                               reg = <0x0b126000 0x1000>;
+                               reg = <0x0 0x0b126000 0x0 0x1000>;
                                status = "disabled";
                        };
 
                        frame@b127000 {
                                frame-number = <5>;
                                interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
-                               reg = <0x0b127000 0x1000>;
+                               reg = <0x0 0x0b127000 0x0 0x1000>;
                                status = "disabled";
                        };
 
                        frame@b128000 {
                                frame-number = <6>;
                                interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
-                               reg = <0x0b128000 0x1000>;
+                               reg = <0x0 0x0b128000 0x0 0x1000>;
                                status = "disabled";
                        };
                };
 
                q6v5_wcss: remoteproc@cd00000 {
                        compatible = "qcom,ipq8074-wcss-pil";
-                       reg = <0x0cd00000 0x4040>,
-                               <0x004ab000 0x20>;
+                       reg = <0x0 0x0cd00000 0x0 0x4040>,
+                             <0x0 0x004ab000 0x0 0x20>;
                        reg-names = "qdsp6",
                                    "rmb";
                        interrupts-extended = <&intc GIC_SPI 325 IRQ_TYPE_EDGE_RISING>,
index 9cbf963..c296434 100644 (file)
                clock-frequency = <0>;
        };
 
+       audio_clk_b: audio_clk_b {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <0>;
+       };
+
        audio_clk_c: audio_clk_c {
                compatible = "fixed-clock";
                #clock-cells = <0>;
index 35bd6b9..3376810 100644 (file)
                interrupts = <RK_PB2 IRQ_TYPE_LEVEL_LOW>;
                pinctrl-names = "default";
                pinctrl-0 = <&pmic_int>;
-               rockchip,system-power-controller;
                wakeup-source;
                #clock-cells = <1>;
                clock-output-names = "rk808-clkout1", "xin32k";
index be7a31d..2ee07d1 100644 (file)
@@ -20,7 +20,7 @@
        gmac_clk: gmac-clock {
                compatible = "fixed-clock";
                clock-frequency = <125000000>;
-               clock-output-names = "gmac_clk";
+               clock-output-names = "gmac_clkin";
                #clock-cells = <0>;
        };
 
index e7a459f..2030907 100644 (file)
                        label = "red:diy";
                        gpios = <&gpio0 RK_PB5 GPIO_ACTIVE_HIGH>;
                        default-state = "off";
-                       linux,default-trigger = "mmc1";
+                       linux,default-trigger = "mmc2";
                };
 
                yellow_led: led-2 {
                        label = "yellow:yellow-led";
                        gpios = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>;
                        default-state = "off";
-                       linux,default-trigger = "mmc0";
+                       linux,default-trigger = "mmc1";
                };
        };
 
index ada724b..7a9a7ac 100644 (file)
@@ -29,6 +29,9 @@
                i2c6 = &i2c6;
                i2c7 = &i2c7;
                i2c8 = &i2c8;
+               mmc0 = &sdio0;
+               mmc1 = &sdmmc;
+               mmc2 = &sdhci;
                serial0 = &uart0;
                serial1 = &uart1;
                serial2 = &uart2;
index ec213b4..1c26d7b 100644 (file)
@@ -128,6 +128,9 @@ static inline void local_daif_inherit(struct pt_regs *regs)
 {
        unsigned long flags = regs->pstate & DAIF_MASK;
 
+       if (interrupts_enabled(regs))
+               trace_hardirqs_on();
+
        /*
         * We can't use local_daif_restore(regs->pstate) here as
         * system_has_prio_mask_debugging() won't restore the I bit if it can
index 22c81f1..85a3e49 100644 (file)
 /* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
 #define ESR_ELx_FSC            (0x3F)
 #define ESR_ELx_FSC_TYPE       (0x3C)
+#define ESR_ELx_FSC_LEVEL      (0x03)
 #define ESR_ELx_FSC_EXTABT     (0x10)
 #define ESR_ELx_FSC_SERROR     (0x11)
 #define ESR_ELx_FSC_ACCESS     (0x08)
index 99b9383..0756191 100644 (file)
@@ -31,7 +31,12 @@ static inline u32 disr_to_esr(u64 disr)
        return esr;
 }
 
+asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
+asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void enter_from_user_mode(void);
+asmlinkage void exit_to_user_mode(void);
+void arm64_enter_nmi(struct pt_regs *regs);
+void arm64_exit_nmi(struct pt_regs *regs);
 void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
 void do_bti(struct pt_regs *regs);
index c8f550a..f612c09 100644 (file)
@@ -331,6 +331,11 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vc
        return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
 }
 
+static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
+{
+       return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
+}
+
 static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
 {
        switch (kvm_vcpu_trap_get_fault(vcpu)) {
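
The new ESR_ELx_FSC_LEVEL mask (added two hunks up) and this accessor rely on the fact that, for translation, access-flag and permission faults, the low two bits of the fault status code encode the table level that faulted, which KVM's stage-2 abort path needs to know. A sketch of the encoding:

	/*
	 * xFSC values with the lookup level in bits [1:0]:
	 *   0b0001LL  translation fault at level LL
	 *   0b0010LL  access flag fault at level LL
	 *   0b0011LL  permission fault at level LL
	 */
	u8 level = kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;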
index 4ff12a7..5628289 100644 (file)
@@ -115,8 +115,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define pte_valid(pte)         (!!(pte_val(pte) & PTE_VALID))
 #define pte_valid_not_user(pte) \
        ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
-#define pte_valid_young(pte) \
-       ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
 #define pte_valid_user(pte) \
        ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 
@@ -124,9 +122,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
  * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
  * so that we don't erroneously return false for pages that have been
  * remapped as PROT_NONE but are yet to be flushed from the TLB.
+ * Note that we can't make any assumptions based on the state of the access
+ * flag, since ptep_clear_flush_young() elides a DSB when invalidating the
+ * TLB.
  */
 #define pte_accessible(mm, pte)        \
-       (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
+       (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
 
 /*
  * p??_access_permitted() is true for valid user mappings (subject to the
@@ -164,13 +165,6 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
        return pmd;
 }
 
-static inline pte_t pte_wrprotect(pte_t pte)
-{
-       pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
-       pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
-       return pte;
-}
-
 static inline pte_t pte_mkwrite(pte_t pte)
 {
        pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
@@ -196,6 +190,20 @@ static inline pte_t pte_mkdirty(pte_t pte)
        return pte;
 }
 
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+       /*
+        * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+        * clear), set the PTE_DIRTY bit.
+        */
+       if (pte_hw_dirty(pte))
+               pte = pte_mkdirty(pte);
+
+       pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
+       pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+       return pte;
+}
+
 static inline pte_t pte_mkold(pte_t pte)
 {
        return clear_pte_bit(pte, __pgprot(PTE_AF));
@@ -845,12 +853,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
        pte = READ_ONCE(*ptep);
        do {
                old_pte = pte;
-               /*
-                * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
-                * clear), set the PTE_DIRTY bit.
-                */
-               if (pte_hw_dirty(pte))
-                       pte = pte_mkdirty(pte);
                pte = pte_wrprotect(pte);
                pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
                                               pte_val(old_pte), pte_val(pte));
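
Folding the hardware-dirty check into pte_wrprotect() itself means every caller that clears PTE_WRITE — including paths such as the hugetlb wrprotect code, which call pte_wrprotect() directly rather than going through ptep_set_wrprotect() — now preserves the dirty state; previously a page the MMU had just dirtied via DBM could emerge from write-protection looking clean. For context, the predicate used above is defined earlier in this header, approximately as:

	/* With DBM, hardware marks a page dirty by clearing PTE_RDONLY
	 * while PTE_WRITE (doubling as the DBM bit) stays set. */
	#define pte_hw_dirty(pte)  (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))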
index 4266262..0069467 100644 (file)
@@ -7,6 +7,8 @@
 #ifndef _ARM_PROBES_H
 #define _ARM_PROBES_H
 
+#include <asm/insn.h>
+
 typedef u32 probe_opcode_t;
 typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *);
 
index 2bb53bc..e58bca8 100644 (file)
@@ -197,6 +197,10 @@ struct pt_regs {
        /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */
        u64 pmr_save;
        u64 stackframe[2];
+
+       /* Only valid for some EL1 exceptions. */
+       u64 lockdep_hardirqs;
+       u64 exit_rcu;
 };
 
 static inline bool in_syscall(struct pt_regs const *regs)
index 82521cd..8b5e7e5 100644 (file)
 #define SYS_TFSR_EL1_TF0_SHIFT 0
 #define SYS_TFSR_EL1_TF1_SHIFT 1
 #define SYS_TFSR_EL1_TF0       (UL(1) << SYS_TFSR_EL1_TF0_SHIFT)
-#define SYS_TFSR_EL1_TF1       (UK(2) << SYS_TFSR_EL1_TF1_SHIFT)
+#define SYS_TFSR_EL1_TF1       (UL(1) << SYS_TFSR_EL1_TF1_SHIFT)
 
 /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
 #define SYS_MPIDR_SAFE_VAL     (BIT(31))
index 43d4c32..70e0a75 100644 (file)
 #include <asm/mmu.h>
 #include <asm/sysreg.h>
 
-static void notrace el1_abort(struct pt_regs *regs, unsigned long esr)
+/*
+ * This is intended to match the logic in irqentry_enter(), handling the kernel
+ * mode transitions only.
+ */
+static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
+{
+       regs->exit_rcu = false;
+
+       if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
+               lockdep_hardirqs_off(CALLER_ADDR0);
+               rcu_irq_enter();
+               trace_hardirqs_off_finish();
+
+               regs->exit_rcu = true;
+               return;
+       }
+
+       lockdep_hardirqs_off(CALLER_ADDR0);
+       rcu_irq_enter_check_tick();
+       trace_hardirqs_off_finish();
+}
+
+/*
+ * This is intended to match the logic in irqentry_exit(), handling the kernel
+ * mode transitions only, and with preemption handled elsewhere.
+ */
+static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+{
+       lockdep_assert_irqs_disabled();
+
+       if (interrupts_enabled(regs)) {
+               if (regs->exit_rcu) {
+                       trace_hardirqs_on_prepare();
+                       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+                       rcu_irq_exit();
+                       lockdep_hardirqs_on(CALLER_ADDR0);
+                       return;
+               }
+
+               trace_hardirqs_on();
+       } else {
+               if (regs->exit_rcu)
+                       rcu_irq_exit();
+       }
+}
+
+void noinstr arm64_enter_nmi(struct pt_regs *regs)
+{
+       regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+
+       __nmi_enter();
+       lockdep_hardirqs_off(CALLER_ADDR0);
+       lockdep_hardirq_enter();
+       rcu_nmi_enter();
+
+       trace_hardirqs_off_finish();
+       ftrace_nmi_enter();
+}
+
+void noinstr arm64_exit_nmi(struct pt_regs *regs)
+{
+       bool restore = regs->lockdep_hardirqs;
+
+       ftrace_nmi_exit();
+       if (restore) {
+               trace_hardirqs_on_prepare();
+               lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       }
+
+       rcu_nmi_exit();
+       lockdep_hardirq_exit();
+       if (restore)
+               lockdep_hardirqs_on(CALLER_ADDR0);
+       __nmi_exit();
+}
+
+asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
+{
+       if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+               arm64_enter_nmi(regs);
+       else
+               enter_from_kernel_mode(regs);
+}
+
+asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
+{
+       if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+               arm64_exit_nmi(regs);
+       else
+               exit_to_kernel_mode(regs);
+}
+
+static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
 {
        unsigned long far = read_sysreg(far_el1);
 
+       enter_from_kernel_mode(regs);
        local_daif_inherit(regs);
        far = untagged_addr(far);
        do_mem_abort(far, esr, regs);
+       local_daif_mask();
+       exit_to_kernel_mode(regs);
 }
-NOKPROBE_SYMBOL(el1_abort);
 
-static void notrace el1_pc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
 {
        unsigned long far = read_sysreg(far_el1);
 
+       enter_from_kernel_mode(regs);
        local_daif_inherit(regs);
        do_sp_pc_abort(far, esr, regs);
+       local_daif_mask();
+       exit_to_kernel_mode(regs);
 }
-NOKPROBE_SYMBOL(el1_pc);
 
-static void notrace el1_undef(struct pt_regs *regs)
+static void noinstr el1_undef(struct pt_regs *regs)
 {
+       enter_from_kernel_mode(regs);
        local_daif_inherit(regs);
        do_undefinstr(regs);
+       local_daif_mask();
+       exit_to_kernel_mode(regs);
 }
-NOKPROBE_SYMBOL(el1_undef);
 
-static void notrace el1_inv(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr)
 {
+       enter_from_kernel_mode(regs);
        local_daif_inherit(regs);
        bad_mode(regs, 0, esr);
+       local_daif_mask();
+       exit_to_kernel_mode(regs);
 }
-NOKPROBE_SYMBOL(el1_inv);
 
-static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
+{
+       regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+
+       lockdep_hardirqs_off(CALLER_ADDR0);
+       rcu_nmi_enter();
+
+       trace_hardirqs_off_finish();
+}
+
+static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
+{
+       bool restore = regs->lockdep_hardirqs;
+
+       if (restore) {
+               trace_hardirqs_on_prepare();
+               lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       }
+
+       rcu_nmi_exit();
+       if (restore)
+               lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
 {
        unsigned long far = read_sysreg(far_el1);
 
@@ -62,18 +186,21 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr)
        if (system_uses_irq_prio_masking())
                gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
+       arm64_enter_el1_dbg(regs);
        do_debug_exception(far, esr, regs);
+       arm64_exit_el1_dbg(regs);
 }
-NOKPROBE_SYMBOL(el1_dbg);
 
-static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
 {
+       enter_from_kernel_mode(regs);
        local_daif_inherit(regs);
        do_ptrauth_fault(regs, esr);
+       local_daif_mask();
+       exit_to_kernel_mode(regs);
 }
-NOKPROBE_SYMBOL(el1_fpac);
 
-asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
 {
        unsigned long esr = read_sysreg(esr_el1);
 
@@ -106,20 +233,34 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
                el1_inv(regs, esr);
        }
 }
-NOKPROBE_SYMBOL(el1_sync_handler);
 
-static void notrace el0_da(struct pt_regs *regs, unsigned long esr)
+asmlinkage void noinstr enter_from_user_mode(void)
+{
+       lockdep_hardirqs_off(CALLER_ADDR0);
+       CT_WARN_ON(ct_state() != CONTEXT_USER);
+       user_exit_irqoff();
+       trace_hardirqs_off_finish();
+}
+
+asmlinkage void noinstr exit_to_user_mode(void)
+{
+       trace_hardirqs_on_prepare();
+       lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+       user_enter_irqoff();
+       lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
+static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
 {
        unsigned long far = read_sysreg(far_el1);
 
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        far = untagged_addr(far);
        do_mem_abort(far, esr, regs);
 }
-NOKPROBE_SYMBOL(el0_da);
 
-static void notrace el0_ia(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
 {
        unsigned long far = read_sysreg(far_el1);
 
@@ -131,90 +272,80 @@ static void notrace el0_ia(struct pt_regs *regs, unsigned long esr)
        if (!is_ttbr0_addr(far))
                arm64_apply_bp_hardening();
 
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_mem_abort(far, esr, regs);
 }
-NOKPROBE_SYMBOL(el0_ia);
 
-static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
 {
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_fpsimd_acc(esr, regs);
 }
-NOKPROBE_SYMBOL(el0_fpsimd_acc);
 
-static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
 {
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_sve_acc(esr, regs);
 }
-NOKPROBE_SYMBOL(el0_sve_acc);
 
-static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
 {
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_fpsimd_exc(esr, regs);
 }
-NOKPROBE_SYMBOL(el0_fpsimd_exc);
 
-static void notrace el0_sys(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
 {
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_sysinstr(esr, regs);
 }
-NOKPROBE_SYMBOL(el0_sys);
 
-static void notrace el0_pc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
 {
        unsigned long far = read_sysreg(far_el1);
 
        if (!is_ttbr0_addr(instruction_pointer(regs)))
                arm64_apply_bp_hardening();
 
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_sp_pc_abort(far, esr, regs);
 }
-NOKPROBE_SYMBOL(el0_pc);
 
-static void notrace el0_sp(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
 {
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_sp_pc_abort(regs->sp, esr, regs);
 }
-NOKPROBE_SYMBOL(el0_sp);
 
-static void notrace el0_undef(struct pt_regs *regs)
+static void noinstr el0_undef(struct pt_regs *regs)
 {
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_undefinstr(regs);
 }
-NOKPROBE_SYMBOL(el0_undef);
 
-static void notrace el0_bti(struct pt_regs *regs)
+static void noinstr el0_bti(struct pt_regs *regs)
 {
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_bti(regs);
 }
-NOKPROBE_SYMBOL(el0_bti);
 
-static void notrace el0_inv(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
 {
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        bad_el0_sync(regs, 0, esr);
 }
-NOKPROBE_SYMBOL(el0_inv);
 
-static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
 {
        /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
        unsigned long far = read_sysreg(far_el1);
@@ -222,30 +353,28 @@ static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr)
        if (system_uses_irq_prio_masking())
                gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
-       user_exit_irqoff();
+       enter_from_user_mode();
        do_debug_exception(far, esr, regs);
        local_daif_restore(DAIF_PROCCTX_NOIRQ);
 }
-NOKPROBE_SYMBOL(el0_dbg);
 
-static void notrace el0_svc(struct pt_regs *regs)
+static void noinstr el0_svc(struct pt_regs *regs)
 {
        if (system_uses_irq_prio_masking())
                gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
+       enter_from_user_mode();
        do_el0_svc(regs);
 }
-NOKPROBE_SYMBOL(el0_svc);
 
-static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
 {
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_ptrauth_fault(regs, esr);
 }
-NOKPROBE_SYMBOL(el0_fpac);
 
-asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
 {
        unsigned long esr = read_sysreg(esr_el1);
 
@@ -297,27 +426,25 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
                el0_inv(regs, esr);
        }
 }
-NOKPROBE_SYMBOL(el0_sync_handler);
 
 #ifdef CONFIG_COMPAT
-static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
 {
-       user_exit_irqoff();
+       enter_from_user_mode();
        local_daif_restore(DAIF_PROCCTX);
        do_cp15instr(esr, regs);
 }
-NOKPROBE_SYMBOL(el0_cp15);
 
-static void notrace el0_svc_compat(struct pt_regs *regs)
+static void noinstr el0_svc_compat(struct pt_regs *regs)
 {
        if (system_uses_irq_prio_masking())
                gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
+       enter_from_user_mode();
        do_el0_svc_compat(regs);
 }
-NOKPROBE_SYMBOL(el0_svc_compat);
 
-asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
 {
        unsigned long esr = read_sysreg(esr_el1);
 
@@ -360,5 +487,4 @@ asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
                el0_inv(regs, esr);
        }
 }
-NOKPROBE_SYMBOL(el0_sync_compat_handler);
 #endif /* CONFIG_COMPAT */
index bdd3b57..0dffaa0 100644 (file)
 #include <asm/unistd.h>
 
 /*
- * Context tracking subsystem.  Used to instrument transitions
- * between user and kernel mode.
+ * Context tracking and irqflag tracing need to instrument transitions between
+ * user and kernel mode.
  */
-       .macro ct_user_exit_irqoff
-#ifdef CONFIG_CONTEXT_TRACKING
+       .macro user_exit_irqoff
+#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
        bl      enter_from_user_mode
 #endif
        .endm
 
-       .macro ct_user_enter
-#ifdef CONFIG_CONTEXT_TRACKING
-       bl      context_tracking_user_enter
+       .macro user_enter_irqoff
+#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
+       bl      exit_to_user_mode
 #endif
        .endm
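
For reference, the C hooks these macros now branch to look roughly like the
sketch below (an assumed shape, condensed from the arm64 C entry code of this
era; the exact upstream bodies may differ):

    asmlinkage void noinstr enter_from_user_mode(void)
    {
            /* IRQs are masked; tell lockdep, then leave RCU's user mode */
            lockdep_hardirqs_off(CALLER_ADDR0);
            CT_WARN_ON(ct_state() != CONTEXT_USER);
            user_exit_irqoff();
            trace_hardirqs_off_finish();
    }

    asmlinkage void noinstr exit_to_user_mode(void)
    {
            /* Mirror image: re-enter RCU's user mode before ERET to EL0 */
            trace_hardirqs_on_prepare();
            lockdep_hardirqs_on_prepare(CALLER_ADDR0);
            user_enter_irqoff();
            lockdep_hardirqs_on(CALLER_ADDR0);
    }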
 
@@ -286,9 +286,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 alternative_else_nop_endif
 
        ldp     x21, x22, [sp, #S_PC]           // load ELR, SPSR
-       .if     \el == 0
-       ct_user_enter
-       .endif
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
 alternative_if_not ARM64_HAS_PAN
@@ -625,16 +622,8 @@ SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
        gic_prio_irq_setup pmr=x20, tmp=x1
        enable_da_f
 
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-       test_irqs_unmasked      res=x0, pmr=x20
-       cbz     x0, 1f
-       bl      asm_nmi_enter
-1:
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-       bl      trace_hardirqs_off
-#endif
+       mov     x0, sp
+       bl      enter_el1_irq_or_nmi
 
        irq_handler
 
@@ -653,26 +642,8 @@ alternative_else_nop_endif
 1:
 #endif
 
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-       /*
-        * When using IRQ priority masking, we can get spurious interrupts while
-        * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
-        * section with interrupts disabled. Skip tracing in those cases.
-        */
-       test_irqs_unmasked      res=x0, pmr=x20
-       cbz     x0, 1f
-       bl      asm_nmi_exit
-1:
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-       test_irqs_unmasked      res=x0, pmr=x20
-       cbnz    x0, 1f
-#endif
-       bl      trace_hardirqs_on
-1:
-#endif
+       mov     x0, sp
+       bl      exit_el1_irq_or_nmi
 
        kernel_exit 1
 SYM_CODE_END(el1_irq)
@@ -714,21 +685,14 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
        kernel_entry 0
 el0_irq_naked:
        gic_prio_irq_setup pmr=x20, tmp=x0
-       ct_user_exit_irqoff
+       user_exit_irqoff
        enable_da_f
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-       bl      trace_hardirqs_off
-#endif
-
        tbz     x22, #55, 1f
        bl      do_el0_irq_bp_hardening
 1:
        irq_handler
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-       bl      trace_hardirqs_on
-#endif
        b       ret_to_user
 SYM_CODE_END(el0_irq)
 
@@ -747,7 +711,7 @@ SYM_CODE_START_LOCAL(el0_error)
 el0_error_naked:
        mrs     x25, esr_el1
        gic_prio_kentry_setup tmp=x2
-       ct_user_exit_irqoff
+       user_exit_irqoff
        enable_dbg
        mov     x0, sp
        mov     x1, x25
@@ -762,13 +726,17 @@ SYM_CODE_END(el0_error)
 SYM_CODE_START_LOCAL(ret_to_user)
        disable_daif
        gic_prio_kentry_setup tmp=x3
-       ldr     x1, [tsk, #TSK_TI_FLAGS]
-       and     x2, x1, #_TIF_WORK_MASK
+#ifdef CONFIG_TRACE_IRQFLAGS
+       bl      trace_hardirqs_off
+#endif
+       ldr     x19, [tsk, #TSK_TI_FLAGS]
+       and     x2, x19, #_TIF_WORK_MASK
        cbnz    x2, work_pending
 finish_ret_to_user:
+       user_enter_irqoff
        /* Ignore asynchronous tag check faults in the uaccess routines */
        clear_mte_async_tcf
-       enable_step_tsk x1, x2
+       enable_step_tsk x19, x2
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
        bl      stackleak_erase
 #endif
@@ -779,11 +747,9 @@ finish_ret_to_user:
  */
 work_pending:
        mov     x0, sp                          // 'regs'
+       mov     x1, x19
        bl      do_notify_resume
-#ifdef CONFIG_TRACE_IRQFLAGS
-       bl      trace_hardirqs_on               // enabled while in userspace
-#endif
-       ldr     x1, [tsk, #TSK_TI_FLAGS]        // re-check for single-step
+       ldr     x19, [tsk, #TSK_TI_FLAGS]       // re-check for single-step
        b       finish_ret_to_user
 SYM_CODE_END(ret_to_user)
 
index 9cf2fb8..60456a6 100644 (file)
@@ -67,18 +67,3 @@ void __init init_IRQ(void)
                local_daif_restore(DAIF_PROCCTX_NOIRQ);
        }
 }
-
-/*
- * Stubs to make nmi_enter/exit() code callable from ASM
- */
-asmlinkage void notrace asm_nmi_enter(void)
-{
-       nmi_enter();
-}
-NOKPROBE_SYMBOL(asm_nmi_enter);
-
-asmlinkage void notrace asm_nmi_exit(void)
-{
-       nmi_exit();
-}
-NOKPROBE_SYMBOL(asm_nmi_exit);
index 71005cb..6616486 100644 (file)
@@ -72,13 +72,13 @@ EXPORT_SYMBOL_GPL(pm_power_off);
 
 void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 
-static void __cpu_do_idle(void)
+static void noinstr __cpu_do_idle(void)
 {
        dsb(sy);
        wfi();
 }
 
-static void __cpu_do_idle_irqprio(void)
+static void noinstr __cpu_do_idle_irqprio(void)
 {
        unsigned long pmr;
        unsigned long daif_bits;
@@ -108,7 +108,7 @@ static void __cpu_do_idle_irqprio(void)
  *     ensure that interrupts are not masked at the PMR (because the core will
  *     not wake up if we block the wake up signal in the interrupt controller).
  */
-void cpu_do_idle(void)
+void noinstr cpu_do_idle(void)
 {
        if (system_uses_irq_prio_masking())
                __cpu_do_idle_irqprio();
@@ -119,14 +119,14 @@ void cpu_do_idle(void)
 /*
  * This is our default idle handler.
  */
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
 {
        /*
         * This should do all the clock switching and wait for interrupt
         * tricks
         */
        cpu_do_idle();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
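
The local_irq_enable() -> raw_local_irq_enable() switch follows from the
noinstr annotations above: the traced variant calls into lockdep and irq-flag
tracing, which must not run from uninstrumented idle code. A simplified sketch
of the distinction (condensed from include/linux/irqflags.h; the real macros
carry more debug plumbing):

    #define raw_local_irq_enable()  arch_local_irq_enable()  /* no tracing */

    #define local_irq_enable()                                         \
            do {                                                       \
                    trace_hardirqs_on();    /* instrumentation ...  */ \
                    raw_local_irq_enable(); /* forbidden in noinstr */ \
            } while (0)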
index e04b3e9..2132bd9 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/alternative.h>
+#include <asm/exception.h>
 #include <asm/kprobes.h>
 #include <asm/mmu.h>
 #include <asm/ptrace.h>
@@ -238,7 +239,7 @@ static void __kprobes notrace __sdei_pstate_entry(void)
                set_pstate_pan(0);
 }
 
-asmlinkage __kprobes notrace unsigned long
+asmlinkage noinstr unsigned long
 __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
 {
        unsigned long ret;
@@ -249,11 +250,11 @@ __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
         */
        __sdei_pstate_entry();
 
-       nmi_enter();
+       arm64_enter_nmi(regs);
 
        ret = _sdei_handler(regs, arg);
 
-       nmi_exit();
+       arm64_exit_nmi(regs);
 
        return ret;
 }
index e4c0dad..f8f758e 100644 (file)
@@ -121,7 +121,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 
        cortex_a76_erratum_1463225_svc_handler();
        local_daif_restore(DAIF_PROCCTX);
-       user_exit();
 
        if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) {
                /*
index 8af4e0e..2059d8f 100644 (file)
@@ -34,6 +34,7 @@
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
+#include <asm/exception.h>
 #include <asm/extable.h>
 #include <asm/insn.h>
 #include <asm/kprobes.h>
@@ -753,8 +754,10 @@ const char *esr_get_class_string(u32 esr)
  * bad_mode handles the impossible case in the exception vector. This is always
  * fatal.
  */
-asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
+asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
 {
+       arm64_enter_nmi(regs);
+
        console_verbose();
 
        pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
@@ -786,7 +789,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
        __aligned(16);
 
-asmlinkage void handle_bad_stack(struct pt_regs *regs)
+asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
 {
        unsigned long tsk_stk = (unsigned long)current->stack;
        unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
@@ -794,6 +797,8 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
        unsigned int esr = read_sysreg(esr_el1);
        unsigned long far = read_sysreg(far_el1);
 
+       arm64_enter_nmi(regs);
+
        console_verbose();
        pr_emerg("Insufficient stack space to handle exception!");
 
@@ -865,23 +870,16 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
        }
 }
 
-asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr)
 {
-       nmi_enter();
+       arm64_enter_nmi(regs);
 
        /* non-RAS errors are not containable */
        if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
                arm64_serror_panic(regs, esr);
 
-       nmi_exit();
-}
-
-asmlinkage void enter_from_user_mode(void)
-{
-       CT_WARN_ON(ct_state() != CONTEXT_USER);
-       user_exit_irqoff();
+       arm64_exit_nmi(regs);
 }
-NOKPROBE_SYMBOL(enter_from_user_mode);
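
arm64_enter_nmi()/arm64_exit_nmi() replace the bare nmi_enter()/nmi_exit()
pair so that lockdep, RCU and ftrace state are handled in one place. A sketch
of the entry side (assumed shape; the real function lives with the C entry
code):

    asmlinkage void noinstr arm64_enter_nmi(struct pt_regs *regs)
    {
            regs->lockdep_hardirqs = lockdep_hardirqs_enabled();

            __nmi_enter();
            lockdep_hardirqs_off(CALLER_ADDR0);
            lockdep_hardirq_enter();
            rcu_nmi_enter();

            trace_hardirqs_off_finish();
            ftrace_nmi_enter();
    }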
 
 /* GENERIC_BUG traps */
 
index 5d76ff2..1206d0d 100644 (file)
 
 SECTIONS {
        HYP_SECTION(.text)
+       /*
+        * .hyp..data..percpu needs to be page aligned to maintain the same
+        * alignment when linked into vmlinux.
+        */
+       . = ALIGN(PAGE_SIZE);
        HYP_SECTION_NAME(.data..percpu) : {
                PERCPU_INPUT(L1_CACHE_BYTES)
        }
index 0271b4a..bdf8e55 100644 (file)
@@ -470,6 +470,15 @@ static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
        if (!kvm_block_mapping_supported(addr, end, phys, level))
                return false;
 
+       /*
+        * If the PTE was already valid, drop the refcount on the table
+        * early, as it will be bumped up again in stage2_map_walk_leaf().
+        * This ensures that the refcount stays constant across a
+        * valid-to-valid PTE update.
+        */
+       if (kvm_pte_valid(*ptep))
+               put_page(virt_to_page(ptep));
+
        if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
                goto out;
 
@@ -493,7 +502,13 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
                return 0;
 
        kvm_set_invalid_pte(ptep);
-       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0);
+
+       /*
+        * Invalidate the whole stage-2, as we may have numerous leaf
+        * entries below us which would otherwise need invalidating
+        * individually.
+        */
+       kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
        data->anchor = ptep;
        return 0;
 }
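
The trade-off behind the whole-VMID flush, as a rough worked example
(illustrative arithmetic only): collapsing a level-1 table with 4K pages can
cover up to 512 * 512 = 262144 leaf entries, so a single __kvm_tlb_flush_vmid
replaces what would otherwise be one __kvm_tlb_flush_vmid_ipa per leaf.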
index 1f41173..7d2257c 100644 (file)
@@ -754,10 +754,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        gfn_t gfn;
        kvm_pfn_t pfn;
        bool logging_active = memslot_is_logging(memslot);
-       unsigned long vma_pagesize;
+       unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
+       unsigned long vma_pagesize, fault_granule;
        enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
        struct kvm_pgtable *pgt;
 
+       fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
        write_fault = kvm_is_write_fault(vcpu);
        exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
        VM_BUG_ON(write_fault && exec_fault);
@@ -896,7 +898,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
                prot |= KVM_PGTABLE_PROT_X;
 
-       if (fault_status == FSC_PERM && !(logging_active && writable)) {
+       /*
+        * When we get an FSC_PERM fault, we only need to relax permissions
+        * if vma_pagesize equals fault_granule. Otherwise,
+        * kvm_pgtable_stage2_map() should be called to change block size.
+        */
+       if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
                ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
        } else {
                ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
index 52d6f24..15a6c98 100644 (file)
@@ -273,6 +273,23 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
        return extract_bytes(value, addr & 7, len);
 }
 
+static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu,
+                                                gpa_t addr, unsigned int len)
+{
+       unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
+       int target_vcpu_id = vcpu->vcpu_id;
+       u64 value;
+
+       value = (u64)(mpidr & GENMASK(23, 0)) << 32;
+       value |= ((target_vcpu_id & 0xffff) << 8);
+
+       if (vgic_has_its(vcpu->kvm))
+               value |= GICR_TYPER_PLPIS;
+
+       /* reporting of the Last bit is not supported for userspace */
+       return extract_bytes(value, addr & 7, len);
+}
+
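
For orientation, a worked example of the value assembled above, using a
hypothetical vCPU with vcpu_id = 3 and MPIDR affinity bits 0x000102:

    /*
     * value = (0x000102ULL & GENMASK(23, 0)) << 32  // Affinity, bits [63:32]
     *       | ((3 & 0xffff) << 8)                   // Processor_Number, [23:8]
     *       | GICR_TYPER_PLPIS;                     // bit 0, if the VM has an ITS
     *
     * Unlike the MMIO accessor, GICR_TYPER.Last (bit 4) stays clear here,
     * which is exactly the unsupported-Last-bit note above.
     */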
 static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
                                             gpa_t addr, unsigned int len)
 {
@@ -593,8 +610,9 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
        REGISTER_DESC_WITH_LENGTH(GICR_IIDR,
                vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,
                VGIC_ACCESS_32bit),
-       REGISTER_DESC_WITH_LENGTH(GICR_TYPER,
-               vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8,
+       REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER,
+               vgic_mmio_read_v3r_typer, vgic_mmio_write_wi,
+               vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,
                VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
                vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
index 8dc17e6..51549a6 100644 (file)
@@ -784,25 +784,6 @@ void __init hook_debug_fault_code(int nr,
  */
 static void debug_exception_enter(struct pt_regs *regs)
 {
-       /*
-        * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
-        * already disabled to preserve the last enabled/disabled addresses.
-        */
-       if (interrupts_enabled(regs))
-               trace_hardirqs_off();
-
-       if (user_mode(regs)) {
-               RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
-       } else {
-               /*
-                * We might have interrupted pretty much anything.  In
-                * fact, if we're a debug exception, we can even interrupt
-                * NMI processing. We don't want this code makes in_nmi()
-                * to return true, but we need to notify RCU.
-                */
-               rcu_nmi_enter();
-       }
-
        preempt_disable();
 
        /* This code is a bit fragile.  Test it. */
@@ -813,12 +794,6 @@ NOKPROBE_SYMBOL(debug_exception_enter);
 static void debug_exception_exit(struct pt_regs *regs)
 {
        preempt_enable_no_resched();
-
-       if (!user_mode(regs))
-               rcu_nmi_exit();
-
-       if (interrupts_enabled(regs))
-               trace_hardirqs_on();
 }
 NOKPROBE_SYMBOL(debug_exception_exit);
 
index f730869..69af6bc 100644 (file)
@@ -102,6 +102,6 @@ void arch_cpu_idle(void)
 #ifdef CONFIG_CPU_PM_STOP
        asm volatile("stop\n");
 #endif
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 #endif
index aea0a40..bc1364d 100644 (file)
@@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void);
  */
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
        __asm__("sleep");
 }
 
index 5a0a95d..67767c5 100644 (file)
@@ -44,7 +44,7 @@ void arch_cpu_idle(void)
 {
        __vmwait();
        /*  interrupts wake us up, but irqs are still disabled */
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /*
index 336d057..dd8c166 100644 (file)
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int memory_add_physaddr_to_nid(u64 addr);
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+#endif
+
 #endif /* _ASM_IA64_SPARSEMEM_H */
index 6b61a70..c9ff879 100644 (file)
@@ -239,7 +239,7 @@ void arch_cpu_idle(void)
        if (mark_idle)
                (*mark_idle)(1);
 
-       safe_halt();
+       raw_safe_halt();
 
        if (mark_idle)
                (*mark_idle)(0);
index a9e46e5..f998607 100644 (file)
@@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
 
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
 }
index a95a894..f0c8303 100644 (file)
@@ -152,6 +152,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name,
 {
        struct clk_init_data id;
        struct clk_hw *h;
+       struct clk *clk;
 
        h = kzalloc(sizeof(*h), GFP_KERNEL);
        if (!h)
@@ -164,7 +165,13 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name,
        id.ops = &alchemy_clkops_cpu;
        h->init = &id;
 
-       return clk_register(NULL, h);
+       clk = clk_register(NULL, h);
+       if (IS_ERR(clk)) {
+               pr_err("failed to register clock\n");
+               kfree(h);
+       }
+
+       return clk;
 }
 
 /* AUXPLLs ************************************************************/
index a950fc1..6c0532d 100644 (file)
@@ -154,6 +154,7 @@ static inline void pmd_clear(pmd_t *pmdp)
 
 #if defined(CONFIG_XPA)
 
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
 #define pte_pfn(x)             (((unsigned long)((x).pte_high >> _PFN_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT))
 static inline pte_t
 pfn_pte(unsigned long pfn, pgprot_t prot)
@@ -169,6 +170,7 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
 
 #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
 
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
 #define pte_pfn(x)             ((unsigned long)((x).pte_high >> 6))
 
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
@@ -183,6 +185,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 
 #else
 
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
 #ifdef CONFIG_CPU_VR41XX
 #define pte_pfn(x)             ((unsigned long)((x).pte >> (PAGE_SHIFT + 2)))
 #define pfn_pte(pfn, prot)     __pte(((pfn) << (PAGE_SHIFT + 2)) | pgprot_val(prot))
index 5bc3b04..18e69eb 100644 (file)
@@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void)
 {
        unsigned long cfg = read_c0_conf();
        write_c0_conf(cfg | R30XX_CONF_HALT);
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 static void __cpuidle r39xx_wait(void)
 {
        if (!need_resched())
                write_c0_conf(read_c0_conf() | TX39_CONF_HALT);
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 void __cpuidle r4k_wait(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
        __r4k_wait();
 }
 
@@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void)
                "       .set    arch=r4000      \n"
                "       wait                    \n"
                "       .set    pop             \n");
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /*
@@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void)
                "       wait                                            \n"
                "       mtc0    $1, $12         # stalls until W stage  \n"
                "       .set    pop                                     \n");
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /*
@@ -257,7 +257,7 @@ void arch_cpu_idle(void)
        if (cpu_wait)
                cpu_wait();
        else
-               local_irq_enable();
+               raw_local_irq_enable();
 }
 
 #ifdef CONFIG_CPU_IDLE
index 0d42532..ca579de 100644 (file)
@@ -262,8 +262,8 @@ static void __init bootmem_init(void)
 static void __init bootmem_init(void)
 {
        phys_addr_t ramstart, ramend;
-       phys_addr_t start, end;
-       u64 i;
+       unsigned long start, end;
+       int i;
 
        ramstart = memblock_start_of_DRAM();
        ramend = memblock_end_of_DRAM();
@@ -300,7 +300,7 @@ static void __init bootmem_init(void)
 
        min_low_pfn = ARCH_PFN_OFFSET;
        max_pfn = PFN_DOWN(ramend);
-       for_each_mem_range(i, &start, &end) {
+       for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
                /*
                 * Skip highmem here so we get an accurate max_low_pfn if low
                 * memory stops short of high memory.
index 38e2894..1b939ab 100644 (file)
@@ -438,6 +438,7 @@ int has_transparent_hugepage(void)
        }
        return mask == PM_HUGE_MASK;
 }
+EXPORT_SYMBOL(has_transparent_hugepage);
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE  */
 
index 4ffe857..50b4eb1 100644 (file)
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /*
index 0ff391f..3c98728 100644 (file)
@@ -79,7 +79,7 @@ void machine_power_off(void)
  */
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
        if (mfspr(SPR_UPR) & SPR_UPR_PMP)
                mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
 }
index f196d96..a92a23d 100644 (file)
@@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void)
 
 void __cpuidle arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
 
        /* nop on real hardware, qemu will idle sleep. */
        asm volatile("or %%r10,%%r10,%%r10\n":::);
index e9f13fe..5181872 100644 (file)
@@ -152,6 +152,7 @@ config PPC
        select ARCH_USE_QUEUED_SPINLOCKS        if PPC_QUEUED_SPINLOCKS
        select ARCH_WANT_IPC_PARSE_VERSION
        select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+       select ARCH_WANT_LD_ORPHAN_WARN
        select ARCH_WEAK_RELEASE_ACQUIRE
        select BINFMT_ELF
        select BUILDTIME_TABLE_SORT
index a4d56f0..5c8c062 100644 (file)
@@ -123,7 +123,6 @@ endif
 LDFLAGS_vmlinux-y := -Bstatic
 LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
 LDFLAGS_vmlinux        := $(LDFLAGS_vmlinux-y)
-LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn)
 
 ifdef CONFIG_PPC64
 ifeq ($(call cc-option-yn,-mcmodel=medium),y)
@@ -248,7 +247,6 @@ KBUILD_CFLAGS               += $(call cc-option,-mno-string)
 cpu-as-$(CONFIG_40x)           += -Wa,-m405
 cpu-as-$(CONFIG_44x)           += -Wa,-m440
 cpu-as-$(CONFIG_ALTIVEC)       += $(call as-option,-Wa$(comma)-maltivec)
-cpu-as-$(CONFIG_E200)          += -Wa,-me200
 cpu-as-$(CONFIG_E500)          += -Wa,-me500
 
 # When using '-many -mpower4' gas will first try and find a matching power4
index 36443cd..1376be9 100644 (file)
@@ -36,8 +36,10 @@ static inline bool pte_user(pte_t pte)
  */
 #ifdef CONFIG_PTE_64BIT
 #define PTE_RPN_MASK   (~((1ULL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
 #else
 #define PTE_RPN_MASK   (~((1UL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
 #endif
 
 /*
index 3ee1ec6..a39e2d1 100644 (file)
@@ -27,6 +27,7 @@
 #endif
 .endm
 
+#ifdef CONFIG_PPC_KUAP
 .macro kuap_check_amr gpr1, gpr2
 #ifdef CONFIG_PPC_KUAP_DEBUG
        BEGIN_MMU_FTR_SECTION_NESTED(67)
@@ -38,6 +39,7 @@
        END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
 #endif
 .endm
+#endif
 
 .macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
 #ifdef CONFIG_PPC_KUAP
 
 #else /* !__ASSEMBLY__ */
 
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);
+
 #ifdef CONFIG_PPC_KUAP
 
 #include <asm/mmu.h>
@@ -103,8 +109,16 @@ static inline void kuap_check_amr(void)
 
 static inline unsigned long get_kuap(void)
 {
+       /*
+        * We return AMR_KUAP_BLOCKED when we don't support KUAP, so that
+        * prevent_user_access_return() hands AMR_KUAP_BLOCKED to
+        * restore_user_access(), which then does a flush.
+        *
+        * This has no effect in terms of actually blocking things on hash,
+        * so it doesn't break anything.
+        */
        if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
-               return 0;
+               return AMR_KUAP_BLOCKED;
 
        return mfspr(SPRN_AMR);
 }
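
A sketch of the bracket this return value feeds (assumed caller shape; the
real users are the uaccess helpers built on these primitives):

    unsigned long flags;

    flags = prevent_user_access_return();  /* without KUAP: AMR_KUAP_BLOCKED */
    /* ... window in which user memory was accessible ... */
    restore_user_access(flags);            /* flags == AMR_KUAP_BLOCKED, so
                                              do_uaccess_flush() runs */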
@@ -123,6 +137,29 @@ static inline void set_kuap(unsigned long value)
        isync();
 }
 
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+       return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
+                   (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
+                   "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
+}
+#else /* CONFIG_PPC_KUAP */
+static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { }
+
+static inline unsigned long kuap_get_and_check_amr(void)
+{
+       return 0UL;
+}
+
+static inline unsigned long get_kuap(void)
+{
+       return AMR_KUAP_BLOCKED;
+}
+
+static inline void set_kuap(unsigned long value) { }
+#endif /* !CONFIG_PPC_KUAP */
+
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
                                              unsigned long size, unsigned long dir)
 {
@@ -142,6 +179,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
                                       unsigned long size, unsigned long dir)
 {
        set_kuap(AMR_KUAP_BLOCKED);
+       if (static_branch_unlikely(&uaccess_flush_key))
+               do_uaccess_flush();
 }
 
 static inline unsigned long prevent_user_access_return(void)
@@ -149,6 +188,8 @@ static inline unsigned long prevent_user_access_return(void)
        unsigned long flags = get_kuap();
 
        set_kuap(AMR_KUAP_BLOCKED);
+       if (static_branch_unlikely(&uaccess_flush_key))
+               do_uaccess_flush();
 
        return flags;
 }
@@ -156,30 +197,9 @@ static inline unsigned long prevent_user_access_return(void)
 static inline void restore_user_access(unsigned long flags)
 {
        set_kuap(flags);
+       if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED)
+               do_uaccess_flush();
 }
-
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
-{
-       return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
-                   (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
-                   "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
-}
-#else /* CONFIG_PPC_KUAP */
-static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr)
-{
-}
-
-static inline void kuap_check_amr(void)
-{
-}
-
-static inline unsigned long kuap_get_and_check_amr(void)
-{
-       return 0;
-}
-#endif /* CONFIG_PPC_KUAP */
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */
index e0b5294..7509184 100644 (file)
@@ -242,6 +242,18 @@ extern void radix_init_pseries(void);
 static inline void radix_init_pseries(void) { };
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+#define arch_clear_mm_cpumask_cpu(cpu, mm)                             \
+       do {                                                            \
+               if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {            \
+                       atomic_dec(&(mm)->context.active_cpus);         \
+                       cpumask_clear_cpu(cpu, mm_cpumask(mm));         \
+               }                                                       \
+       } while (0)
+
+void cleanup_cpu_mmu_context(void);
+#endif
+
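
A simplified view of who invokes this hook (a sketch of the loop in
kernel/cpu.c::clear_tasks_mm_cpumask(), not the exact upstream code):

    for_each_process(p) {
            struct task_struct *t = find_lock_task_mm(p);

            if (!t)
                    continue;
            arch_clear_mm_cpumask_cpu(cpu, t->mm);  /* hook defined above */
            task_unlock(t);
    }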
 static inline int get_user_context(mm_context_t *ctx, unsigned long ea)
 {
        int index = ea >> MAX_EA_BITS_PER_CONTEXT;
index ebe95aa..1d32b17 100644 (file)
        nop;                                                            \
        nop
 
+#define ENTRY_FLUSH_SLOT                                               \
+       ENTRY_FLUSH_FIXUP_SECTION;                                      \
+       nop;                                                            \
+       nop;                                                            \
+       nop;
+
 /*
  * r10 must be free to use, r13 must be paca
  */
 #define INTERRUPT_TO_KERNEL                                            \
-       STF_ENTRY_BARRIER_SLOT
+       STF_ENTRY_BARRIER_SLOT;                                         \
+       ENTRY_FLUSH_SLOT
 
 /*
  * Macros for annotating the expected destination of (h)rfid
        RFSCV;                                                          \
        b       rfscv_flush_fallback
 
+#else /* __ASSEMBLY__ */
+/* Prototype for function defined in exceptions-64s.S */
+void do_uaccess_flush(void);
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_EXCEPTION_H */
index b0af97a..fbd406c 100644 (file)
@@ -205,6 +205,22 @@ label##3:                                          \
        FTR_ENTRY_OFFSET 955b-956b;                     \
        .popsection;
 
+#define UACCESS_FLUSH_FIXUP_SECTION                    \
+959:                                                   \
+       .pushsection __uaccess_flush_fixup,"a";         \
+       .align 2;                                       \
+960:                                                   \
+       FTR_ENTRY_OFFSET 959b-960b;                     \
+       .popsection;
+
+#define ENTRY_FLUSH_FIXUP_SECTION                      \
+957:                                                   \
+       .pushsection __entry_flush_fixup,"a";           \
+       .align 2;                                       \
+958:                                                   \
+       FTR_ENTRY_OFFSET 957b-958b;                     \
+       .popsection;
+
 #define RFI_FLUSH_FIXUP_SECTION                                \
 951:                                                   \
        .pushsection __rfi_flush_fixup,"a";             \
@@ -237,8 +253,11 @@ label##3:                                          \
 #include <linux/types.h>
 
 extern long stf_barrier_fallback;
+extern long entry_flush_fallback;
 extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
 extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
+extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup;
+extern long __start___entry_flush_fixup, __stop___entry_flush_fixup;
 extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
 extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
 extern long __start__btb_flush_fixup, __stop__btb_flush_fixup;
index 1d0f7d8..0d93331 100644 (file)
@@ -14,7 +14,7 @@
 #define KUAP_CURRENT_WRITE     8
 #define KUAP_CURRENT           (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE)
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/book3s/64/kup-radix.h>
 #endif
 #ifdef CONFIG_PPC_8xx
@@ -35,6 +35,9 @@
 .macro kuap_check      current, gpr
 .endm
 
+.macro kuap_check_amr  gpr1, gpr2
+.endm
+
 #endif
 
 #else /* !__ASSEMBLY__ */
@@ -53,17 +56,28 @@ static inline void setup_kuep(bool disabled) { }
 void setup_kuap(bool disabled);
 #else
 static inline void setup_kuap(bool disabled) { }
+
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+       return false;
+}
+
+static inline void kuap_check_amr(void) { }
+
+/*
+ * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
+ * the L1D cache after user accesses. Only include the empty stubs for other
+ * platforms.
+ */
+#ifndef CONFIG_PPC_BOOK3S_64
 static inline void allow_user_access(void __user *to, const void __user *from,
                                     unsigned long size, unsigned long dir) { }
 static inline void prevent_user_access(void __user *to, const void __user *from,
                                       unsigned long size, unsigned long dir) { }
 static inline unsigned long prevent_user_access_return(void) { return 0UL; }
 static inline void restore_user_access(unsigned long flags) { }
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
-{
-       return false;
-}
+#endif /* CONFIG_PPC_BOOK3S_64 */
 #endif /* CONFIG_PPC_KUAP */
 
 static inline void allow_read_from_user(const void __user *from, unsigned long size)
index 91c69ff..6cda76b 100644 (file)
@@ -46,5 +46,10 @@ u64 memory_hotplug_max(void);
 #define __HAVE_ARCH_RESERVED_KERNEL_PAGES
 #endif
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern int create_section_mapping(unsigned long start, unsigned long end,
+                                 int nid, pgprot_t prot);
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_MMZONE_H_ */
index ee2243b..96522f7 100644 (file)
@@ -153,8 +153,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
  */
 #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
 #define PTE_RPN_MASK   (~((1ULL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
 #else
 #define PTE_RPN_MASK   (~((1UL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
 #endif
 
 /*
index fbb8fa3..b774a44 100644 (file)
@@ -86,12 +86,19 @@ static inline bool security_ftr_enabled(u64 feature)
 // Software required to flush link stack on context switch
 #define SEC_FTR_FLUSH_LINK_STACK       0x0000000000001000ull
 
+// The L1-D cache should be flushed when entering the kernel
+#define SEC_FTR_L1D_FLUSH_ENTRY                0x0000000000004000ull
+
+// The L1-D cache should be flushed after user accesses from the kernel
+#define SEC_FTR_L1D_FLUSH_UACCESS      0x0000000000008000ull
 
 // Features enabled by default
 #define SEC_FTR_DEFAULT \
        (SEC_FTR_L1D_FLUSH_HV | \
         SEC_FTR_L1D_FLUSH_PR | \
         SEC_FTR_BNDS_CHK_SPEC_BAR | \
+        SEC_FTR_L1D_FLUSH_ENTRY | \
+        SEC_FTR_L1D_FLUSH_UACCESS | \
         SEC_FTR_FAVOUR_SECURITY)
 
 #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
index 9efbdde..a466749 100644 (file)
@@ -52,12 +52,16 @@ enum l1d_flush_type {
 };
 
 void setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_entry_flush(bool enable);
+void setup_uaccess_flush(bool enable);
 void do_rfi_flush_fixups(enum l1d_flush_type types);
 #ifdef CONFIG_PPC_BARRIER_NOSPEC
 void setup_barrier_nospec(void);
 #else
 static inline void setup_barrier_nospec(void) { };
 #endif
+void do_uaccess_flush_fixups(enum l1d_flush_type types);
+void do_entry_flush_fixups(enum l1d_flush_type types);
 void do_barrier_nospec_fixups(bool enable);
 extern bool barrier_nospec_enabled;
 
index 1e6fa37..d072866 100644 (file)
@@ -13,9 +13,9 @@
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end,
-                                 int nid, pgprot_t prot);
 extern int remove_section_mapping(unsigned long start, unsigned long end);
+extern int memory_add_physaddr_to_nid(u64 start);
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
 
 #ifdef CONFIG_NUMA
 extern int hot_add_scn_to_nid(unsigned long scn_addr);
@@ -26,6 +26,5 @@ static inline int hot_add_scn_to_nid(unsigned long scn_addr)
 }
 #endif /* CONFIG_NUMA */
 #endif /* CONFIG_MEMORY_HOTPLUG */
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SPARSEMEM_H */
index f7d748b..4d01f09 100644 (file)
@@ -1000,8 +1000,6 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
  * Vectors for the FWNMI option.  Share common code.
  */
 TRAMP_REAL_BEGIN(system_reset_fwnmi)
-       /* XXX: fwnmi guest could run a nested/PR guest, so why no test?  */
-       __IKVM_REAL(system_reset)=0
        GEN_INT_ENTRY system_reset, virt=0
 
 #endif /* CONFIG_PPC_PSERIES */
@@ -1412,6 +1410,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
  *   If none is found, do a Linux page fault. Linux page faults can happen in
  *   kernel mode due to user copy operations of course.
  *
+ *   KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest
+ *   MMU context, which may cause a DSI in the host, which must go to the
+ *   KVM handler. MSR[IR] is not enabled, so the real-mode handler will
+ *   always be used regardless of AIL setting.
+ *
  * - Radix MMU
  *   The hardware loads from the Linux page table directly, so a fault goes
  *   immediately to Linux page fault.
@@ -1422,10 +1425,8 @@ INT_DEFINE_BEGIN(data_access)
        IVEC=0x300
        IDAR=1
        IDSISR=1
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
        IKVM_SKIP=1
        IKVM_REAL=1
-#endif
 INT_DEFINE_END(data_access)
 
 EXC_REAL_BEGIN(data_access, 0x300, 0x80)
@@ -1464,6 +1465,8 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
  *   ppc64_bolted_size (first segment). The kernel handler must avoid stomping
  *   on user-handler data structures.
  *
+ *   KVM: Same as 0x300, DSLB must test for KVM guest.
+ *
  * A dedicated save area EXSLB is used (XXX: but it actually need not be
  * these days, we could use EXGEN).
  */
@@ -1472,10 +1475,8 @@ INT_DEFINE_BEGIN(data_access_slb)
        IAREA=PACA_EXSLB
        IRECONCILE=0
        IDAR=1
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
        IKVM_SKIP=1
        IKVM_REAL=1
-#endif
 INT_DEFINE_END(data_access_slb)
 
 EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
@@ -2951,15 +2952,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)
        .endr
        blr
 
-TRAMP_REAL_BEGIN(rfi_flush_fallback)
-       SET_SCRATCH0(r13);
-       GET_PACA(r13);
-       std     r1,PACA_EXRFI+EX_R12(r13)
-       ld      r1,PACAKSAVE(r13)
-       std     r9,PACA_EXRFI+EX_R9(r13)
-       std     r10,PACA_EXRFI+EX_R10(r13)
-       std     r11,PACA_EXRFI+EX_R11(r13)
-       mfctr   r9
+/* Clobbers r10, r11, ctr */
+.macro L1D_DISPLACEMENT_FLUSH
        ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
        ld      r11,PACA_L1D_FLUSH_SIZE(r13)
        srdi    r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
@@ -2970,7 +2964,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
        sync
 
        /*
-        * The load adresses are at staggered offsets within cachelines,
+        * The load addresses are at staggered offsets within cachelines,
         * which suits some pipelines better (on others it should not
         * hurt).
         */
@@ -2985,7 +2979,30 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
        ld      r11,(0x80 + 8)*7(r10)
        addi    r10,r10,0x80*8
        bdnz    1b
+.endm
 
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       std     r11,PACA_EXRFI+EX_R11(r13)
+       mfctr   r9
+       L1D_DISPLACEMENT_FLUSH
+       mtctr   r9
+       ld      r9,PACA_EXRFI+EX_R9(r13)
+       ld      r10,PACA_EXRFI+EX_R10(r13)
+       ld      r11,PACA_EXRFI+EX_R11(r13)
+       blr
+
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+       SET_SCRATCH0(r13);
+       GET_PACA(r13);
+       std     r1,PACA_EXRFI+EX_R12(r13)
+       ld      r1,PACAKSAVE(r13)
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       std     r11,PACA_EXRFI+EX_R11(r13)
+       mfctr   r9
+       L1D_DISPLACEMENT_FLUSH
        mtctr   r9
        ld      r9,PACA_EXRFI+EX_R9(r13)
        ld      r10,PACA_EXRFI+EX_R10(r13)
@@ -3003,32 +3020,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
        std     r10,PACA_EXRFI+EX_R10(r13)
        std     r11,PACA_EXRFI+EX_R11(r13)
        mfctr   r9
-       ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
-       ld      r11,PACA_L1D_FLUSH_SIZE(r13)
-       srdi    r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
-       mtctr   r11
-       DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
-
-       /* order ld/st prior to dcbt stop all streams with flushing */
-       sync
-
-       /*
-        * The load adresses are at staggered offsets within cachelines,
-        * which suits some pipelines better (on others it should not
-        * hurt).
-        */
-1:
-       ld      r11,(0x80 + 8)*0(r10)
-       ld      r11,(0x80 + 8)*1(r10)
-       ld      r11,(0x80 + 8)*2(r10)
-       ld      r11,(0x80 + 8)*3(r10)
-       ld      r11,(0x80 + 8)*4(r10)
-       ld      r11,(0x80 + 8)*5(r10)
-       ld      r11,(0x80 + 8)*6(r10)
-       ld      r11,(0x80 + 8)*7(r10)
-       addi    r10,r10,0x80*8
-       bdnz    1b
-
+       L1D_DISPLACEMENT_FLUSH
        mtctr   r9
        ld      r9,PACA_EXRFI+EX_R9(r13)
        ld      r10,PACA_EXRFI+EX_R10(r13)
@@ -3079,8 +3071,21 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
        RFSCV
 
 USE_TEXT_SECTION()
-       MASKED_INTERRUPT
-       MASKED_INTERRUPT hsrr=1
+
+_GLOBAL(do_uaccess_flush)
+       UACCESS_FLUSH_FIXUP_SECTION
+       nop
+       nop
+       nop
+       blr
+       L1D_DISPLACEMENT_FLUSH
+       blr
+_ASM_NOKPROBE_SYMBOL(do_uaccess_flush)
+EXPORT_SYMBOL(do_uaccess_flush)
+
+
+MASKED_INTERRUPT
+MASKED_INTERRUPT hsrr=1
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 kvmppc_skip_interrupt:
index 2aa16d5..a0dda2a 100644 (file)
@@ -156,6 +156,7 @@ __after_mmu_off:
        bl      initial_bats
        bl      load_segment_registers
 BEGIN_MMU_FTR_SECTION
+       bl      reloc_offset
        bl      early_hash_table
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
 #if defined(CONFIG_BOOTX_TEXT)
@@ -920,7 +921,7 @@ early_hash_table:
        ori     r6, r6, 3       /* 256kB table */
        mtspr   SPRN_SDR1, r6
        lis     r6, early_hash@h
-       lis     r3, Hash@ha
+       addis   r3, r3, Hash@ha
        stw     r6, Hash@l(r3)
        blr
 
index ae0e263..1f83553 100644 (file)
@@ -52,9 +52,9 @@ void arch_cpu_idle(void)
                 * interrupts enabled, some don't.
                 */
                if (irqs_disabled())
-                       local_irq_enable();
+                       raw_local_irq_enable();
        } else {
-               local_irq_enable();
+               raw_local_irq_enable();
                /*
                 * Go into low thread priority and possibly
                 * low power mode.
index bb9cab3..74fd47f 100644 (file)
@@ -945,7 +945,13 @@ early_initcall(disable_hardlockup_detector);
 static enum l1d_flush_type enabled_flush_types;
 static void *l1d_flush_fallback_area;
 static bool no_rfi_flush;
+static bool no_entry_flush;
+static bool no_uaccess_flush;
 bool rfi_flush;
+bool entry_flush;
+bool uaccess_flush;
+DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
+EXPORT_SYMBOL(uaccess_flush_key);
 
 static int __init handle_no_rfi_flush(char *p)
 {
@@ -955,6 +961,22 @@ static int __init handle_no_rfi_flush(char *p)
 }
 early_param("no_rfi_flush", handle_no_rfi_flush);
 
+static int __init handle_no_entry_flush(char *p)
+{
+       pr_info("entry-flush: disabled on command line.");
+       no_entry_flush = true;
+       return 0;
+}
+early_param("no_entry_flush", handle_no_entry_flush);
+
+static int __init handle_no_uaccess_flush(char *p)
+{
+       pr_info("uaccess-flush: disabled on command line.");
+       no_uaccess_flush = true;
+       return 0;
+}
+early_param("no_uaccess_flush", handle_no_uaccess_flush);
+
 /*
  * The RFI flush is not KPTI, but because users will see doco that says to use
  * nopti we hijack that option here to also disable the RFI flush.
@@ -986,6 +1008,32 @@ void rfi_flush_enable(bool enable)
        rfi_flush = enable;
 }
 
+void entry_flush_enable(bool enable)
+{
+       if (enable) {
+               do_entry_flush_fixups(enabled_flush_types);
+               on_each_cpu(do_nothing, NULL, 1);
+       } else {
+               do_entry_flush_fixups(L1D_FLUSH_NONE);
+       }
+
+       entry_flush = enable;
+}
+
+void uaccess_flush_enable(bool enable)
+{
+       if (enable) {
+               do_uaccess_flush_fixups(enabled_flush_types);
+               static_branch_enable(&uaccess_flush_key);
+               on_each_cpu(do_nothing, NULL, 1);
+       } else {
+               static_branch_disable(&uaccess_flush_key);
+               do_uaccess_flush_fixups(L1D_FLUSH_NONE);
+       }
+
+       uaccess_flush = enable;
+}
+
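
Ordering note, as a sketch of why enable and disable differ: on enable, the
flush stub is patched before the static key flips, so no C caller can reach
an unpatched stub; on disable, the key flips first, so callers stop before
the stub is patched back to an early return. The on_each_cpu(do_nothing, ...)
call is the same crude synchronisation point rfi_flush_enable() uses.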
 static void __ref init_fallback_flush(void)
 {
        u64 l1d_size, limit;
@@ -1044,10 +1092,28 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
 
        enabled_flush_types = types;
 
-       if (!no_rfi_flush && !cpu_mitigations_off())
+       if (!cpu_mitigations_off() && !no_rfi_flush)
                rfi_flush_enable(enable);
 }
 
+void setup_entry_flush(bool enable)
+{
+       if (cpu_mitigations_off())
+               return;
+
+       if (!no_entry_flush)
+               entry_flush_enable(enable);
+}
+
+void setup_uaccess_flush(bool enable)
+{
+       if (cpu_mitigations_off())
+               return;
+
+       if (!no_uaccess_flush)
+               uaccess_flush_enable(enable);
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int rfi_flush_set(void *data, u64 val)
 {
@@ -1075,9 +1141,63 @@ static int rfi_flush_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
 
+static int entry_flush_set(void *data, u64 val)
+{
+       bool enable;
+
+       if (val == 1)
+               enable = true;
+       else if (val == 0)
+               enable = false;
+       else
+               return -EINVAL;
+
+       /* Only do anything if we're changing state */
+       if (enable != entry_flush)
+               entry_flush_enable(enable);
+
+       return 0;
+}
+
+static int entry_flush_get(void *data, u64 *val)
+{
+       *val = entry_flush ? 1 : 0;
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+
+static int uaccess_flush_set(void *data, u64 val)
+{
+       bool enable;
+
+       if (val == 1)
+               enable = true;
+       else if (val == 0)
+               enable = false;
+       else
+               return -EINVAL;
+
+       /* Only do anything if we're changing state */
+       if (enable != uaccess_flush)
+               uaccess_flush_enable(enable);
+
+       return 0;
+}
+
+static int uaccess_flush_get(void *data, u64 *val)
+{
+       *val = uaccess_flush ? 1 : 0;
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
+
 static __init int rfi_flush_debugfs_init(void)
 {
        debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+       debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
+       debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
        return 0;
 }
 device_initcall(rfi_flush_debugfs_init);
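
With debugfs mounted in the usual place, these files allow runtime toggling
(paths assume powerpc_debugfs_root resolves to /sys/kernel/debug/powerpc):

    /*
     *   # cat /sys/kernel/debug/powerpc/entry_flush
     *   1
     *   # echo 0 > /sys/kernel/debug/powerpc/entry_flush    # disable
     *   # echo 1 > /sys/kernel/debug/powerpc/uaccess_flush  # enable
     *
     * Values other than 0 and 1 return -EINVAL, and a write only acts
     * when it changes the current state.
     */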
index 8e50818..310bcd7 100644 (file)
@@ -2,7 +2,7 @@
 
 #include <linux/err.h>
 #include <asm/asm-prototypes.h>
-#include <asm/book3s/64/kup-radix.h>
+#include <asm/kup.h>
 #include <asm/cputime.h>
 #include <asm/hw_irq.h>
 #include <asm/kprobes.h>
index e0548b4..6db90cd 100644 (file)
@@ -132,6 +132,20 @@ SECTIONS
        }
 
        . = ALIGN(8);
+       __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) {
+               __start___uaccess_flush_fixup = .;
+               *(__uaccess_flush_fixup)
+               __stop___uaccess_flush_fixup = .;
+       }
+
+       . = ALIGN(8);
+       __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
+               __start___entry_flush_fixup = .;
+               *(__entry_flush_fixup)
+               __stop___entry_flush_fixup = .;
+       }
+
+       . = ALIGN(8);
        __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
                __start___stf_exit_barrier_fixup = .;
                *(__stf_exit_barrier_fixup)
index 85215e7..a0ebc29 100644 (file)
@@ -1214,12 +1214,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
 static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
 {
        /* We have a block of xive->nr_servers VPs. We just need to check
-        * raw vCPU ids are below the expected limit for this guest's
-        * core stride ; kvmppc_pack_vcpu_id() will pack them down to an
-        * index that can be safely used to compute a VP id that belongs
-        * to the VP block.
+        * packed vCPU ids are below that.
         */
-       return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode;
+       return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers;
 }
 
 int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
index d0c2db0..a59a94f 100644 (file)
@@ -251,6 +251,13 @@ static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
        }
 
        state = &sb->irq_state[src];
+
+       /* Some sanity checking */
+       if (!state->valid) {
+               pr_devel("%s: source %lx invalid!\n", __func__, irq);
+               return VM_FAULT_SIGBUS;
+       }
+
        kvmppc_xive_select_irq(state, &hw_num, &xd);
 
        arch_spin_lock(&sb->lock);
index 4c0a7ee..321c12a 100644 (file)
@@ -234,6 +234,110 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
        do_stf_exit_barrier_fixups(types);
 }
 
+void do_uaccess_flush_fixups(enum l1d_flush_type types)
+{
+       unsigned int instrs[4], *dest;
+       long *start, *end;
+       int i;
+
+       start = PTRRELOC(&__start___uaccess_flush_fixup);
+       end = PTRRELOC(&__stop___uaccess_flush_fixup);
+
+       instrs[0] = 0x60000000; /* nop */
+       instrs[1] = 0x60000000; /* nop */
+       instrs[2] = 0x60000000; /* nop */
+       instrs[3] = 0x4e800020; /* blr */
+
+       i = 0;
+       if (types == L1D_FLUSH_FALLBACK) {
+               instrs[3] = 0x60000000; /* nop */
+               /* fallthrough to fallback flush */
+       }
+
+       if (types & L1D_FLUSH_ORI) {
+               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+               instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */
+       }
+
+       if (types & L1D_FLUSH_MTTRIG)
+               instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+       for (i = 0; start < end; start++, i++) {
+               dest = (void *)start + *start;
+
+               pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+               patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+
+               patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+               patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+               patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3]));
+       }
+
+       printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
+               (types == L1D_FLUSH_NONE)       ? "no" :
+               (types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" :
+               (types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG)
+                                                       ? "ori+mttrig type"
+                                                       : "ori type" :
+               (types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
+                                               : "unknown");
+}
+
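
How 'dest = (void *)start + *start' locates each patch site, as a short
sketch: every entry emitted by UACCESS_FLUSH_FIXUP_SECTION stores a
self-relative offset (FTR_ENTRY_OFFSET 959b-960b), so adding the stored value
to the entry's own address recovers the label in the text section:

    long *entry = PTRRELOC(&__start___uaccess_flush_fixup);
    unsigned int *site = (void *)entry + *entry;  /* first patch site */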
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+       unsigned int instrs[3], *dest;
+       long *start, *end;
+       int i;
+
+       start = PTRRELOC(&__start___entry_flush_fixup);
+       end = PTRRELOC(&__stop___entry_flush_fixup);
+
+       instrs[0] = 0x60000000; /* nop */
+       instrs[1] = 0x60000000; /* nop */
+       instrs[2] = 0x60000000; /* nop */
+
+       i = 0;
+       if (types == L1D_FLUSH_FALLBACK) {
+               instrs[i++] = 0x7d4802a6; /* mflr r10           */
+               instrs[i++] = 0x60000000; /* branch patched below */
+               instrs[i++] = 0x7d4803a6; /* mtlr r10           */
+       }
+
+       if (types & L1D_FLUSH_ORI) {
+               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+               instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */
+       }
+
+       if (types & L1D_FLUSH_MTTRIG)
+               instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+       for (i = 0; start < end; start++, i++) {
+               dest = (void *)start + *start;
+
+               pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+               patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+
+               if (types == L1D_FLUSH_FALLBACK)
+                       patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
+                                    BRANCH_SET_LINK);
+               else
+                       patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+
+               patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+       }
+
+       printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
+               (types == L1D_FLUSH_NONE)       ? "no" :
+               (types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" :
+               (types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG)
+                                                       ? "ori+mttrig type"
+                                                       : "ori type" :
+               (types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
+                                               : "unknown");
+}
+
 void do_rfi_flush_fixups(enum l1d_flush_type types)
 {
        unsigned int instrs[3], *dest;
index 5e14798..55b4a8b 100644 (file)
@@ -5,7 +5,7 @@
 
 ccflags-$(CONFIG_PPC64)        := $(NO_MINIMAL_TOC)
 
-obj-y                          := fault.o mem.o pgtable.o mmap.o \
+obj-y                          := fault.o mem.o pgtable.o mmap.o maccess.o \
                                   init_$(BITS).o pgtable_$(BITS).o \
                                   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
                                   init-common.o mmu_context.o drmem.o
index 0203cdf..52e170b 100644 (file)
@@ -68,7 +68,7 @@ static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned in
        rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
 
        asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
-                    : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+                    : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
                     : "memory");
 }
 
@@ -92,16 +92,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
        asm volatile("ptesync": : :"memory");
 
        /*
-        * Flush the first set of the TLB, and any caching of partition table
-        * entries. Then flush the remaining sets of the TLB. Hash mode uses
-        * partition scoped TLB translations.
+        * Flush the partition table cache if this is HV mode.
         */
-       tlbiel_hash_set_isa300(0, is, 0, 2, 0);
-       for (set = 1; set < num_sets; set++)
-               tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+       if (early_cpu_has_feature(CPU_FTR_HVMODE))
+               tlbiel_hash_set_isa300(0, is, 0, 2, 0);
 
        /*
-        * Now invalidate the process table cache.
+        * Now invalidate the process table cache. UPRT=0 HPT modes (what
+        * current hardware implements) do not use the process table, but
+        * we add the flushes anyway.
         *
         * From ISA v3.0B p. 1078:
         *     The following forms are invalid.
@@ -110,6 +109,14 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
         */
        tlbiel_hash_set_isa300(0, is, 0, 2, 1);
 
+       /*
+        * Then flush the sets of the TLB proper. Hash mode uses
+        * partition scoped TLB translations, which may be flushed
+        * in !HV mode.
+        */
+       for (set = 0; set < num_sets; set++)
+               tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
        ppc_after_tlbiel_barrier();
 
        asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
index 1c54821..0c85572 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/export.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/cpu.h>
 
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
@@ -307,3 +308,22 @@ void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
        isync();
 }
 #endif
+
+/**
+ * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
+ *
+ * This clears the CPU from mm_cpumask for all processes, and then flushes the
+ * local TLB to ensure TLB coherency in case the CPU is onlined again.
+ *
+ * KVM guest translations are not necessarily flushed here. If KVM started
+ * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+void cleanup_cpu_mmu_context(void)
+{
+       int cpu = smp_processor_id();
+
+       clear_tasks_mm_cpumask(cpu);
+       tlbiel_all();
+}
+#endif
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
new file mode 100644 (file)
index 0000000..fa9a7a7
--- /dev/null
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
+{
+       return is_kernel_addr((unsigned long)unsafe_src);
+}
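This hook gates the generic probe in mm/maccess.c: returning false makes copy_from_kernel_nofault() fail cleanly with -ERANGE instead of attempting the access, which keeps powerpc probing off user addresses. Roughly, the generic caller looks like:

	long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
	{
		if (!copy_from_kernel_nofault_allowed(src, size))
			return -ERANGE;

		pagefault_disable();
		/* ... __get_kernel_nofault() copy loop ... */
		pagefault_enable();
		return 0;
	}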
index 01ec2a2..3fc325b 100644 (file)
@@ -50,6 +50,7 @@
 #include <asm/rtas.h>
 #include <asm/kasan.h>
 #include <asm/svm.h>
+#include <asm/mmzone.h>
 
 #include <mm/mmu_decl.h>
 
index 63f61d8..f2bf98b 100644 (file)
@@ -742,8 +742,7 @@ static int __init parse_numa_properties(void)
                        of_node_put(cpu);
                }
 
-               if (likely(nid > 0))
-                       node_set_online(nid);
+               node_set_online(nid);
        }
 
        get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
index 74ebe66..adae2a6 100644 (file)
@@ -911,6 +911,8 @@ static int smp_core99_cpu_disable(void)
 
        mpic_cpu_set_priority(0xf);
 
+       cleanup_cpu_mmu_context();
+
        return 0;
 }
 
index 9acaa0f..4426a10 100644 (file)
@@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np)
                security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
 }
 
-static void pnv_setup_rfi_flush(void)
+static void pnv_setup_security_mitigations(void)
 {
        struct device_node *np, *fw_features;
        enum l1d_flush_type type;
@@ -122,12 +122,31 @@ static void pnv_setup_rfi_flush(void)
                        type = L1D_FLUSH_ORI;
        }
 
+       /*
+        * If we are non-Power9 bare metal, we don't need to flush on kernel
+        * entry or after user access: those flushes fix a P9-specific vulnerability.
+        */
+       if (!pvr_version_is(PVR_POWER9)) {
+               security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+               security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+       }
+
        enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
                 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)   || \
                  security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
 
        setup_rfi_flush(type, enable);
        setup_count_cache_flush();
+
+       enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+                security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
+       setup_entry_flush(enable);
+
+       enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+                security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
+       setup_uaccess_flush(enable);
+
+       setup_stf_barrier();
 }
 
 static void __init pnv_check_guarded_cores(void)
@@ -156,8 +175,7 @@ static void __init pnv_setup_arch(void)
 {
        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
-       pnv_setup_rfi_flush();
-       setup_stf_barrier();
+       pnv_setup_security_mitigations();
 
        /* Initialize SMP */
        pnv_smp_init();
@@ -193,11 +211,16 @@ static void __init pnv_init(void)
                add_preferred_console("hvc", 0, NULL);
 
        if (!radix_enabled()) {
+               size_t size = sizeof(struct slb_entry) * mmu_slb_size;
                int i;
 
                /* Allocate per cpu area to save old slb contents during MCE */
-               for_each_possible_cpu(i)
-                       paca_ptrs[i]->mce_faulty_slbs = memblock_alloc_node(mmu_slb_size, __alignof__(*paca_ptrs[i]->mce_faulty_slbs), cpu_to_node(i));
+               for_each_possible_cpu(i) {
+                       paca_ptrs[i]->mce_faulty_slbs =
+                                       memblock_alloc_node(size,
+                                               __alignof__(struct slb_entry),
+                                               cpu_to_node(i));
+               }
        }
 }
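Worth noting why the hunk above is a fix rather than a cleanup: the old call sized the buffer as mmu_slb_size bytes, but it must hold mmu_slb_size struct slb_entry records. With a typical SLB of 32 entries, each two u64s (esid, vsid) wide:

	/* old: mmu_slb_size                  =  32 bytes (undersized)     */
	/* new: sizeof(struct slb_entry) * 32 = 512 bytes (full SLB copy)  */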
 
index 54c4ba4..cbb6781 100644 (file)
@@ -143,6 +143,9 @@ static int pnv_smp_cpu_disable(void)
                xive_smp_disable_cpu();
        else
                xics_migrate_irqs_away();
+
+       cleanup_cpu_mmu_context();
+
        return 0;
 }
 
index f2837e3..a02012f 100644 (file)
@@ -90,6 +90,9 @@ static int pseries_cpu_disable(void)
                xive_smp_disable_cpu();
        else
                xics_migrate_irqs_away();
+
+       cleanup_cpu_mmu_context();
+
        return 0;
 }
 
index d6f4162..2f73cb5 100644 (file)
@@ -349,8 +349,8 @@ void post_mobility_fixup(void)
 
        cpus_read_unlock();
 
-       /* Possibly switch to a new RFI flush type */
-       pseries_setup_rfi_flush();
+       /* Possibly switch to a new L1 flush type */
+       pseries_setup_security_mitigations();
 
        /* Reinitialise system information for hv-24x7 */
        read_24x7_sys_info();
index 133f6ad..b3ac245 100644 (file)
@@ -458,7 +458,8 @@ again:
                        return hwirq;
                }
 
-               virq = irq_create_mapping(NULL, hwirq);
+               virq = irq_create_mapping_affinity(NULL, hwirq,
+                                                  entry->affinity);
 
                if (!virq) {
                        pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
index 13fa370..5938408 100644 (file)
@@ -111,7 +111,7 @@ static inline unsigned long cmo_get_page_size(void)
 
 int dlpar_workqueue_init(void);
 
-void pseries_setup_rfi_flush(void);
+void pseries_setup_security_mitigations(void);
 void pseries_lpar_read_hblkrm_characteristics(void);
 
 #endif /* _PSERIES_PSERIES_H */
index 633c45e..090c13f 100644 (file)
@@ -542,7 +542,7 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
                security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
 }
 
-void pseries_setup_rfi_flush(void)
+void pseries_setup_security_mitigations(void)
 {
        struct h_cpu_char_result result;
        enum l1d_flush_type types;
@@ -579,6 +579,16 @@ void pseries_setup_rfi_flush(void)
 
        setup_rfi_flush(types, enable);
        setup_count_cache_flush();
+
+       enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+                security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
+       setup_entry_flush(enable);
+
+       enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+                security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
+       setup_uaccess_flush(enable);
+
+       setup_stf_barrier();
 }
 
 #ifdef CONFIG_PCI_IOV
@@ -768,8 +778,7 @@ static void __init pSeries_setup_arch(void)
 
        fwnmi_init();
 
-       pseries_setup_rfi_flush();
-       setup_stf_barrier();
+       pseries_setup_security_mitigations();
        pseries_lpar_read_hblkrm_characteristics();
 
        /* By default, only probe PCI (can be overridden by rtas_pci) */
index b0ab66e..5b2e79e 100644 (file)
@@ -14,4 +14,6 @@
 #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
 
+#define MAX_POSSIBLE_PHYSMEM_BITS 34
+
 #endif /* _ASM_RISCV_PGTABLE_32_H */
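The value 34 follows from the Sv32 page-table format: a 22-bit physical page number plus a 12-bit page offset, i.e. 22 + 12 = 34 physical address bits. The cap matters because users such as zsmalloc size their encoded-PFN fields from it; without the define a 32-bit build would presumably fall back to BITS_PER_LONG (32) and under-count:

	/* Sv32: PPN is 22 bits, pages are 4 KiB */
	#define MAX_POSSIBLE_PHYSMEM_BITS 34	/* 22 + 12 */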
index ab10490..81de51e 100644 (file)
@@ -60,6 +60,8 @@ static inline u32 get_cycles_hi(void)
 }
 #define get_cycles_hi get_cycles_hi
 
+#endif /* !CONFIG_RISCV_M_MODE */
+
 #ifdef CONFIG_64BIT
 static inline u64 get_cycles64(void)
 {
@@ -79,8 +81,6 @@ static inline u64 get_cycles64(void)
 }
 #endif /* CONFIG_64BIT */
 
-#endif /* !CONFIG_RISCV_M_MODE */
-
 #define ARCH_HAS_READ_CURRENT_TIMER
 static inline int read_current_timer(unsigned long *timer_val)
 {
index 82a5693..134388c 100644 (file)
@@ -4,6 +4,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/barrier.h>
+
 static inline void cpu_relax(void)
 {
 #ifdef __riscv_muldiv
index 19225ec..dd5f985 100644 (file)
@@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void);
 void arch_cpu_idle(void)
 {
        wait_for_interrupt();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 void show_regs(struct pt_regs *regs)
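This hunk, and the matching ones in the sh, sparc, um and s390 files below, swap in the raw_ variant so the idle entry/exit path does not re-enter the irq-tracing machinery; with CONFIG_TRACE_IRQFLAGS the traced wrapper is essentially:

	#define local_irq_enable()		\
		do {				\
			trace_hardirqs_on();	\
			raw_local_irq_enable();	\
		} while (0)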
index c424cc6..117f321 100644 (file)
@@ -75,6 +75,7 @@ void __init setup_arch(char **cmdline_p)
        *cmdline_p = boot_command_line;
 
        early_ioremap_setup();
+       jump_label_init();
        parse_early_param();
 
        efi_init();
index cb8f9e4..0cfd6da 100644 (file)
@@ -44,7 +44,7 @@ SYSCFLAGS_vdso.so.dbg = $(c_flags)
 $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE
        $(call if_changed,vdsold)
 SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
-       -Wl,--build-id -Wl,--hash-style=both
+       -Wl,--build-id=sha1 -Wl,--hash-style=both
 
 # We also create a special relocatable object that should mirror the symbol
 # table and layout of the linked DSO. With ld --just-symbols we can then
index a4d3c57..fe6f529 100644 (file)
@@ -1,3 +1,4 @@
+CONFIG_UAPI_HEADER_TEST=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_WATCH_QUEUE=y
index 463c24e..74f9a03 100644 (file)
@@ -459,6 +459,7 @@ struct kvm_vcpu_stat {
        u64 diagnose_308;
        u64 diagnose_500;
        u64 diagnose_other;
+       u64 pfault_sync;
 };
 
 #define PGM_OPERATION                  0x01
index 2012c1c..483051e 100644 (file)
@@ -53,11 +53,11 @@ int main(void)
        /* stack_frame offsets */
        OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
        OFFSET(__SF_GPRS, stack_frame, gprs);
-       OFFSET(__SF_EMPTY, stack_frame, empty1);
-       OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
-       OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
-       OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
-       OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]);
+       OFFSET(__SF_EMPTY, stack_frame, empty1[0]);
+       OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[1]);
+       OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[2]);
+       OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]);
+       OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]);
        BLANK();
        OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
        BLANK();
index 8623591..92beb14 100644 (file)
@@ -422,6 +422,7 @@ ENTRY(system_call)
 #endif
        LOCKDEP_SYS_EXIT
 .Lsysc_tif:
+       DISABLE_INTS
        TSTMSK  __PT_FLAGS(%r11),_PIF_WORK
        jnz     .Lsysc_work
        TSTMSK  __TI_flags(%r12),_TIF_WORK
@@ -444,6 +445,7 @@ ENTRY(system_call)
 # One of the work bits is on. Find out which one.
 #
 .Lsysc_work:
+       ENABLE_INTS
        TSTMSK  __TI_flags(%r12),_TIF_NEED_RESCHED
        jo      .Lsysc_reschedule
        TSTMSK  __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
@@ -761,12 +763,7 @@ ENTRY(io_int_handler)
        xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
        TSTMSK  __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
        jo      .Lio_restore
-#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS)
-       tmhh    %r8,0x300
-       jz      1f
        TRACE_IRQS_OFF
-1:
-#endif
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 .Lio_loop:
        lgr     %r2,%r11                # pass pointer to pt_regs
@@ -789,12 +786,7 @@ ENTRY(io_int_handler)
        TSTMSK  __LC_CPU_FLAGS,_CIF_WORK
        jnz     .Lio_work
 .Lio_restore:
-#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS)
-       tm      __PT_PSW(%r11),3
-       jno     0f
        TRACE_IRQS_ON
-0:
-#endif
        mvc     __LC_RETURN_PSW(16),__PT_PSW(%r11)
        tm      __PT_PSW+1(%r11),0x01   # returning to user ?
        jno     .Lio_exit_kernel
@@ -974,12 +966,7 @@ ENTRY(ext_int_handler)
        xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
        TSTMSK  __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
        jo      .Lio_restore
-#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS)
-       tmhh    %r8,0x300
-       jz      1f
        TRACE_IRQS_OFF
-1:
-#endif
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
        lgr     %r2,%r11                # pass pointer to pt_regs
        lghi    %r3,EXT_INTERRUPT
@@ -1066,6 +1053,7 @@ EXPORT_SYMBOL(save_fpu_regs)
  *     %r4
  */
 load_fpu_regs:
+       stnsm   __SF_EMPTY(%r15),0xfc
        lg      %r4,__LC_CURRENT
        aghi    %r4,__TASK_thread
        TSTMSK  __LC_CPU_FLAGS,_CIF_FPU
@@ -1097,6 +1085,7 @@ load_fpu_regs:
 .Lload_fpu_regs_done:
        ni      __LC_CPU_FLAGS+7,255-_CIF_FPU
 .Lload_fpu_regs_exit:
+       ssm     __SF_EMPTY(%r15)
        BR_EX   %r14
 .Lload_fpu_regs_end:
 ENDPROC(load_fpu_regs)
index f7f1e64..2b85096 100644 (file)
@@ -33,10 +33,10 @@ void enabled_wait(void)
                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
        clear_cpu_flag(CIF_NOHZ_DELAY);
 
-       local_irq_save(flags);
+       raw_local_irq_save(flags);
        /* Call the assembler magic in entry.S */
        psw_idle(idle, psw_mask);
-       local_irq_restore(flags);
+       raw_local_irq_restore(flags);
 
        /* Account time spent with enabled wait psw loaded as idle time. */
        raw_write_seqcount_begin(&idle->seqcount);
@@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void)
 void arch_cpu_idle(void)
 {
        enabled_wait();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 void arch_cpu_idle_exit(void)
index 00255ae..19cd7b9 100644 (file)
@@ -2228,4 +2228,4 @@ out:
 }
 
 arch_initcall(init_cpum_sampling_pmu);
-core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644);
index 14bd9d5..883bfed 100644 (file)
@@ -129,8 +129,15 @@ int uv_destroy_page(unsigned long paddr)
                .paddr = paddr
        };
 
-       if (uv_call(0, (u64)&uvcb))
+       if (uv_call(0, (u64)&uvcb)) {
+               /*
+                * Older firmware uses 107/d as an indication of a non-secure
+                * page. Let us emulate the newer variant (no-op).
+                */
+               if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd)
+                       return 0;
                return -EINVAL;
+       }
        return 0;
 }
 
index 394a5f5..3765c42 100644 (file)
@@ -184,7 +184,7 @@ static int __import_wp_info(struct kvm_vcpu *vcpu,
        if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
                return -EINVAL;
 
-       wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
+       wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL_ACCOUNT);
        if (!wp_info->old_data)
                return -ENOMEM;
        /* try to back up the original value */
@@ -234,7 +234,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
        if (nr_wp > 0) {
                wp_info = kmalloc_array(nr_wp,
                                        sizeof(*wp_info),
-                                       GFP_KERNEL);
+                                       GFP_KERNEL_ACCOUNT);
                if (!wp_info) {
                        ret = -ENOMEM;
                        goto error;
@@ -243,7 +243,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
        if (nr_bp > 0) {
                bp_info = kmalloc_array(nr_bp,
                                        sizeof(*bp_info),
-                                       GFP_KERNEL);
+                                       GFP_KERNEL_ACCOUNT);
                if (!bp_info) {
                        ret = -ENOMEM;
                        goto error;
@@ -349,7 +349,7 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
                if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
                        continue;
 
-               temp = kmalloc(wp_info->len, GFP_KERNEL);
+               temp = kmalloc(wp_info->len, GFP_KERNEL_ACCOUNT);
                if (!temp)
                        continue;
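The GFP_KERNEL -> GFP_KERNEL_ACCOUNT conversions here and throughout the s390 KVM files that follow opt these allocations into memcg accounting, so guest-triggered kernel allocations are charged to the VMM's cgroup. The flag is simply the accounted variant:

	/* include/linux/gfp.h */
	#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)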
 
index e7a7c49..72b25b7 100644 (file)
@@ -398,7 +398,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
        if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK))
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       sctns = (void *)get_zeroed_page(GFP_KERNEL);
+       sctns = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
        if (!sctns)
                return -ENOMEM;
 
index 2f17729..e3183bd 100644 (file)
@@ -1792,7 +1792,7 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                goto out;
        }
 gisa_out:
-       tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+       tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
        if (tmp_inti) {
                tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0);
                tmp_inti->io.io_int_word = isc_to_int_word(isc);
@@ -2015,7 +2015,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
        struct kvm_s390_interrupt_info *inti;
        int rc;
 
-       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+       inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
        if (!inti)
                return -ENOMEM;
 
@@ -2414,7 +2414,7 @@ static int enqueue_floating_irq(struct kvm_device *dev,
                return -EINVAL;
 
        while (len >= sizeof(struct kvm_s390_irq)) {
-               inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+               inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
                if (!inti)
                        return -ENOMEM;
 
@@ -2462,7 +2462,7 @@ static int register_io_adapter(struct kvm_device *dev,
        if (dev->kvm->arch.adapters[adapter_info.id] != NULL)
                return -EINVAL;
 
-       adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+       adapter = kzalloc(sizeof(*adapter), GFP_KERNEL_ACCOUNT);
        if (!adapter)
                return -ENOMEM;
 
@@ -3290,7 +3290,7 @@ int kvm_s390_gib_init(u8 nisc)
                goto out;
        }
 
-       gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+       gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
        if (!gib) {
                rc = -ENOMEM;
                goto out;
index 6b74b92..dbafd05 100644 (file)
@@ -60,6 +60,7 @@
 struct kvm_stats_debugfs_item debugfs_entries[] = {
        VCPU_STAT("userspace_handled", exit_userspace),
        VCPU_STAT("exit_null", exit_null),
+       VCPU_STAT("pfault_sync", pfault_sync),
        VCPU_STAT("exit_validity", exit_validity),
        VCPU_STAT("exit_stop_request", exit_stop_request),
        VCPU_STAT("exit_external_request", exit_external_request),
@@ -1254,7 +1255,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
                ret = -EBUSY;
                goto out;
        }
-       proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+       proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
@@ -1416,7 +1417,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;
 
-       proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+       proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
@@ -1444,7 +1445,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;
 
-       mach = kzalloc(sizeof(*mach), GFP_KERNEL);
+       mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
@@ -1812,7 +1813,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;
 
-       keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
+       keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
        if (!keys)
                return -ENOMEM;
 
@@ -1857,7 +1858,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;
 
-       keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
+       keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
        if (!keys)
                return -ENOMEM;
 
@@ -2312,7 +2313,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
                struct kvm_s390_pv_unp unp = {};
 
                r = -EINVAL;
-               if (!kvm_s390_pv_is_protected(kvm))
+               if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
                        break;
 
                r = -EFAULT;
@@ -2625,7 +2626,7 @@ static void sca_dispose(struct kvm *kvm)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-       gfp_t alloc_flags = GFP_KERNEL;
+       gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
        int i, rc;
        char debug_name[16];
        static unsigned long sca_offset;
@@ -2670,7 +2671,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
        kvm->arch.sie_page2 =
-            (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+            (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
        if (!kvm->arch.sie_page2)
                goto out_err;
 
@@ -2900,7 +2901,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
        if (kvm->arch.use_esca)
                return 0;
 
-       new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
+       new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        if (!new_sca)
                return -ENOMEM;
 
@@ -3133,7 +3134,7 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
 
 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
+       vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
        if (!vcpu->arch.sie_block->cbrlo)
                return -ENOMEM;
        return 0;
@@ -3243,7 +3244,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        int rc;
 
        BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
-       sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
+       sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
        if (!sie_page)
                return -ENOMEM;
 
@@ -3564,7 +3565,6 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
                vcpu->arch.sie_block->pp = 0;
                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
                vcpu->arch.sie_block->todpr = 0;
-               vcpu->arch.sie_block->cpnc = 0;
        }
 }
 
@@ -3582,7 +3582,6 @@ static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
 
        regs->etoken = 0;
        regs->etoken_extension = 0;
-       regs->diag318 = 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -4111,6 +4110,7 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
                current->thread.gmap_pfault = 0;
                if (kvm_arch_setup_async_pf(vcpu))
                        return 0;
+               vcpu->stat.pfault_sync++;
                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
        }
        return vcpu_post_run_fault_in_sie(vcpu);
index cd74989..9928f78 100644 (file)
@@ -879,7 +879,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
        switch (fc) {
        case 1: /* same handling for 1 and 2 */
        case 2:
-               mem = get_zeroed_page(GFP_KERNEL);
+               mem = get_zeroed_page(GFP_KERNEL_ACCOUNT);
                if (!mem)
                        goto out_no_data;
                if (stsi((void *) mem, fc, sel1, sel2))
@@ -888,7 +888,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
        case 3:
                if (sel1 != 2 || sel2 != 2)
                        goto out_no_data;
-               mem = get_zeroed_page(GFP_KERNEL);
+               mem = get_zeroed_page(GFP_KERNEL_ACCOUNT);
                if (!mem)
                        goto out_no_data;
                handle_stsi_3_2_2(vcpu, (void *) mem);
index eb99e2f..813b6e9 100644 (file)
@@ -60,7 +60,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
        if (kvm_s390_pv_cpu_get_handle(vcpu))
                return -EINVAL;
 
-       vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL,
+       vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
                                                   get_order(uv_info.guest_cpu_stor_len));
        if (!vcpu->arch.pv.stor_base)
                return -ENOMEM;
@@ -72,7 +72,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
        uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
 
        /* Alloc Secure Instruction Data Area Designation */
-       vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+       vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        if (!vcpu->arch.sie_block->sidad) {
                free_pages(vcpu->arch.pv.stor_base,
                           get_order(uv_info.guest_cpu_stor_len));
@@ -120,7 +120,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
        struct kvm_memory_slot *memslot;
 
        kvm->arch.pv.stor_var = NULL;
-       kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base));
+       kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
        if (!kvm->arch.pv.stor_base)
                return -ENOMEM;
 
@@ -208,7 +208,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
                return -EIO;
        }
        kvm->arch.gmap->guest_handle = uvcb.guest_handle;
-       atomic_set(&kvm->mm->context.is_protected, 1);
        return 0;
 }
 
@@ -228,6 +227,8 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
        *rrc = uvcb.header.rrc;
        KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
                     *rc, *rrc);
+       if (!cc)
+               atomic_set(&kvm->mm->context.is_protected, 1);
        return cc ? -EINVAL : 0;
 }
 
index 4f3cbf6..c5d0a58 100644 (file)
@@ -1234,7 +1234,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
 
        mutex_lock(&kvm->arch.vsie.mutex);
        if (kvm->arch.vsie.page_count < nr_vcpus) {
-               page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA);
+               page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
                if (!page) {
                        mutex_unlock(&kvm->arch.vsie.mutex);
                        return ERR_PTR(-ENOMEM);
@@ -1336,7 +1336,7 @@ out_put:
 void kvm_s390_vsie_init(struct kvm *kvm)
 {
        mutex_init(&kvm->arch.vsie.mutex);
-       INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
+       INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT);
 }
 
 /* Destroy the vsie data structures. To be called when a vm is destroyed. */
index daca7ba..8c0c68e 100644 (file)
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(__delay);
 
 static void __udelay_disabled(unsigned long long usecs)
 {
-       unsigned long cr0, cr0_new, psw_mask, flags;
+       unsigned long cr0, cr0_new, psw_mask;
        struct s390_idle_data idle;
        u64 end;
 
@@ -45,9 +45,8 @@ static void __udelay_disabled(unsigned long long usecs)
        psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
        set_clock_comparator(end);
        set_cpu_flag(CIF_IGNORE_IRQ);
-       local_irq_save(flags);
        psw_idle(&idle, psw_mask);
-       local_irq_restore(flags);
+       trace_hardirqs_off();
        clear_cpu_flag(CIF_IGNORE_IRQ);
        set_clock_comparator(S390_lowcore.clock_comparator);
        __ctl_load(cr0, 0, 0);
index cfb0017..9bb2c75 100644 (file)
@@ -2,7 +2,7 @@
 /*
  *  KVM guest address space mapping code
  *
- *    Copyright IBM Corp. 2007, 2016, 2018
+ *    Copyright IBM Corp. 2007, 2020
  *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  *              David Hildenbrand <david@redhat.com>
  *              Janosch Frank <frankja@linux.vnet.ibm.com>
@@ -56,19 +56,19 @@ static struct gmap *gmap_alloc(unsigned long limit)
                atype = _ASCE_TYPE_REGION1;
                etype = _REGION1_ENTRY_EMPTY;
        }
-       gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+       gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
        if (!gmap)
                goto out;
        INIT_LIST_HEAD(&gmap->crst_list);
        INIT_LIST_HEAD(&gmap->children);
        INIT_LIST_HEAD(&gmap->pt_list);
-       INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
-       INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
-       INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC);
+       INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
+       INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
+       INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
        spin_lock_init(&gmap->guest_table_lock);
        spin_lock_init(&gmap->shadow_lock);
        refcount_set(&gmap->ref_count, 1);
-       page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                goto out_free;
        page->index = 0;
@@ -309,7 +309,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
        unsigned long *new;
 
        /* since we don't free the gmap table until gmap_free we can unlock */
-       page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        new = (unsigned long *) page_to_phys(page);
@@ -594,7 +594,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
        if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
                return -EFAULT;
        /* Link gmap segment table entry location to page table. */
-       rc = radix_tree_preload(GFP_KERNEL);
+       rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
        if (rc)
                return rc;
        ptl = pmd_lock(mm, pmd);
@@ -1218,11 +1218,11 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
                vmaddr = __gmap_translate(parent, paddr);
                if (IS_ERR_VALUE(vmaddr))
                        return vmaddr;
-               rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
+               rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
                if (!rmap)
                        return -ENOMEM;
                rmap->raddr = raddr;
-               rc = radix_tree_preload(GFP_KERNEL);
+               rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
                if (rc) {
                        kfree(rmap);
                        return rc;
@@ -1741,7 +1741,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 
        BUG_ON(!gmap_is_shadow(sg));
        /* Allocate a shadow region second table */
-       page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        page->index = r2t & _REGION_ENTRY_ORIGIN;
@@ -1825,7 +1825,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 
        BUG_ON(!gmap_is_shadow(sg));
        /* Allocate a shadow region third table */
-       page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        page->index = r3t & _REGION_ENTRY_ORIGIN;
@@ -1909,7 +1909,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 
        BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
        /* Allocate a shadow segment table */
-       page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        page->index = sgt & _REGION_ENTRY_ORIGIN;
@@ -2116,7 +2116,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
        parent = sg->parent;
        prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;
 
-       rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
+       rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
        if (!rmap)
                return -ENOMEM;
        rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;
@@ -2128,7 +2128,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
                        rc = vmaddr;
                        break;
                }
-               rc = radix_tree_preload(GFP_KERNEL);
+               rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
                if (rc)
                        break;
                rc = -EAGAIN;
@@ -2690,6 +2690,8 @@ static const struct mm_walk_ops reset_acc_walk_ops = {
 #include <linux/sched/mm.h>
 void s390_reset_acc(struct mm_struct *mm)
 {
+       if (!mm_is_protected(mm))
+               return;
        /*
         * we might be called during
         * reset:                             we walk the pages and clear
index 743f257..75217fb 100644 (file)
@@ -103,9 +103,10 @@ static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *de
 {
        struct msi_desc *entry = irq_get_msi_desc(data->irq);
        struct msi_msg msg = entry->msg;
+       int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest));
 
        msg.address_lo &= 0xff0000ff;
-       msg.address_lo |= (cpumask_first(dest) << 8);
+       msg.address_lo |= (cpu_addr << 8);
        pci_write_msi_msg(data->irq, &msg);
 
        return IRQ_SET_MASK_OK;
@@ -238,6 +239,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
        unsigned long bit;
        struct msi_desc *msi;
        struct msi_msg msg;
+       int cpu_addr;
        int rc, irq;
 
        zdev->aisb = -1UL;
@@ -287,9 +289,15 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
                                         handle_percpu_irq);
                msg.data = hwirq - bit;
                if (irq_delivery == DIRECTED) {
+                       if (msi->affinity)
+                               cpu = cpumask_first(&msi->affinity->mask);
+                       else
+                               cpu = 0;
+                       cpu_addr = smp_cpu_get_cpu_address(cpu);
+
                        msg.address_lo = zdev->msi_addr & 0xff0000ff;
-                       msg.address_lo |= msi->affinity ?
-                               (cpumask_first(&msi->affinity->mask) << 8) : 0;
+                       msg.address_lo |= (cpu_addr << 8);
+
                        for_each_possible_cpu(cpu) {
                                airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
                        }
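The 0xff0000ff mask keeps the top and bottom bytes of the directed-MSI address, and the CPU address (not the logical CPU number, which was the bug being fixed) lands in bits 8-23. As a worked example with an assumed msi_addr of 0xaa000000 and CPU address 0x0003:

	msg.address_lo  = 0xaa000000 & 0xff0000ff;	/* 0xaa000000 */
	msg.address_lo |= 0x0003 << 8;			/* 0xaa000300 */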
index 0dc0f52..f598149 100644 (file)
@@ -22,7 +22,7 @@ static void (*sh_idle)(void);
 void default_idle(void)
 {
        set_bl_bit();
-       local_irq_enable();
+       raw_local_irq_enable();
        /* Isn't this racy ? */
        cpu_sleep();
        clear_bl_bit();
index 065e2d4..396f46b 100644 (file)
@@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void)
        register unsigned int address = (unsigned int)leon3_irqctrl_regs;
 
        /* Interrupts need to be enabled to not hang the CPU */
-       local_irq_enable();
+       raw_local_irq_enable();
 
        __asm__ __volatile__ (
                "wr     %%g0, %%asr19\n"
@@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void)
 static void pmc_leon_idle(void)
 {
        /* Interrupts need to be enabled to not hang the CPU */
-       local_irq_enable();
+       raw_local_irq_enable();
 
        /* For systems without power-down, this will be no-op */
        __asm__ __volatile__ ("wr       %g0, %asr19\n\t");
index adfcaea..a023637 100644 (file)
@@ -74,7 +74,7 @@ void arch_cpu_idle(void)
 {
        if (sparc_idle)
                (*sparc_idle)();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
index a75093b..6f8c782 100644 (file)
@@ -62,11 +62,11 @@ void arch_cpu_idle(void)
 {
        if (tlb_type != hypervisor) {
                touch_nmi_watchdog();
-               local_irq_enable();
+               raw_local_irq_enable();
        } else {
                unsigned long pstate;
 
-               local_irq_enable();
+               raw_local_irq_enable();
 
                 /* The sun4v sleeping code requires that we have PSTATE.IE cleared over
                  * the cpu sleep hypervisor call.
index 0c0268e..d839956 100644 (file)
@@ -71,7 +71,7 @@
 FUNC_NAME:             /* %o0=src, %o1=dst, %o2=len */
        LOAD(prefetch, %o0 + 0x000, #n_reads)
        xor             %o0, %o1, %g1
-       mov             1, %o3
+       mov             -1, %o3
        clr             %o4
        andcc           %g1, 0x3, %g0
        bne,pn          %icc, 95f
index 3bed095..9505a7e 100644 (file)
@@ -217,7 +217,7 @@ void arch_cpu_idle(void)
 {
        cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
        um_idle_sleep();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 int __cant_sleep(void) {
index f6946b8..fbf26e0 100644 (file)
@@ -100,6 +100,7 @@ config X86
        select ARCH_WANT_DEFAULT_BPF_JIT        if X86_64
        select ARCH_WANTS_DYNAMIC_TASK_STRUCT
        select ARCH_WANT_HUGE_PMD_SHARE
+       select ARCH_WANT_LD_ORPHAN_WARN
        select ARCH_WANTS_THP_SWAP              if X86_64
        select BUILDTIME_TABLE_SORT
        select CLKEVT_I8253
index 154259f..1bf2174 100644 (file)
@@ -209,9 +209,6 @@ ifdef CONFIG_X86_64
 LDFLAGS_vmlinux += -z max-page-size=0x200000
 endif
 
-# We never want expected sections to be placed heuristically by the
-# linker. All sections should be explicitly named in the linker script.
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
 
 archscripts: scripts_basic
        $(Q)$(MAKE) $(build)=arch/x86/tools relocs
index ee24908..40b8fd3 100644 (file)
@@ -61,7 +61,9 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info)
 # Compressed kernel should be built as PIE since it may be loaded at any
 # address by the bootloader.
 LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker)
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
 LDFLAGS_vmlinux += -T
 
 hostprogs      := mkpiggy
index 954cb27..27826c2 100644 (file)
@@ -32,13 +32,12 @@ struct ghcb *boot_ghcb;
  */
 static bool insn_has_rep_prefix(struct insn *insn)
 {
+       insn_byte_t p;
        int i;
 
        insn_get_prefixes(insn);
 
-       for (i = 0; i < insn->prefixes.nbytes; i++) {
-               insn_byte_t p = insn->prefixes.bytes[i];
-
+       for_each_insn_prefix(insn, i, p) {
                if (p == 0xf2 || p == 0xf3)
                        return true;
        }
index 442e1ed..4eb7ee5 100644 (file)
 MODULE_LICENSE("GPL");
 
 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)                \
-static ssize_t __cstate_##_var##_show(struct kobject *kobj,    \
-                               struct kobj_attribute *attr,    \
+static ssize_t __cstate_##_var##_show(struct device *dev,      \
+                               struct device_attribute *attr,  \
                                char *page)                     \
 {                                                              \
        BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
        return sprintf(page, _format "\n");                     \
 }                                                              \
-static struct kobj_attribute format_attr_##_var =              \
+static struct device_attribute format_attr_##_var =            \
        __ATTR(_name, 0444, __cstate_##_var##_show, NULL)
 
 static ssize_t cstate_get_attr_cpumask(struct device *dev,
index b47cc42..485c506 100644 (file)
@@ -1916,7 +1916,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
                 * that caused the PEBS record. It's called collision.
                 * If collision happened, the record will be dropped.
                 */
-               if (p->status != (1ULL << bit)) {
+               if (pebs_status != (1ULL << bit)) {
                        for_each_set_bit(i, (unsigned long *)&pebs_status, size)
                                error[i]++;
                        continue;
@@ -1940,7 +1940,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
                if (error[bit]) {
                        perf_log_lost_samples(event, error[bit]);
 
-                       if (perf_event_account_interrupt(event))
+                       if (iregs && perf_event_account_interrupt(event))
                                x86_pmu_stop(event, 0);
                }
 
index 86d012b..80d52cb 100644 (file)
@@ -94,8 +94,8 @@ end:
        return map;
 }
 
-ssize_t uncore_event_show(struct kobject *kobj,
-                         struct kobj_attribute *attr, char *buf)
+ssize_t uncore_event_show(struct device *dev,
+                         struct device_attribute *attr, char *buf)
 {
        struct uncore_event_desc *event =
                container_of(attr, struct uncore_event_desc, attr);
index 83d2a7d..9efea15 100644 (file)
@@ -157,7 +157,7 @@ struct intel_uncore_box {
 #define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS     2
 
 struct uncore_event_desc {
-       struct kobj_attribute attr;
+       struct device_attribute attr;
        const char *config;
 };
 
@@ -179,8 +179,8 @@ struct pci2phy_map {
 struct pci2phy_map *__find_pci2phy_map(int segment);
 int uncore_pcibus_to_physid(struct pci_bus *bus);
 
-ssize_t uncore_event_show(struct kobject *kobj,
-                         struct kobj_attribute *attr, char *buf);
+ssize_t uncore_event_show(struct device *dev,
+                         struct device_attribute *attr, char *buf);
 
 static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev)
 {
@@ -201,14 +201,14 @@ extern int __uncore_max_dies;
 }
 
 #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)                        \
-static ssize_t __uncore_##_var##_show(struct kobject *kobj,            \
-                               struct kobj_attribute *attr,            \
+static ssize_t __uncore_##_var##_show(struct device *dev,              \
+                               struct device_attribute *attr,          \
                                char *page)                             \
 {                                                                      \
        BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);                     \
        return sprintf(page, _format "\n");                             \
 }                                                                      \
-static struct kobj_attribute format_attr_##_var =                      \
+static struct device_attribute format_attr_##_var =                    \
        __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
 
 static inline bool uncore_pmc_fixed(int idx)
index 7c0120e..7dbbeaa 100644 (file)
@@ -93,18 +93,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
  * any other bit is reserved
  */
 #define RAPL_EVENT_MASK        0xFFULL
-
-#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format)          \
-static ssize_t __rapl_##_var##_show(struct kobject *kobj,      \
-                               struct kobj_attribute *attr,    \
-                               char *page)                     \
-{                                                              \
-       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
-       return sprintf(page, _format "\n");                     \
-}                                                              \
-static struct kobj_attribute format_attr_##_var =              \
-       __ATTR(_name, 0444, __rapl_##_var##_show, NULL)
-
 #define RAPL_CNTR_WIDTH 32
 
 #define RAPL_EVENT_ATTR_STR(_name, v, str)                                     \
@@ -441,7 +429,7 @@ static struct attribute_group rapl_pmu_events_group = {
        .attrs = attrs_empty,
 };
 
-DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
+PMU_FORMAT_ATTR(event, "config:0-7");
 static struct attribute *rapl_formats_attr[] = {
        &format_attr_event.attr,
        NULL,
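PMU_FORMAT_ATTR() from <linux/perf_event.h> generates essentially what the removed macro did, but already typed against struct device_attribute, which is the type this whole series (cstate, uncore, rapl) converges on. Its shape is roughly:

	#define PMU_FORMAT_ATTR(_name, _format)					\
	static ssize_t								\
	_name##_show(struct device *dev, struct device_attribute *attr,	\
		     char *page)						\
	{									\
		BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
		return sprintf(page, _format "\n");				\
	}									\
										\
	static struct device_attribute format_attr_##_name = __ATTR_RO(_name)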
index dad350d..9f9e951 100644 (file)
 #define X86_FEATURE_VMCALL             ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL                ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 #define X86_FEATURE_SEV_ES             ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_VM_PAGE_FLUSH      ( 8*32+21) /* "" VM Page Flush MSR is supported */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
 #define X86_FEATURE_TSXLDTRK           (18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG            (18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR           (18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_AVX512_FP16                (18*32+23) /* AVX512 FP16 */
 #define X86_FEATURE_SPEC_CTRL          (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP                (18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_FLUSH_L1D          (18*32+28) /* Flush L1D cache */
index 5c1ae3e..a8c3d28 100644 (file)
@@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn)
        return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx:  Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate the prefix bytes of the given @insn. Each prefix byte is stored in
+ * @prefix and its index in @idx (note that @idx is just a cursor; do not
+ * change it).
+ * Since prefixes.nbytes can be greater than 4 when prefixes are repeated, it
+ * cannot be used as the loop bound; iteration stops at the first zero byte.
+ */
+#define for_each_insn_prefix(insn, idx, prefix)        \
+       for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
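For a concrete picture of the contract, this is how the sev-es hunk earlier in this diff consumes the macro: @idx is supplied but never written by the caller, and iteration ends at the first zero prefix byte rather than at prefixes.nbytes:

	insn_byte_t p;
	int i;

	for_each_insn_prefix(insn, i, p) {
		if (p == 0xf2 || p == 0xf3)	/* REPNE / REP */
			return true;
	}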
 
index 324ddd7..3d6616f 100644 (file)
@@ -614,6 +614,7 @@ struct kvm_vcpu_arch {
 
        struct kvm_pio_request pio;
        void *pio_data;
+       void *guest_ins_data;
 
        u8 event_exit_inst_len;
 
@@ -805,6 +806,9 @@ struct kvm_vcpu_arch {
                 */
                bool enforce;
        } pv_cpuid;
+
+       /* Protected Guests */
+       bool guest_state_protected;
 };
 
 struct kvm_lpage_info {
@@ -1006,9 +1010,21 @@ struct kvm_arch {
         */
        bool tdp_mmu_enabled;
 
-       /* List of struct tdp_mmu_pages being used as roots */
+       /*
+        * List of struct kvm_mmu_pages being used as roots.
+        * All struct kvm_mmu_pages in the list should have
+        * tdp_mmu_page set.
+        * All struct kvm_mmu_pages in the list should have a positive
+        * root_count except when a thread holds the MMU lock and is removing
+        * an entry from the list.
+        */
        struct list_head tdp_mmu_roots;
-       /* List of struct tdp_mmu_pages not being used as roots */
+
+       /*
+        * List of struct kvm_mmu_pages not being used as roots.
+        * All struct kvm_mmu_pages in the list should have
+        * tdp_mmu_page set and a root_count of 0.
+        */
        struct list_head tdp_mmu_pages;
 };
 
@@ -1088,7 +1104,7 @@ struct kvm_x86_ops {
        void (*hardware_disable)(void);
        void (*hardware_unsetup)(void);
        bool (*cpu_has_accelerated_tpr)(void);
-       bool (*has_emulated_msr)(u32 index);
+       bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
        void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
 
        unsigned int vm_size;
@@ -1115,7 +1131,8 @@ struct kvm_x86_ops {
                            struct kvm_segment *var, int seg);
        void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
        void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
-       int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+       bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+       void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
        int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
        void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
        void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
@@ -1231,6 +1248,7 @@ struct kvm_x86_ops {
        void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
                                           struct kvm_memory_slot *slot,
                                           gfn_t offset, unsigned long mask);
+       int (*cpu_dirty_log_size)(void);
 
        /* pmu operations of sub-arch */
        const struct kvm_pmu_ops *pmu_ops;
@@ -1280,6 +1298,9 @@ struct kvm_x86_ops {
 
        void (*migrate_timers)(struct kvm_vcpu *vcpu);
        void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+       int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
+
+       void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
 };
 
 struct kvm_x86_nested_ops {
@@ -1461,6 +1482,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
 int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -1470,6 +1492,10 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
                    int reason, bool has_error_code, u32 error_code);
 
+void kvm_free_guest_fpu(struct kvm_vcpu *vcpu);
+
+void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
+void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4);
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
@@ -1656,6 +1682,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_cpu_has_extint(struct kvm_vcpu *v);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
@@ -1695,7 +1722,8 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
 
 int kvm_is_in_guest(void);
 
-int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
+void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
+                                    u32 size);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
@@ -1742,4 +1770,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 #define GET_SMSTATE(type, buf, offset)         \
        (*(type *)((buf) + (offset) - 0x7e00))
 
+int kvm_cpu_dirty_log_size(void);
+
 #endif /* _ASM_X86_KVM_HOST_H */
index 972a34d..abfc9b0 100644 (file)
 #define MSR_AMD64_ICIBSEXTDCTL         0xc001103c
 #define MSR_AMD64_IBSOPDATA4           0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX    8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_VM_PAGE_FLUSH                0xc001011e
 #define MSR_AMD64_SEV_ES_GHCB          0xc0010130
 #define MSR_AMD64_SEV                  0xc0010131
 #define MSR_AMD64_SEV_ENABLED_BIT      0
index e039a93..29dd27b 100644 (file)
@@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
 
 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 {
-       trace_hardirqs_on();
-
        mds_idle_clear_cpu_buffers();
        /* "mwait %eax, %ecx;" */
        asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
index 816b31c..394757e 100644 (file)
@@ -155,6 +155,7 @@ enum page_cache_mode {
 #define _PAGE_ENC              (_AT(pteval_t, sme_me_mask))
 
 #define _PAGE_CACHE_MASK       (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
+#define _PAGE_LARGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE)
 
 #define _PAGE_NOCACHE          (cachemode2protval(_PAGE_CACHE_MODE_UC))
 #define _PAGE_CACHE_WP         (cachemode2protval(_PAGE_CACHE_MODE_WP))
index 6bfc878..6a9ccc1 100644 (file)
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+extern int phys_to_target_node(phys_addr_t start);
+#define phys_to_target_node phys_to_target_node
+extern int memory_add_physaddr_to_nid(u64 start);
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+#endif
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ASM_X86_SPARSEMEM_H */
index 71d630b..1c56194 100644 (file)
@@ -98,6 +98,16 @@ enum {
        INTERCEPT_MWAIT_COND,
        INTERCEPT_XSETBV,
        INTERCEPT_RDPRU,
+       TRAP_EFER_WRITE,
+       TRAP_CR0_WRITE,
+       TRAP_CR1_WRITE,
+       TRAP_CR2_WRITE,
+       TRAP_CR3_WRITE,
+       TRAP_CR4_WRITE,
+       TRAP_CR5_WRITE,
+       TRAP_CR6_WRITE,
+       TRAP_CR7_WRITE,
+       TRAP_CR8_WRITE,
        /* Byte offset 014h (word 5) */
        INTERCEPT_INVLPGB = 160,
        INTERCEPT_INVLPGB_ILLEGAL,
@@ -130,7 +140,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
        u32 exit_int_info_err;
        u64 nested_ctl;
        u64 avic_vapic_bar;
-       u8 reserved_4[8];
+       u64 ghcb_gpa;
        u32 event_inj;
        u32 event_inj_err;
        u64 nested_cr3;
@@ -144,6 +154,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
        u8 reserved_6[8];       /* Offset 0xe8 */
        u64 avic_logical_id;    /* Offset 0xf0 */
        u64 avic_physical_id;   /* Offset 0xf8 */
+       u8 reserved_7[8];
+       u64 vmsa_pa;            /* Used for an SEV-ES guest */
 };
 
 
@@ -178,7 +190,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define LBR_CTL_ENABLE_MASK BIT_ULL(0)
 #define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
 
-#define SVM_INTERRUPT_SHADOW_MASK 1
+#define SVM_INTERRUPT_SHADOW_MASK      BIT_ULL(0)
+#define SVM_GUEST_INTERRUPT_MASK       BIT_ULL(1)
 
 #define SVM_IOIO_STR_SHIFT 2
 #define SVM_IOIO_REP_SHIFT 3
@@ -197,6 +210,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 
 #define SVM_NESTED_CTL_NP_ENABLE       BIT(0)
 #define SVM_NESTED_CTL_SEV_ENABLE      BIT(1)
+#define SVM_NESTED_CTL_SEV_ES_ENABLE   BIT(2)
 
 struct vmcb_seg {
        u16 selector;
@@ -220,7 +234,8 @@ struct vmcb_save_area {
        u8 cpl;
        u8 reserved_2[4];
        u64 efer;
-       u8 reserved_3[112];
+       u8 reserved_3[104];
+       u64 xss;                /* Valid for SEV-ES only */
        u64 cr4;
        u64 cr3;
        u64 cr0;
@@ -251,9 +266,12 @@ struct vmcb_save_area {
 
        /*
         * The following part of the save area is valid only for
-        * SEV-ES guests when referenced through the GHCB.
+        * SEV-ES guests when referenced through the GHCB or for
+        * saving to the host save area.
         */
-       u8 reserved_7[104];
+       u8 reserved_7[80];
+       u32 pkru;
+       u8 reserved_7a[20];
        u64 reserved_8;         /* rax already available at 0x01f8 */
        u64 rcx;
        u64 rdx;
@@ -294,7 +312,7 @@ struct ghcb {
 
 
 #define EXPECTED_VMCB_SAVE_AREA_SIZE           1032
-#define EXPECTED_VMCB_CONTROL_AREA_SIZE                256
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE                272
 #define EXPECTED_GHCB_SIZE                     PAGE_SIZE
 
 static inline void __unused_size_checks(void)
@@ -379,6 +397,16 @@ struct vmcb {
                                (unsigned long *)&ghcb->save.valid_bitmap);     \
        }                                                                       \
                                                                                \
+       static inline u64 ghcb_get_##field(struct ghcb *ghcb)                   \
+       {                                                                       \
+               return ghcb->save.field;                                        \
+       }                                                                       \
+                                                                               \
+       static inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb)        \
+       {                                                                       \
+               return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0;    \
+       }                                                                       \
+                                                                               \
        static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value)       \
        {                                                                       \
                __set_bit(GHCB_BITMAP_IDX(field),                               \
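
The hunk above extends the per-field GHCB accessor-generating macro (DEFINE_GHCB_ACCESSORS): alongside the existing validity test and the setter shown below it, each field now also gets a plain getter and a getter that returns 0 unless the field's valid bit is set. A minimal userspace sketch of the same generate-accessors-from-a-macro pattern, with a toy two-field save area standing in for the real GHCB layout (all names below are illustrative, not the kernel's):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Toy save area: one u64 slot per "register", one valid bit per slot. */
struct toy_save {
        uint64_t rbx;
        uint64_t rcx;
        uint64_t valid_bitmap;          /* bit n covers the n-th u64 slot */
};

#define TOY_BITMAP_IDX(field) \
        (offsetof(struct toy_save, field) / sizeof(uint64_t))

#define DEFINE_TOY_ACCESSORS(field)                                     \
        static inline int toy_##field##_is_valid(const struct toy_save *s) \
        {                                                               \
                return (s->valid_bitmap >> TOY_BITMAP_IDX(field)) & 1;  \
        }                                                               \
                                                                        \
        static inline uint64_t toy_get_##field(const struct toy_save *s) \
        {                                                               \
                return s->field;                                        \
        }                                                               \
                                                                        \
        static inline uint64_t toy_get_##field##_if_valid(const struct toy_save *s) \
        {                                                               \
                return toy_##field##_is_valid(s) ? s->field : 0;        \
        }                                                               \
                                                                        \
        static inline void toy_set_##field(struct toy_save *s, uint64_t v) \
        {                                                               \
                s->valid_bitmap |= 1ULL << TOY_BITMAP_IDX(field);       \
                s->field = v;                                           \
        }

DEFINE_TOY_ACCESSORS(rbx)
DEFINE_TOY_ACCESSORS(rcx)

int main(void)
{
        struct toy_save s = { 0 };

        toy_set_rbx(&s, 0x1234);
        /* rcx was never set, so the _if_valid getter returns 0. */
        printf("rbx=%llx rcx=%llx\n",
               (unsigned long long)toy_get_rbx_if_valid(&s),
               (unsigned long long)toy_get_rcx_if_valid(&s));
        return 0;
}
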
index 0fd4a9d..ab7382f 100644 (file)
@@ -98,12 +98,13 @@ static inline void sync_core_before_usermode(void)
        /* With PTI, we unconditionally serialize before running user code. */
        if (static_cpu_has(X86_FEATURE_PTI))
                return;
+
        /*
-        * Return from interrupt and NMI is done through iret, which is core
-        * serializing.
+        * Even if we're in an interrupt, we might reschedule before returning,
+        * in which case we could switch to a different thread in the same mm
+        * and return using SYSRET or SYSEXIT.  Instead of trying to keep
+        * track of our need to sync the core, just sync right away.
         */
-       if (in_irq() || in_nmi())
-               return;
        sync_core();
 }
 
index f8ba528..38ca445 100644 (file)
 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK    0x0000001f
 #define VMX_MISC_SAVE_EFER_LMA                 0x00000020
 #define VMX_MISC_ACTIVITY_HLT                  0x00000040
+#define VMX_MISC_ACTIVITY_WAIT_SIPI            0x00000100
 #define VMX_MISC_ZERO_LEN_INS                  0x40000000
 #define VMX_MISC_MSR_LIST_MULTIPLIER           512
 
index 89e5f3d..8e76d37 100644 (file)
@@ -12,6 +12,7 @@
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
 #define DE_VECTOR 0
 #define DB_VECTOR 1
index f1d8307..554f75f 100644 (file)
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
 #define SVM_EXIT_RDPRU         0x08e
+#define SVM_EXIT_EFER_WRITE_TRAP               0x08f
+#define SVM_EXIT_CR0_WRITE_TRAP                        0x090
+#define SVM_EXIT_CR1_WRITE_TRAP                        0x091
+#define SVM_EXIT_CR2_WRITE_TRAP                        0x092
+#define SVM_EXIT_CR3_WRITE_TRAP                        0x093
+#define SVM_EXIT_CR4_WRITE_TRAP                        0x094
+#define SVM_EXIT_CR5_WRITE_TRAP                        0x095
+#define SVM_EXIT_CR6_WRITE_TRAP                        0x096
+#define SVM_EXIT_CR7_WRITE_TRAP                        0x097
+#define SVM_EXIT_CR8_WRITE_TRAP                        0x098
+#define SVM_EXIT_CR9_WRITE_TRAP                        0x099
+#define SVM_EXIT_CR10_WRITE_TRAP               0x09a
+#define SVM_EXIT_CR11_WRITE_TRAP               0x09b
+#define SVM_EXIT_CR12_WRITE_TRAP               0x09c
+#define SVM_EXIT_CR13_WRITE_TRAP               0x09d
+#define SVM_EXIT_CR14_WRITE_TRAP               0x09e
+#define SVM_EXIT_CR15_WRITE_TRAP               0x09f
 #define SVM_EXIT_INVPCID       0x0a2
 #define SVM_EXIT_NPF           0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI           0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS     0x402
+#define SVM_EXIT_VMGEXIT       0x403
 
 /* SEV-ES software-defined VMGEXIT events */
 #define SVM_VMGEXIT_MMIO_READ                  0x80000001
        { SVM_EXIT_MONITOR,     "monitor" }, \
        { SVM_EXIT_MWAIT,       "mwait" }, \
        { SVM_EXIT_XSETBV,      "xsetbv" }, \
+       { SVM_EXIT_EFER_WRITE_TRAP,     "write_efer_trap" }, \
+       { SVM_EXIT_CR0_WRITE_TRAP,      "write_cr0_trap" }, \
+       { SVM_EXIT_CR4_WRITE_TRAP,      "write_cr4_trap" }, \
+       { SVM_EXIT_CR8_WRITE_TRAP,      "write_cr8_trap" }, \
        { SVM_EXIT_INVPCID,     "invpcid" }, \
        { SVM_EXIT_NPF,         "npf" }, \
        { SVM_EXIT_AVIC_INCOMPLETE_IPI,         "avic_incomplete_ipi" }, \
        { SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }, \
+       { SVM_EXIT_VMGEXIT,             "vmgexit" }, \
+       { SVM_VMGEXIT_MMIO_READ,        "vmgexit_mmio_read" }, \
+       { SVM_VMGEXIT_MMIO_WRITE,       "vmgexit_mmio_write" }, \
+       { SVM_VMGEXIT_NMI_COMPLETE,     "vmgexit_nmi_complete" }, \
+       { SVM_VMGEXIT_AP_HLT_LOOP,      "vmgexit_ap_hlt_loop" }, \
+       { SVM_VMGEXIT_AP_JUMP_TABLE,    "vmgexit_ap_jump_table" }, \
        { SVM_EXIT_ERR,         "invalid_guest_state" }
 
 
index b8ff9e8..ada955c 100644 (file)
@@ -32,6 +32,7 @@
 #define EXIT_REASON_EXTERNAL_INTERRUPT  1
 #define EXIT_REASON_TRIPLE_FAULT        2
 #define EXIT_REASON_INIT_SIGNAL                        3
+#define EXIT_REASON_SIPI_SIGNAL         4
 
 #define EXIT_REASON_INTERRUPT_WINDOW    7
 #define EXIT_REASON_NMI_WINDOW          8
@@ -94,6 +95,7 @@
        { EXIT_REASON_EXTERNAL_INTERRUPT,    "EXTERNAL_INTERRUPT" }, \
        { EXIT_REASON_TRIPLE_FAULT,          "TRIPLE_FAULT" }, \
        { EXIT_REASON_INIT_SIGNAL,           "INIT_SIGNAL" }, \
+       { EXIT_REASON_SIPI_SIGNAL,           "SIPI_SIGNAL" }, \
        { EXIT_REASON_INTERRUPT_WINDOW,      "INTERRUPT_WINDOW" }, \
        { EXIT_REASON_NMI_WINDOW,            "NMI_WINDOW" }, \
        { EXIT_REASON_TASK_SWITCH,           "TASK_SWITCH" }, \
index 1eac536..758bbf2 100644 (file)
@@ -273,20 +273,24 @@ static int assign_irq_vector_any_locked(struct irq_data *irqd)
        const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
        int node = irq_data_get_node(irqd);
 
-       if (node == NUMA_NO_NODE)
-               goto all;
-       /* Try the intersection of @affmsk and node mask */
-       cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
-       if (!assign_vector_locked(irqd, vector_searchmask))
-               return 0;
-       /* Try the node mask */
-       if (!assign_vector_locked(irqd, cpumask_of_node(node)))
-               return 0;
-all:
+       if (node != NUMA_NO_NODE) {
+               /* Try the intersection of @affmsk and node mask */
+               cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
+               if (!assign_vector_locked(irqd, vector_searchmask))
+                       return 0;
+       }
+
        /* Try the full affinity mask */
        cpumask_and(vector_searchmask, affmsk, cpu_online_mask);
        if (!assign_vector_locked(irqd, vector_searchmask))
                return 0;
+
+       if (node != NUMA_NO_NODE) {
+               /* Try the node mask */
+               if (!assign_vector_locked(irqd, cpumask_of_node(node)))
+                       return 0;
+       }
+
        /* Try the full online mask */
        return assign_vector_locked(irqd, cpu_online_mask);
 }
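
The restructuring above also changes the fallback order: the intersection of the affinity and node masks is tried first, then the full affinity mask, and only then the bare node mask and the online mask, so a vector outside the requested affinity is handed out only as a last resort. A toy model of that cascade, using single-word cpumasks and a hypothetical try_assign() standing in for assign_vector_locked():

#include <stdint.h>
#include <stdio.h>

typedef uint64_t cpumask_t;     /* toy: one bit per CPU, up to 64 CPUs */

/* Hypothetical allocator: succeeds iff the mask contains a CPU that
 * still has a free vector. */
static int try_assign(cpumask_t candidates, cpumask_t free_cpus)
{
        return (candidates & free_cpus) ? 0 : -1;
}

/*
 * Mirror of the reworked search order: affinity-and-node first, then the
 * full affinity mask, then the bare node mask, then any online CPU.
 */
static int assign_any(cpumask_t affinity, cpumask_t node, cpumask_t online,
                      cpumask_t free_cpus, int have_node)
{
        if (have_node && !try_assign(affinity & node, free_cpus))
                return 0;
        if (!try_assign(affinity & online, free_cpus))
                return 0;
        if (have_node && !try_assign(node, free_cpus))
                return 0;
        return try_assign(online, free_cpus);
}

int main(void)
{
        /*
         * Node holds CPUs 0-1, affinity asks for CPUs 1-2, CPUs 0-3 are
         * online, but only CPUs 2-3 have free vectors: the node steps
         * fail and the full affinity mask (CPU 2) wins, rather than
         * spilling to the whole node first as the old order did.
         */
        printf("%d\n", assign_any(0x6, 0x3, 0xf, 0xc, 1));
        return 0;
}
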
index 1b98f8c..235f5cd 100644 (file)
@@ -161,7 +161,7 @@ static int __init early_set_hub_type(void)
        /* UV4/4A only have a revision difference */
        case UV4_HUB_PART_NUMBER:
                uv_min_hub_revision_id = node_id.s.revision
-                                        + UV4_HUB_REVISION_BASE;
+                                        + UV4_HUB_REVISION_BASE - 1;
                uv_hub_type_set(UV4);
                if (uv_min_hub_revision_id == UV4A_HUB_REVISION_BASE)
                        uv_hub_type_set(UV4|UV4A);
index 581fb72..d41b70f 100644 (file)
@@ -739,11 +739,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
        if (boot_cpu_has(X86_FEATURE_IBPB)) {
                setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
 
+               spectre_v2_user_ibpb = mode;
                switch (cmd) {
                case SPECTRE_V2_USER_CMD_FORCE:
                case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
                case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
                        static_branch_enable(&switch_mm_always_ibpb);
+                       spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT;
                        break;
                case SPECTRE_V2_USER_CMD_PRCTL:
                case SPECTRE_V2_USER_CMD_AUTO:
@@ -757,8 +759,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
                pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
                        static_key_enabled(&switch_mm_always_ibpb) ?
                        "always-on" : "conditional");
-
-               spectre_v2_user_ibpb = mode;
        }
 
        /*
index d502241..42af31b 100644 (file)
@@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = {
        { X86_FEATURE_CQM_MBM_TOTAL,            X86_FEATURE_CQM_LLC   },
        { X86_FEATURE_CQM_MBM_LOCAL,            X86_FEATURE_CQM_LLC   },
        { X86_FEATURE_AVX512_BF16,              X86_FEATURE_AVX512VL  },
+       { X86_FEATURE_AVX512_FP16,              X86_FEATURE_AVX512BW  },
        { X86_FEATURE_ENQCMD,                   X86_FEATURE_XSAVES    },
        { X86_FEATURE_PER_THREAD_MBA,           X86_FEATURE_MBA       },
        {}
index 4102b86..32b7099 100644 (file)
@@ -1384,8 +1384,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
         * When there's any problem use only local no_way_out state.
         */
        if (!lmce) {
-               if (mce_end(order) < 0)
-                       no_way_out = worst >= MCE_PANIC_SEVERITY;
+               if (mce_end(order) < 0) {
+                       if (!no_way_out)
+                               no_way_out = worst >= MCE_PANIC_SEVERITY;
+               }
        } else {
                /*
                 * If there was a fatal machine check we should have
index 6a99535..7e8e07b 100644 (file)
@@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev
        return find_matching_signature(mc, csig, cpf);
 }
 
-/*
- * Given CPU signature and a microcode patch, this function finds if the
- * microcode patch has matching family and model with the CPU.
- *
- * %true - if there's a match
- * %false - otherwise
- */
-static bool microcode_matches(struct microcode_header_intel *mc_header,
-                             unsigned long sig)
-{
-       unsigned long total_size = get_totalsize(mc_header);
-       unsigned long data_size = get_datasize(mc_header);
-       struct extended_sigtable *ext_header;
-       unsigned int fam_ucode, model_ucode;
-       struct extended_signature *ext_sig;
-       unsigned int fam, model;
-       int ext_sigcount, i;
-
-       fam   = x86_family(sig);
-       model = x86_model(sig);
-
-       fam_ucode   = x86_family(mc_header->sig);
-       model_ucode = x86_model(mc_header->sig);
-
-       if (fam == fam_ucode && model == model_ucode)
-               return true;
-
-       /* Look for ext. headers: */
-       if (total_size <= data_size + MC_HEADER_SIZE)
-               return false;
-
-       ext_header   = (void *) mc_header + data_size + MC_HEADER_SIZE;
-       ext_sig      = (void *)ext_header + EXT_HEADER_SIZE;
-       ext_sigcount = ext_header->count;
-
-       for (i = 0; i < ext_sigcount; i++) {
-               fam_ucode   = x86_family(ext_sig->sig);
-               model_ucode = x86_model(ext_sig->sig);
-
-               if (fam == fam_ucode && model == model_ucode)
-                       return true;
-
-               ext_sig++;
-       }
-       return false;
-}
-
 static struct ucode_patch *memdup_patch(void *data, unsigned int size)
 {
        struct ucode_patch *p;
@@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch(void *data, unsigned int size)
        return p;
 }
 
-static void save_microcode_patch(void *data, unsigned int size)
+static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size)
 {
        struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
        struct ucode_patch *iter, *tmp, *p = NULL;
@@ -210,6 +163,9 @@ static void save_microcode_patch(void *data, unsigned int size)
        if (!p)
                return;
 
+       if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf))
+               return;
+
        /*
         * Save for early loading. On 32-bit, that needs to be a physical
         * address as the APs are running from physical addresses, before
@@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save)
 
                size -= mc_size;
 
-               if (!microcode_matches(mc_header, uci->cpu_sig.sig)) {
+               if (!find_matching_signature(data, uci->cpu_sig.sig,
+                                            uci->cpu_sig.pf)) {
                        data += mc_size;
                        continue;
                }
 
                if (save) {
-                       save_microcode_patch(data, mc_size);
+                       save_microcode_patch(uci, data, mc_size);
                        goto next;
                }
 
@@ -483,14 +440,14 @@ static void show_saved_mc(void)
  * Save this microcode patch. It will be loaded early when a CPU is
  * hot-added or resumes.
  */
-static void save_mc_for_early(u8 *mc, unsigned int size)
+static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size)
 {
        /* Synchronization during CPU hotplug. */
        static DEFINE_MUTEX(x86_cpu_microcode_mutex);
 
        mutex_lock(&x86_cpu_microcode_mutex);
 
-       save_microcode_patch(mc, size);
+       save_microcode_patch(uci, mc, size);
        show_saved_mc();
 
        mutex_unlock(&x86_cpu_microcode_mutex);
@@ -935,7 +892,7 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter)
         * permanent memory. So it will be loaded early when a CPU is hot added
         * or resumes.
         */
-       save_mc_for_early(new_mc, new_mc_size);
+       save_mc_for_early(uci, new_mc, new_mc_size);
 
        pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
                 cpu, new_rev, uci->cpu_sig.rev);
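
The net effect of the microcode loader changes: the loose family/model test (microcode_matches(), deleted above) is gone, and both scanning and saving now demand an exact signature plus platform-flags match via find_matching_signature(), with save_microcode_patch() re-checking the patch against the current CPU before keeping it for early loading. A hedged toy model of the tightened check; the real helper also walks an extended signature table, which this flat version omits:

#include <stdbool.h>
#include <stdio.h>

struct toy_patch {
        unsigned int sig;       /* CPUID(1).EAX-style signature */
        unsigned int pf;        /* platform-flags bitmask */
};

/* Exact signature match plus overlapping platform flags. */
static bool toy_matching_signature(const struct toy_patch *p,
                                   unsigned int cpu_sig, unsigned int cpu_pf)
{
        return p->sig == cpu_sig && (p->pf & cpu_pf);
}

static void toy_save_patch(const struct toy_patch *p,
                           unsigned int cpu_sig, unsigned int cpu_pf)
{
        if (!toy_matching_signature(p, cpu_sig, cpu_pf)) {
                printf("skipping patch sig=%#x pf=%#x\n", p->sig, p->pf);
                return;
        }
        printf("saving patch sig=%#x pf=%#x\n", p->sig, p->pf);
}

int main(void)
{
        /* Same family/model as the CPU but a different stepping: the old
         * family/model-only comparison would have accepted this patch. */
        struct toy_patch wrong_stepping = { 0x000506e2, 0x1 };

        toy_save_patch(&wrong_stepping, 0x000506e3, 0x1);
        return 0;
}
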
index e5f4ee8..e8b5f1c 100644 (file)
@@ -570,6 +570,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 
        if (d) {
                cpumask_set_cpu(cpu, &d->cpu_mask);
+               if (r->cache.arch_has_per_cpu_cfg)
+                       rdt_domain_reconfigure_cdp(r);
                return;
        }
 
@@ -923,6 +925,7 @@ static __init void rdt_init_res_defs_intel(void)
                    r->rid == RDT_RESOURCE_L2CODE) {
                        r->cache.arch_has_sparse_bitmaps = false;
                        r->cache.arch_has_empty_bitmaps = false;
+                       r->cache.arch_has_per_cpu_cfg = false;
                } else if (r->rid == RDT_RESOURCE_MBA) {
                        r->msr_base = MSR_IA32_MBA_THRTL_BASE;
                        r->msr_update = mba_wrmsr_intel;
@@ -943,6 +946,7 @@ static __init void rdt_init_res_defs_amd(void)
                    r->rid == RDT_RESOURCE_L2CODE) {
                        r->cache.arch_has_sparse_bitmaps = true;
                        r->cache.arch_has_empty_bitmaps = true;
+                       r->cache.arch_has_per_cpu_cfg = true;
                } else if (r->rid == RDT_RESOURCE_MBA) {
                        r->msr_base = MSR_IA32_MBA_BW_BASE;
                        r->msr_update = mba_wrmsr_amd;
index 80fa997..f65d3c0 100644 (file)
@@ -360,6 +360,8 @@ struct msr_param {
  *                     executing entities
  * @arch_has_sparse_bitmaps:   True if a bitmap like f00f is valid.
  * @arch_has_empty_bitmaps:    True if the '0' bitmap is valid.
+ * @arch_has_per_cpu_cfg:      True if QOS_CFG register for this cache
+ *                             level has CPU scope.
  */
 struct rdt_cache {
        unsigned int    cbm_len;
@@ -369,6 +371,7 @@ struct rdt_cache {
        unsigned int    shareable_bits;
        bool            arch_has_sparse_bitmaps;
        bool            arch_has_empty_bitmaps;
+       bool            arch_has_per_cpu_cfg;
 };
 
 /**
index 54dffe5..a98519a 100644 (file)
@@ -279,7 +279,6 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
                return;
 
        chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
-       m->chunks += chunks;
        cur_bw = (chunks * r->mon_scale) >> 20;
 
        if (m->delta_comp)
@@ -450,15 +449,14 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
        }
        if (is_mbm_local_enabled()) {
                rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
+               __mon_event_count(rmid, &rr);
 
                /*
                 * Call the MBA software controller only for the
                 * control groups and when user has enabled
                 * the software controller explicitly.
                 */
-               if (!is_mba_sc(NULL))
-                       __mon_event_count(rmid, &rr);
-               else
+               if (is_mba_sc(NULL))
                        mbm_bw_count(rmid, &rr);
        }
 }
index af323e2..f341842 100644 (file)
@@ -507,6 +507,24 @@ unlock:
        return ret ?: nbytes;
 }
 
+/**
+ * rdtgroup_remove - the helper to remove resource group safely
+ * @rdtgrp: resource group to remove
+ *
+ * On resource group creation via a mkdir, an extra kernfs_node reference is
+ * taken to ensure that the rdtgroup structure remains accessible for the
+ * rdtgroup_kn_unlock() calls where it is removed.
+ *
+ * Drop the extra reference here, then free the rdtgroup structure.
+ *
+ * Return: void
+ */
+static void rdtgroup_remove(struct rdtgroup *rdtgrp)
+{
+       kernfs_put(rdtgrp->kn);
+       kfree(rdtgrp);
+}
+
 struct task_move_callback {
        struct callback_head    work;
        struct rdtgroup         *rdtgrp;
@@ -529,7 +547,7 @@ static void move_myself(struct callback_head *head)
            (rdtgrp->flags & RDT_DELETED)) {
                current->closid = 0;
                current->rmid = 0;
-               kfree(rdtgrp);
+               rdtgroup_remove(rdtgrp);
        }
 
        if (unlikely(current->flags & PF_EXITING))
@@ -1769,7 +1787,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
        if (IS_ERR(kn_subdir))
                return PTR_ERR(kn_subdir);
 
-       kernfs_get(kn_subdir);
        ret = rdtgroup_kn_set_ugid(kn_subdir);
        if (ret)
                return ret;
@@ -1792,7 +1809,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
        kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
        if (IS_ERR(kn_info))
                return PTR_ERR(kn_info);
-       kernfs_get(kn_info);
 
        ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
        if (ret)
@@ -1813,12 +1829,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
                        goto out_destroy;
        }
 
-       /*
-        * This extra ref will be put in kernfs_remove() and guarantees
-        * that @rdtgrp->kn is always accessible.
-        */
-       kernfs_get(kn_info);
-
        ret = rdtgroup_kn_set_ugid(kn_info);
        if (ret)
                goto out_destroy;
@@ -1847,12 +1857,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
        if (dest_kn)
                *dest_kn = kn;
 
-       /*
-        * This extra ref will be put in kernfs_remove() and guarantees
-        * that @rdtgrp->kn is always accessible.
-        */
-       kernfs_get(kn);
-
        ret = rdtgroup_kn_set_ugid(kn);
        if (ret)
                goto out_destroy;
@@ -1905,8 +1909,13 @@ static int set_cache_qos_cfg(int level, bool enable)
 
        r_l = &rdt_resources_all[level];
        list_for_each_entry(d, &r_l->domains, list) {
-               /* Pick one CPU from each domain instance to update MSR */
-               cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+               if (r_l->cache.arch_has_per_cpu_cfg)
+                       /* Pick all the CPUs in the domain instance */
+                       for_each_cpu(cpu, &d->cpu_mask)
+                               cpumask_set_cpu(cpu, cpu_mask);
+               else
+                       /* Pick one CPU from each domain instance to update MSR */
+                       cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
        }
        cpu = get_cpu();
        /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
@@ -2079,8 +2088,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
                    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
                        rdtgroup_pseudo_lock_remove(rdtgrp);
                kernfs_unbreak_active_protection(kn);
-               kernfs_put(rdtgrp->kn);
-               kfree(rdtgrp);
+               rdtgroup_remove(rdtgrp);
        } else {
                kernfs_unbreak_active_protection(kn);
        }
@@ -2139,13 +2147,11 @@ static int rdt_get_tree(struct fs_context *fc)
                                          &kn_mongrp);
                if (ret < 0)
                        goto out_info;
-               kernfs_get(kn_mongrp);
 
                ret = mkdir_mondata_all(rdtgroup_default.kn,
                                        &rdtgroup_default, &kn_mondata);
                if (ret < 0)
                        goto out_mongrp;
-               kernfs_get(kn_mondata);
                rdtgroup_default.mon.mon_data_kn = kn_mondata;
        }
 
@@ -2357,7 +2363,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
                if (atomic_read(&sentry->waitcount) != 0)
                        sentry->flags = RDT_DELETED;
                else
-                       kfree(sentry);
+                       rdtgroup_remove(sentry);
        }
 }
 
@@ -2399,7 +2405,7 @@ static void rmdir_all_sub(void)
                if (atomic_read(&rdtgrp->waitcount) != 0)
                        rdtgrp->flags = RDT_DELETED;
                else
-                       kfree(rdtgrp);
+                       rdtgroup_remove(rdtgrp);
        }
        /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
        update_closid_rmid(cpu_online_mask, &rdtgroup_default);
@@ -2499,11 +2505,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
        if (IS_ERR(kn))
                return PTR_ERR(kn);
 
-       /*
-        * This extra ref will be put in kernfs_remove() and guarantees
-        * that kn is always accessible.
-        */
-       kernfs_get(kn);
        ret = rdtgroup_kn_set_ugid(kn);
        if (ret)
                goto out_destroy;
@@ -2838,8 +2839,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
        /*
         * kernfs_remove() will drop the reference count on "kn" which
         * will free it. But we still need it to stick around for the
-        * rdtgroup_kn_unlock(kn} call below. Take one extra reference
-        * here, which will be dropped inside rdtgroup_kn_unlock().
+        * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
+        * which will be dropped by kernfs_put() in rdtgroup_remove().
         */
        kernfs_get(kn);
 
@@ -2880,6 +2881,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 out_idfree:
        free_rmid(rdtgrp->mon.rmid);
 out_destroy:
+       kernfs_put(rdtgrp->kn);
        kernfs_remove(rdtgrp->kn);
 out_free_rgrp:
        kfree(rdtgrp);
@@ -2892,7 +2894,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
 {
        kernfs_remove(rgrp->kn);
        free_rmid(rgrp->mon.rmid);
-       kfree(rgrp);
+       rdtgroup_remove(rgrp);
 }
 
 /*
@@ -3049,11 +3051,6 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
        WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
        list_del(&rdtgrp->mon.crdtgrp_list);
 
-       /*
-        * one extra hold on this, will drop when we kfree(rdtgrp)
-        * in rdtgroup_kn_unlock()
-        */
-       kernfs_get(kn);
        kernfs_remove(rdtgrp->kn);
 
        return 0;
@@ -3065,11 +3062,6 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
        rdtgrp->flags = RDT_DELETED;
        list_del(&rdtgrp->rdtgroup_list);
 
-       /*
-        * one extra hold on this, will drop when we kfree(rdtgrp)
-        * in rdtgroup_kn_unlock()
-        */
-       kernfs_get(kn);
        kernfs_remove(rdtgrp->kn);
        return 0;
 }
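
Taken together, the rdtgroup.c changes replace the scattered kernfs_get()/kfree() pairs with one lifetime rule: the creation path takes a single extra kernfs_node reference, and rdtgroup_remove() is the only place that drops it and frees the group. A minimal userspace sketch of that rule, with a plain refcount standing in for kernfs_get()/kernfs_put():

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-in for a kernfs node: a bare refcount. */
struct toy_kn {
        int ref;
};

static void toy_kn_get(struct toy_kn *kn) { kn->ref++; }

static void toy_kn_put(struct toy_kn *kn)
{
        if (--kn->ref == 0)
                free(kn);
}

struct toy_group {
        struct toy_kn *kn;
};

/* mkdir path: creation takes one extra reference on the node. */
static struct toy_group *toy_group_create(void)
{
        struct toy_group *grp = malloc(sizeof(*grp));

        grp->kn = malloc(sizeof(*grp->kn));
        grp->kn->ref = 1;       /* the kernfs tree's own reference */
        toy_kn_get(grp->kn);    /* the extra reference the group holds */
        return grp;
}

/* The single teardown helper, mirroring rdtgroup_remove(). */
static void toy_group_remove(struct toy_group *grp)
{
        toy_kn_put(grp->kn);
        free(grp);
}

int main(void)
{
        struct toy_group *grp = toy_group_create();

        toy_kn_put(grp->kn);    /* the kernfs_remove() side drops its ref */
        toy_group_remove(grp);  /* the group side drops the extra ref */
        printf("done\n");
        return 0;
}
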
index 866c9a9..2369249 100644 (file)
@@ -44,6 +44,7 @@ static const struct cpuid_bit cpuid_bits[] = {
        { X86_FEATURE_SEV,              CPUID_EAX,  1, 0x8000001f, 0 },
        { X86_FEATURE_SEV_ES,           CPUID_EAX,  3, 0x8000001f, 0 },
        { X86_FEATURE_SME_COHERENT,     CPUID_EAX, 10, 0x8000001f, 0 },
+       { X86_FEATURE_VM_PAGE_FLUSH,    CPUID_EAX,  2, 0x8000001f, 0 },
        { 0, 0, 0, 0, 0 }
 };
 
index 924571f..c6ede3b 100644 (file)
@@ -501,12 +501,12 @@ static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
              ghcb_rbp_is_valid(ghcb)))
                return false;
 
-       regs->bx = ghcb->save.rbx;
-       regs->cx = ghcb->save.rcx;
-       regs->dx = ghcb->save.rdx;
-       regs->si = ghcb->save.rsi;
-       regs->di = ghcb->save.rdi;
-       regs->bp = ghcb->save.rbp;
+       regs->bx = ghcb_get_rbx(ghcb);
+       regs->cx = ghcb_get_rcx(ghcb);
+       regs->dx = ghcb_get_rdx(ghcb);
+       regs->si = ghcb_get_rsi(ghcb);
+       regs->di = ghcb_get_rdi(ghcb);
+       regs->bp = ghcb_get_rbp(ghcb);
 
        return true;
 }
index 25c06b6..97aa900 100644 (file)
@@ -78,6 +78,9 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
        if (!user_mode(regs))
                return copy_from_kernel_nofault(buf, (u8 *)src, nbytes);
 
+       /* The user space code from other tasks cannot be accessed. */
+       if (regs != task_pt_regs(current))
+               return -EPERM;
        /*
         * Make sure userspace isn't trying to trick us into dumping kernel
         * memory by pointing the userspace instruction pointer at it.
@@ -85,6 +88,12 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
        if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX))
                return -EINVAL;
 
+       /*
+        * Despite its name, copy_from_user_nmi() can be invoked from
+        * contexts other than NMI as well, and it never tries to resolve
+        * a page fault, which is the correct behavior here because this
+        * code can be called from any context.
+        */
        return copy_from_user_nmi(buf, (void __user *)src, nbytes);
 }
 
@@ -115,13 +124,19 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
        u8 opcodes[OPCODE_BUFSIZE];
        unsigned long prologue = regs->ip - PROLOGUE_SIZE;
 
-       if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
-               printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n",
-                      loglvl, prologue);
-       } else {
+       switch (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
+       case 0:
                printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
                       __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes,
                       opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1);
+               break;
+       case -EPERM:
+               /* No access to the user space stack of other tasks. Ignore. */
+               break;
+       default:
+               printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n",
+                      loglvl, prologue);
+               break;
        }
 }
 
index 041f0b5..08eb230 100644 (file)
@@ -272,6 +272,19 @@ static int insn_is_indirect_jump(struct insn *insn)
        return ret;
 }
 
+static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
+{
+       unsigned char ops;
+
+       for (; addr < eaddr; addr++) {
+               if (get_kernel_nofault(ops, (void *)addr) < 0 ||
+                   ops != INT3_INSN_OPCODE)
+                       return false;
+       }
+
+       return true;
+}
+
 /* Decode whole function to ensure any instructions don't jump into target */
 static int can_optimize(unsigned long paddr)
 {
@@ -310,9 +323,14 @@ static int can_optimize(unsigned long paddr)
                        return 0;
                kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
                insn_get_length(&insn);
-               /* Another subsystem puts a breakpoint */
+               /*
+                * If an unknown breakpoint is detected, it could be a
+                * padding INT3 between functions. Check that all of the
+                * remaining bytes are also INT3.
+                */
                if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
-                       return 0;
+                       return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;
+
                /* Recover address */
                insn.kaddr = (void *)addr;
                insn.next_byte = (void *)(addr + insn.length);
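
For reference, the padding scan itself is simple once the bytes can be read safely; a userspace analogue of is_padding_int3(), minus the get_kernel_nofault() handling:

#include <stdbool.h>
#include <stdio.h>

#define INT3_INSN_OPCODE 0xcc

/* True iff every byte in [p, end) is an INT3 opcode. */
static bool toy_is_padding_int3(const unsigned char *p,
                                const unsigned char *end)
{
        for (; p < end; p++)
                if (*p != INT3_INSN_OPCODE)
                        return false;
        return true;
}

int main(void)
{
        unsigned char padding[] = { 0xcc, 0xcc, 0xcc };
        unsigned char code[]    = { 0xcc, 0x90, 0xcc };

        printf("%d %d\n",
               toy_is_padding_int3(padding, padding + sizeof(padding)),
               toy_is_padding_int3(code, code + sizeof(code)));
        return 0;
}
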
index 34b18f6..aa59374 100644 (file)
@@ -44,7 +44,6 @@ static int __init parse_no_kvmclock_vsyscall(char *arg)
 early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
 
 /* Aligned to page sizes to match what's mapped via vsyscalls to userspace */
-#define HV_CLOCK_SIZE  (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS)
 #define HVC_BOOT_ARRAY_SIZE \
        (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
 
index ba4593a..145a7ac 100644 (file)
@@ -685,7 +685,7 @@ void arch_cpu_idle(void)
  */
 void __cpuidle default_idle(void)
 {
-       safe_halt();
+       raw_safe_halt();
 }
 #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
 EXPORT_SYMBOL(default_idle);
@@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy)
 /*
  * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
  * states (local apic timer and TSC stop).
+ *
+ * XXX this function is completely buggered vs RCU and tracing.
  */
 static void amd_e400_idle(void)
 {
@@ -757,9 +759,9 @@ static void amd_e400_idle(void)
         * The switch back from broadcast mode needs to be called with
         * interrupts disabled.
         */
-       local_irq_disable();
+       raw_local_irq_disable();
        tick_broadcast_exit();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /*
@@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void)
                if (!need_resched())
                        __sti_mwait(0, 0);
                else
-                       local_irq_enable();
+                       raw_local_irq_enable();
        } else {
-               local_irq_enable();
+               raw_local_irq_enable();
        }
        __current_clr_polling();
 }
index 992fb14..ae64f98 100644 (file)
@@ -514,16 +514,10 @@ int tboot_force_iommu(void)
        if (!tboot_enabled())
                return 0;
 
-       if (intel_iommu_tboot_noforce)
-               return 1;
-
-       if (no_iommu || swiotlb || dmar_disabled)
+       if (no_iommu || dmar_disabled)
                pr_warn("Forcing Intel-IOMMU to enabled\n");
 
        dmar_disabled = 0;
-#ifdef CONFIG_SWIOTLB
-       swiotlb = 0;
-#endif
        no_iommu = 0;
 
        return 1;
index 3fdaa04..138bdb1 100644 (file)
@@ -255,12 +255,13 @@ static volatile u32 good_2byte_insns[256 / 32] = {
 
 static bool is_prefix_bad(struct insn *insn)
 {
+       insn_byte_t p;
        int i;
 
-       for (i = 0; i < insn->prefixes.nbytes; i++) {
+       for_each_insn_prefix(insn, i, p) {
                insn_attr_t attr;
 
-               attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]);
+               attr = inat_get_opcode_attribute(p);
                switch (attr) {
                case INAT_MAKE_PREFIX(INAT_PFX_ES):
                case INAT_MAKE_PREFIX(INAT_PFX_CS):
@@ -715,6 +716,7 @@ static const struct uprobe_xol_ops push_xol_ops = {
 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
        u8 opc1 = OPCODE1(insn);
+       insn_byte_t p;
        int i;
 
        switch (opc1) {
@@ -746,8 +748,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
         * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
         * No one uses these insns, reject any branch insns with such prefix.
         */
-       for (i = 0; i < insn->prefixes.nbytes; i++) {
-               if (insn->prefixes.bytes[i] == 0x66)
+       for_each_insn_prefix(insn, i, p) {
+               if (p == 0x66)
                        return -ENOTSUPP;
        }
 
index f92dfd8..7ac5926 100644 (file)
@@ -100,7 +100,8 @@ config KVM_AMD_SEV
        depends on KVM_AMD && X86_64
        depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
        help
-       Provides support for launching Encrypted VMs on AMD processors.
+         Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
+         with Encrypted State (SEV-ES) on AMD processors.
 
 config KVM_MMU_AUDIT
        bool "Audit KVM MMU"
index b804444..4bd14ab 100644 (file)
@@ -10,7 +10,8 @@ endif
 KVM := ../../../virt/kvm
 
 kvm-y                  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
-                               $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
+                               $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \
+                               $(KVM)/dirty_ring.o
 kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
 
 kvm-y                  += x86.o emulate.o i8259.o irq.o lapic.o \
index 83637a2..13036cf 100644 (file)
@@ -146,6 +146,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
                                           MSR_IA32_MISC_ENABLE_MWAIT);
        }
 }
+EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
 
 static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
@@ -418,7 +419,7 @@ void kvm_set_cpu_caps(void)
                F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
                F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
                F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
-               F(SERIALIZE) | F(TSXLDTRK)
+               F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16)
        );
 
        /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
index f7a6e8f..dc921d7 100644 (file)
@@ -264,6 +264,20 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
        return x86_stepping(best->eax);
 }
 
+static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
+{
+       return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
+               guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) ||
+               guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) ||
+               guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD));
+}
+
+static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
+{
+       return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
+               guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB));
+}
+
 static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
index 5c7c406..922c69d 100644 (file)
@@ -1951,8 +1951,8 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
        return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
 }
 
-int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
-                               struct kvm_cpuid_entry2 __user *entries)
+int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
+                    struct kvm_cpuid_entry2 __user *entries)
 {
        uint16_t evmcs_ver = 0;
        struct kvm_cpuid_entry2 cpuid_entries[] = {
@@ -2037,7 +2037,7 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
                         * Direct Synthetic timers only make sense with in-kernel
                         * LAPIC
                         */
-                       if (lapic_in_kernel(vcpu))
+                       if (!vcpu || lapic_in_kernel(vcpu))
                                ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
 
                        break;
index e68c6c2..6d7def2 100644 (file)
@@ -126,7 +126,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 void kvm_hv_init_vm(struct kvm *kvm);
 void kvm_hv_destroy_vm(struct kvm *kvm);
 int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
-int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
-                               struct kvm_cpuid_entry2 __user *entries);
+int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
+                    struct kvm_cpuid_entry2 __user *entries);
 
 #endif
index 99d118f..814698e 100644 (file)
@@ -40,29 +40,10 @@ static int pending_userspace_extint(struct kvm_vcpu *v)
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
-static int kvm_cpu_has_extint(struct kvm_vcpu *v)
-{
-       u8 accept = kvm_apic_accept_pic_intr(v);
-
-       if (accept) {
-               if (irqchip_split(v->kvm))
-                       return pending_userspace_extint(v);
-               else
-                       return v->kvm->arch.vpic->output;
-       } else
-               return 0;
-}
-
-/*
- * check if there is injectable interrupt:
- * when virtual interrupt delivery enabled,
- * interrupt from apic will handled by hardware,
- * we don't need to check it here.
- */
-int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
+int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
        /*
-        * FIXME: interrupt.injected represents an interrupt that it's
+        * FIXME: interrupt.injected represents an interrupt whose
         * side-effects have already been applied (e.g. bit from IRR
         * already moved to ISR). Therefore, it is incorrect to rely
         * on interrupt.injected to know if there is a pending
@@ -75,6 +56,23 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
        if (!lapic_in_kernel(v))
                return v->arch.interrupt.injected;
 
+       if (!kvm_apic_accept_pic_intr(v))
+               return 0;
+
+       if (irqchip_split(v->kvm))
+               return pending_userspace_extint(v);
+       else
+               return v->kvm->arch.vpic->output;
+}
+
+/*
+ * Check if there is an injectable interrupt:
+ * when virtual interrupt delivery is enabled,
+ * interrupts from the APIC will be handled by hardware,
+ * so we don't need to check for them here.
+ */
+int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
+{
        if (kvm_cpu_has_extint(v))
                return 1;
 
@@ -91,20 +89,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr);
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-       /*
-        * FIXME: interrupt.injected represents an interrupt that it's
-        * side-effects have already been applied (e.g. bit from IRR
-        * already moved to ISR). Therefore, it is incorrect to rely
-        * on interrupt.injected to know if there is a pending
-        * interrupt in the user-mode LAPIC.
-        * This leads to nVMX/nSVM not be able to distinguish
-        * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on
-        * pending interrupt or should re-inject an injected
-        * interrupt.
-        */
-       if (!lapic_in_kernel(v))
-               return v->arch.interrupt.injected;
-
        if (kvm_cpu_has_extint(v))
                return 1;
 
@@ -118,16 +102,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-       if (kvm_cpu_has_extint(v)) {
-               if (irqchip_split(v->kvm)) {
-                       int vector = v->arch.pending_external_vector;
-
-                       v->arch.pending_external_vector = -1;
-                       return vector;
-               } else
-                       return kvm_pic_read_irq(v->kvm); /* PIC */
-       } else
+       if (!kvm_cpu_has_extint(v)) {
+               WARN_ON(!lapic_in_kernel(v));
                return -1;
+       }
+
+       if (!lapic_in_kernel(v))
+               return v->arch.interrupt.nr;
+
+       if (irqchip_split(v->kvm)) {
+               int vector = v->arch.pending_external_vector;
+
+               v->arch.pending_external_vector = -1;
+               return vector;
+       } else
+               return kvm_pic_read_irq(v->kvm); /* PIC */
 }
 
 /*
@@ -135,13 +124,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 {
-       int vector;
-
-       if (!lapic_in_kernel(v))
-               return v->arch.interrupt.nr;
-
-       vector = kvm_cpu_get_extint(v);
-
+       int vector = kvm_cpu_get_extint(v);
        if (vector != -1)
                return vector;                  /* PIC */
 
index a889563..f15bc16 100644 (file)
@@ -9,6 +9,31 @@
        (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
         | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
 
+static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
+                                            enum kvm_reg reg)
+{
+       return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
+                                        enum kvm_reg reg)
+{
+       return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
+static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
+                                              enum kvm_reg reg)
+{
+       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
+                                          enum kvm_reg reg)
+{
+       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
 #define BUILD_KVM_GPR_ACCESSORS(lname, uname)                                \
 static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
 {                                                                            \
@@ -18,6 +43,7 @@ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu,              \
                                                unsigned long val)            \
 {                                                                            \
        vcpu->arch.regs[VCPU_REGS_##uname] = val;                             \
+       kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname);                     \
 }
 BUILD_KVM_GPR_ACCESSORS(rax, RAX)
 BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
@@ -37,31 +63,6 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
 BUILD_KVM_GPR_ACCESSORS(r15, R15)
 #endif
 
-static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
-                                            enum kvm_reg reg)
-{
-       return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
-                                        enum kvm_reg reg)
-{
-       return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
-static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
-                                              enum kvm_reg reg)
-{
-       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
-                                          enum kvm_reg reg)
-{
-       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
 {
        if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
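
Context for the kvm_register_mark_dirty() call added to the GPR write accessors above: KVM caches guest registers behind two bitmaps, regs_avail (the cache holds a valid copy) and regs_dirty (the cache is newer than the hardware state), so a cached write must set both bits or it can be silently lost on the next VM entry. A self-contained toy model of the scheme; the names and the explicit flush step are illustrative:

#include <stdint.h>
#include <stdio.h>

enum toy_reg { TOY_RAX, TOY_RBX, TOY_NR_REGS };

struct toy_vcpu {
        uint64_t regs[TOY_NR_REGS];
        uint32_t regs_avail;    /* cache holds a valid copy */
        uint32_t regs_dirty;    /* cache is newer than hardware state */
};

/* Simulated "hardware" (VMCB/VMCS-like) state the cache shadows. */
static uint64_t hw_regs[TOY_NR_REGS] = { 0x11, 0x22 };

static uint64_t toy_register_read(struct toy_vcpu *v, enum toy_reg r)
{
        if (!(v->regs_avail & (1u << r))) {     /* miss: pull from hardware */
                v->regs[r] = hw_regs[r];
                v->regs_avail |= 1u << r;
        }
        return v->regs[r];
}

static void toy_register_write(struct toy_vcpu *v, enum toy_reg r,
                               uint64_t val)
{
        v->regs[r] = val;
        /* Dirty implies available, as in kvm_register_mark_dirty(). */
        v->regs_avail |= 1u << r;
        v->regs_dirty |= 1u << r;
}

/* On "VM entry", only dirty registers need writing back. */
static void toy_flush_dirty(struct toy_vcpu *v)
{
        for (int r = 0; r < TOY_NR_REGS; r++)
                if (v->regs_dirty & (1u << r))
                        hw_regs[r] = v->regs[r];
        v->regs_dirty = 0;
}

int main(void)
{
        struct toy_vcpu v = { { 0 }, 0, 0 };

        toy_register_write(&v, TOY_RAX, 0x99);
        toy_flush_dirty(&v);
        printf("hw rax=%llx, cached rbx=%llx\n",
               (unsigned long long)hw_regs[TOY_RAX],
               (unsigned long long)toy_register_read(&v, TOY_RBX));
        return 0;
}
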
index 105e785..43ccead 100644 (file)
@@ -674,7 +674,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
                return false;
        }
-       return val & 0x1;
+       return val & KVM_PV_EOI_ENABLED;
 }
 
 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
@@ -2465,7 +2465,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
        struct kvm_lapic *apic = vcpu->arch.apic;
        u32 ppr;
 
-       if (!kvm_apic_hw_enabled(apic))
+       if (!kvm_apic_present(vcpu))
                return -1;
 
        __apic_update_ppr(apic, &ppr);
@@ -2843,14 +2843,35 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
        u8 sipi_vector;
+       int r;
        unsigned long pe;
 
-       if (!lapic_in_kernel(vcpu) || !apic->pending_events)
+       if (!lapic_in_kernel(vcpu))
                return;
 
        /*
+        * Read pending events before calling the check_events
+        * callback.
+        */
+       pe = smp_load_acquire(&apic->pending_events);
+       if (!pe)
+               return;
+
+       if (is_guest_mode(vcpu)) {
+               r = kvm_x86_ops.nested_ops->check_events(vcpu);
+               if (r < 0)
+                       return;
+               /*
+                * If an event has happened and caused a vmexit,
+                * we know INITs are latched and therefore
+                * we will not incorrectly deliver an APIC
+                * event instead of a vmexit.
+                */
+       }
+
+       /*
         * INITs are latched while CPU is in specific states
-        * (SMM, VMX non-root mode, SVM with GIF=0).
+        * (SMM, VMX root mode, SVM with GIF=0).
         * Because a CPU cannot be in these states immediately
         * after it has processed an INIT signal (and thus in
         * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
@@ -2858,26 +2879,28 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
         */
        if (kvm_vcpu_latch_init(vcpu)) {
                WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
-               if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
+               if (test_bit(KVM_APIC_SIPI, &pe))
                        clear_bit(KVM_APIC_SIPI, &apic->pending_events);
                return;
        }
 
-       pe = xchg(&apic->pending_events, 0);
        if (test_bit(KVM_APIC_INIT, &pe)) {
+               clear_bit(KVM_APIC_INIT, &apic->pending_events);
                kvm_vcpu_reset(vcpu, true);
                if (kvm_vcpu_is_bsp(apic->vcpu))
                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
                else
                        vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
        }
-       if (test_bit(KVM_APIC_SIPI, &pe) &&
-           vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
-               /* evaluate pending_events before reading the vector */
-               smp_rmb();
-               sipi_vector = apic->sipi_vector;
-               kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
-               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+       if (test_bit(KVM_APIC_SIPI, &pe)) {
+               clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+               if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+                       /* evaluate pending_events before reading the vector */
+                       smp_rmb();
+                       sipi_vector = apic->sipi_vector;
+                       kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector);
+                       vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+               }
        }
 }
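
The kvm_apic_accept_events() rework stops draining pending_events with one xchg(): it takes an acquire-ordered snapshot, may bail out early (for instance when the nested check_events callback fails), and clears each event bit only as that event is actually consumed, so unconsumed events survive for a later pass. A hedged sketch of the snapshot-and-clear pattern, using C11 atomics in place of smp_load_acquire() and clear_bit():

#include <stdatomic.h>
#include <stdio.h>

#define EV_INIT 0
#define EV_SIPI 1

static _Atomic unsigned long pending_events;

static void toy_accept_events(int sipi_deliverable)
{
        /* Acquire-ordered snapshot, analogous to smp_load_acquire(). */
        unsigned long pe = atomic_load_explicit(&pending_events,
                                                memory_order_acquire);

        if (!pe)
                return;

        if (pe & (1UL << EV_INIT)) {
                /* Consume: clear only this event's bit, not the word. */
                atomic_fetch_and(&pending_events, ~(1UL << EV_INIT));
                printf("INIT handled\n");
        }
        if (pe & (1UL << EV_SIPI)) {
                atomic_fetch_and(&pending_events, ~(1UL << EV_SIPI));
                if (sipi_deliverable)   /* e.g. vCPU in INIT_RECEIVED */
                        printf("SIPI delivered\n");
        }
}

int main(void)
{
        atomic_fetch_or(&pending_events,
                        (1UL << EV_INIT) | (1UL << EV_SIPI));
        toy_accept_events(1);
        return 0;
}
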
 
index 9c4a9c8..581925e 100644 (file)
@@ -49,7 +49,7 @@ static inline u64 rsvd_bits(int s, int e)
        if (e < s)
                return 0;
 
-       return ((1ULL << (e - s + 1)) - 1) << s;
+       return ((2ULL << (e - s)) - 1) << s;
 }
 
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask);
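
The new rsvd_bits() expression is equivalent to the old one wherever both are defined, but it also covers the full-width case: with s = 0 and e = 63 the old form evaluates 1ULL << 64, which is undefined in C, while 2ULL << 63 is a defined shift that wraps to 0, so subtracting 1 still produces the intended all-ones mask. A quick standalone check:

#include <inttypes.h>
#include <stdio.h>

/* Same expression as the patched rsvd_bits(). */
static uint64_t toy_rsvd_bits(int s, int e)
{
        if (e < s)
                return 0;
        return ((2ULL << (e - s)) - 1) << s;
}

int main(void)
{
        /* Ordinary case: bits 51..62 set. */
        printf("0x%016" PRIx64 "\n", toy_rsvd_bits(51, 62));

        /*
         * Full-width case, e - s == 63: the old form would be
         * 1ULL << 64 (undefined); 2ULL << 63 wraps to 0, and 0 - 1
         * gives the intended all-ones mask.
         */
        printf("0x%016" PRIx64 "\n", toy_rsvd_bits(0, 63));
        return 0;
}
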
index 5bb1939..6d16481 100644 (file)
@@ -820,7 +820,7 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
        slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
        if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
                return NULL;
-       if (no_dirty_log && slot->dirty_bitmap)
+       if (no_dirty_log && kvm_slot_dirty_track_enabled(slot))
                return NULL;
 
        return slot;
@@ -1289,6 +1289,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
+int kvm_cpu_dirty_log_size(void)
+{
+       if (kvm_x86_ops.cpu_dirty_log_size)
+               return kvm_x86_ops.cpu_dirty_log_size();
+
+       return 0;
+}
+
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
                                    struct kvm_memory_slot *slot, u64 gfn)
 {
@@ -3485,26 +3493,25 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
  */
-static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
 {
        struct kvm_shadow_walk_iterator iterator;
-       int leaf = vcpu->arch.mmu->root_level;
+       int leaf = -1;
        u64 spte;
 
-
        walk_shadow_page_lockless_begin(vcpu);
 
-       for (shadow_walk_init(&iterator, vcpu, addr);
+       for (shadow_walk_init(&iterator, vcpu, addr),
+            *root_level = iterator.level;
             shadow_walk_okay(&iterator);
             __shadow_walk_next(&iterator, spte)) {
                leaf = iterator.level;
                spte = mmu_spte_get_lockless(iterator.sptep);
 
-               sptes[leaf - 1] = spte;
+               sptes[leaf] = spte;
 
                if (!is_shadow_present_pte(spte))
                        break;
-
        }
 
        walk_shadow_page_lockless_end(vcpu);
@@ -3512,14 +3519,12 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
        return leaf;
 }
 
-/* return true if reserved bit is detected on spte. */
+/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
 static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
-       u64 sptes[PT64_ROOT_MAX_LEVEL];
+       u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
        struct rsvd_bits_validate *rsvd_check;
-       int root = vcpu->arch.mmu->root_level;
-       int leaf;
-       int level;
+       int root, leaf, level;
        bool reserved = false;
 
        if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
@@ -3528,35 +3533,45 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
        }
 
        if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
-               leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
+               leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
        else
-               leaf = get_walk(vcpu, addr, sptes);
+               leaf = get_walk(vcpu, addr, sptes, &root);
+
+       if (unlikely(leaf < 0)) {
+               *sptep = 0ull;
+               return reserved;
+       }
+
+       *sptep = sptes[leaf];
+
+       /*
+        * Skip reserved bits checks on the terminal leaf if it's not a valid
+        * SPTE.  Note, this also (intentionally) skips MMIO SPTEs, which, by
+        * design, always have reserved bits set.  The purpose of the checks is
+        * to detect reserved bits on non-MMIO SPTEs, i.e. buggy SPTEs.
+        */
+       if (!is_shadow_present_pte(sptes[leaf]))
+               leaf++;
 
        rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
 
-       for (level = root; level >= leaf; level--) {
-               if (!is_shadow_present_pte(sptes[level - 1]))
-                       break;
+       for (level = root; level >= leaf; level--)
                /*
                 * Use a bitwise-OR instead of a logical-OR to aggregate the
                 * reserved bit and EPT's invalid memtype/XWR checks to avoid
                 * adding a Jcc in the loop.
                 */
-               reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) |
-                           __is_rsvd_bits_set(rsvd_check, sptes[level - 1],
-                                              level);
-       }
+               reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) |
+                           __is_rsvd_bits_set(rsvd_check, sptes[level], level);
 
        if (reserved) {
                pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
                       __func__, addr);
                for (level = root; level >= leaf; level--)
                        pr_err("------ spte 0x%llx level %d.\n",
-                              sptes[level - 1], level);
+                              sptes[level], level);
        }
 
-       *sptep = sptes[leaf - 1];
-
        return reserved;
 }
 
index 213699b..e798489 100644 (file)
@@ -381,6 +381,35 @@ TRACE_EVENT(
        )
 );
 
+TRACE_EVENT(
+       kvm_tdp_mmu_spte_changed,
+       TP_PROTO(int as_id, gfn_t gfn, int level, u64 old_spte, u64 new_spte),
+       TP_ARGS(as_id, gfn, level, old_spte, new_spte),
+
+       TP_STRUCT__entry(
+               __field(u64, gfn)
+               __field(u64, old_spte)
+               __field(u64, new_spte)
+               /* Level cannot be larger than 5 on x86, so it fits in a u8. */
+               __field(u8, level)
+               /* as_id can only be 0 or 1 on x86, so it fits in a u8. */
+               __field(u8, as_id)
+       ),
+
+       TP_fast_assign(
+               __entry->gfn = gfn;
+               __entry->old_spte = old_spte;
+               __entry->new_spte = new_spte;
+               __entry->level = level;
+               __entry->as_id = as_id;
+       ),
+
+       TP_printk("as id %d gfn %llx level %d old_spte %llx new_spte %llx",
+                 __entry->as_id, __entry->gfn, __entry->level,
+                 __entry->old_spte, __entry->new_spte
+       )
+);
+
 #endif /* _TRACE_KVMMMU_H */
 
 #undef TRACE_INCLUDE_PATH
index fcac2ca..c51ad54 100644 (file)
@@ -40,8 +40,8 @@ static u64 generation_mmio_spte_mask(u64 gen)
        WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
        BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
 
-       mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
-       mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
+       mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
+       mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
        return mask;
 }
 
index 5c75a45..2b3a30b 100644 (file)
 #define SPTE_MMU_WRITEABLE     (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
 
 /*
- * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
+ * Due to limited space in PTEs, the MMIO generation is an 18-bit subset of
  * the memslots generation and is derived as follows:
  *
  * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
- * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
+ * Bits 9-17 of the MMIO generation are propagated to spte bits 54-62
  *
  * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
  * the MMIO generation number, as doing so would require stealing a bit from
  * requires a full MMU zap).  The flag is instead explicitly queried when
  * checking for MMIO spte cache hits.
  */
-#define MMIO_SPTE_GEN_MASK             GENMASK_ULL(17, 0)
 
 #define MMIO_SPTE_GEN_LOW_START                3
 #define MMIO_SPTE_GEN_LOW_END          11
-#define MMIO_SPTE_GEN_LOW_MASK         GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
-                                                   MMIO_SPTE_GEN_LOW_START)
 
 #define MMIO_SPTE_GEN_HIGH_START       PT64_SECOND_AVAIL_BITS_SHIFT
 #define MMIO_SPTE_GEN_HIGH_END         62
+
+#define MMIO_SPTE_GEN_LOW_MASK         GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
+                                                   MMIO_SPTE_GEN_LOW_START)
 #define MMIO_SPTE_GEN_HIGH_MASK                GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
                                                    MMIO_SPTE_GEN_HIGH_START)
 
+#define MMIO_SPTE_GEN_LOW_BITS         (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1)
+#define MMIO_SPTE_GEN_HIGH_BITS                (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)
+
+/* remember to adjust the comment above as well if you change these */
+static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9);
+
+#define MMIO_SPTE_GEN_LOW_SHIFT                (MMIO_SPTE_GEN_LOW_START - 0)
+#define MMIO_SPTE_GEN_HIGH_SHIFT       (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
+
+#define MMIO_SPTE_GEN_MASK             GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)
+
 extern u64 __read_mostly shadow_nx_mask;
 extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 extern u64 __read_mostly shadow_user_mask;
@@ -228,8 +239,8 @@ static inline u64 get_mmio_spte_generation(u64 spte)
 {
        u64 gen;
 
-       gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
-       gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
+       gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT;
+       gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT;
        return gen;
 }
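Since the *_START constants double as SPTE bit positions while the new *_SHIFT constants are relative to the packed generation, a quick round-trip check may help. This is a standalone userspace sketch with GENMASK_ULL re-implemented locally; the numeric values mirror the spte.h definitions above (LOW_SHIFT = 3, and HIGH_SHIFT = 54 - 9 = 45, per the bits-54-62 comment):

	#include <assert.h>
	#include <stdint.h>

	#define GENMASK_ULL(h, l) ((~0ULL << (l)) & (~0ULL >> (63 - (h))))

	#define LOW_MASK   GENMASK_ULL(11, 3)  /* gen bits 0-8  -> spte bits 3-11  */
	#define HIGH_MASK  GENMASK_ULL(62, 54) /* gen bits 9-17 -> spte bits 54-62 */
	#define LOW_SHIFT  3
	#define HIGH_SHIFT 45

	static uint64_t encode(uint64_t gen)
	{
		return ((gen << LOW_SHIFT) & LOW_MASK) |
		       ((gen << HIGH_SHIFT) & HIGH_MASK);
	}

	static uint64_t decode(uint64_t spte)
	{
		return ((spte & LOW_MASK) >> LOW_SHIFT) |
		       ((spte & HIGH_MASK) >> HIGH_SHIFT);
	}

	int main(void)
	{
		uint64_t gen;

		/* Every 18-bit generation must survive the round-trip. */
		for (gen = 0; gen < (1ULL << 18); gen++)
			assert(decode(encode(gen)) == gen);
		return 0;
	}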
 
index ff28a5c..2ef8615 100644 (file)
@@ -7,6 +7,8 @@
 #include "tdp_mmu.h"
 #include "spte.h"
 
+#include <trace/events/kvm.h>
+
 #ifdef CONFIG_X86_64
 static bool __read_mostly tdp_mmu_enabled = false;
 module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
@@ -42,7 +44,48 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
        WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
 }
 
-#define for_each_tdp_mmu_root(_kvm, _root)                         \
+static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
+{
+       if (kvm_mmu_put_root(kvm, root))
+               kvm_tdp_mmu_free_root(kvm, root);
+}
+
+static inline bool tdp_mmu_next_root_valid(struct kvm *kvm,
+                                          struct kvm_mmu_page *root)
+{
+       lockdep_assert_held(&kvm->mmu_lock);
+
+       if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link))
+               return false;
+
+       kvm_mmu_get_root(kvm, root);
+       return true;
+}
+
+static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
+                                                    struct kvm_mmu_page *root)
+{
+       struct kvm_mmu_page *next_root;
+
+       next_root = list_next_entry(root, link);
+       tdp_mmu_put_root(kvm, root);
+       return next_root;
+}
+
+/*
+ * Note: this iterator gets and puts references to the roots it iterates over.
+ * This makes it safe to release the MMU lock and yield within the loop, but
+ * if exiting the loop early, the caller must drop the reference to the most
+ * recent root. (Unless keeping a live reference is desirable.)
+ */
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root)                          \
+       for (_root = list_first_entry(&_kvm->arch.tdp_mmu_roots,        \
+                                     typeof(*_root), link);            \
+            tdp_mmu_next_root_valid(_kvm, _root);                      \
+            _root = tdp_mmu_next_root(_kvm, _root))
+
+#define for_each_tdp_mmu_root(_kvm, _root)                             \
        list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
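Because the yield-safe iterator takes a reference via kvm_mmu_get_root() before every iteration, a caller that leaves the walk early has to drop that reference itself, as the comment above warns. An illustrative fragment (the early-exit condition is hypothetical, not part of this patch):

	for_each_tdp_mmu_root_yield_safe(kvm, root) {
		if (some_early_exit_condition) {
			/* drop the reference the iterator took on @root */
			tdp_mmu_put_root(kvm, root);
			break;
		}
	}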
 
 bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
@@ -66,7 +109,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
 {
-       gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
+       gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
 
        lockdep_assert_held(&kvm->mmu_lock);
 
@@ -108,6 +151,8 @@ static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
        sp->gfn = gfn;
        sp->tdp_mmu_page = true;
 
+       trace_kvm_mmu_get_page(sp, true);
+
        return sp;
 }
 
@@ -185,7 +230,7 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
        if ((!is_writable_pte(old_spte) || pfn_changed) &&
            is_writable_pte(new_spte)) {
                slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
-               mark_page_dirty_in_slot(slot, gfn);
+               mark_page_dirty_in_slot(kvm, slot, gfn);
        }
 }
 
@@ -244,6 +289,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
        if (old_spte == new_spte)
                return;
 
+       trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
+
        /*
         * The only times a SPTE should be changed from a non-present to
         * non-present state is when an MMIO entry is installed/modified/
@@ -278,6 +325,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
                pt = spte_to_child_pt(old_spte, level);
                sp = sptep_to_sp(pt);
 
+               trace_kvm_mmu_prepare_zap_page(sp);
+
                list_del(&sp->link);
 
                if (sp->lpage_disallowed)
@@ -439,24 +488,15 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
        struct kvm_mmu_page *root;
        bool flush = false;
 
-       for_each_tdp_mmu_root(kvm, root) {
-               /*
-                * Take a reference on the root so that it cannot be freed if
-                * this thread releases the MMU lock and yields in this loop.
-                */
-               kvm_mmu_get_root(kvm, root);
-
+       for_each_tdp_mmu_root_yield_safe(kvm, root)
                flush |= zap_gfn_range(kvm, root, start, end, true);
 
-               kvm_mmu_put_root(kvm, root);
-       }
-
        return flush;
 }
 
 void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 {
-       gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
+       gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
        bool flush;
 
        flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
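The only change here is which physical-address width is trusted: shadow_phys_bits is KVM's own notion of the usable width rather than the raw CPUID value. The arithmetic itself is unchanged; for instance:

	/* shadow_phys_bits = 52, PAGE_SHIFT = 12:
	 *   max_gfn = 1ULL << (52 - 12) = 2^40 page frames,
	 *   i.e. the zap walks the full 4 PiB guest-physical range.
	 */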
@@ -480,11 +520,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
        if (unlikely(is_noslot_pfn(pfn))) {
                new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
                trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
-       } else
+       } else {
                make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
                                         pfn, iter->old_spte, prefault, true,
                                         map_writable, !shadow_accessed_mask,
                                         &new_spte);
+               trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
+       }
 
        if (new_spte == iter->old_spte)
                ret = RET_PF_SPURIOUS;
@@ -609,13 +651,7 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
        int ret = 0;
        int as_id;
 
-       for_each_tdp_mmu_root(kvm, root) {
-               /*
-                * Take a reference on the root so that it cannot be freed if
-                * this thread releases the MMU lock and yields in this loop.
-                */
-               kvm_mmu_get_root(kvm, root);
-
+       for_each_tdp_mmu_root_yield_safe(kvm, root) {
                as_id = kvm_mmu_page_as_id(root);
                slots = __kvm_memslots(kvm, as_id);
                kvm_for_each_memslot(memslot, slots) {
@@ -637,8 +673,6 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
                        ret |= handler(kvm, memslot, root, gfn_start,
                                       gfn_end, data);
                }
-
-               kvm_mmu_put_root(kvm, root);
        }
 
        return ret;
@@ -698,6 +732,8 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
 
                tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte);
                young = 1;
+
+               trace_kvm_age_page(iter.gfn, iter.level, slot, young);
        }
 
        return young;
@@ -826,21 +862,13 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
        int root_as_id;
        bool spte_set = false;
 
-       for_each_tdp_mmu_root(kvm, root) {
+       for_each_tdp_mmu_root_yield_safe(kvm, root) {
                root_as_id = kvm_mmu_page_as_id(root);
                if (root_as_id != slot->as_id)
                        continue;
 
-               /*
-                * Take a reference on the root so that it cannot be freed if
-                * this thread releases the MMU lock and yields in this loop.
-                */
-               kvm_mmu_get_root(kvm, root);
-
                spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
                             slot->base_gfn + slot->npages, min_level);
-
-               kvm_mmu_put_root(kvm, root);
        }
 
        return spte_set;
@@ -894,21 +922,13 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
        int root_as_id;
        bool spte_set = false;
 
-       for_each_tdp_mmu_root(kvm, root) {
+       for_each_tdp_mmu_root_yield_safe(kvm, root) {
                root_as_id = kvm_mmu_page_as_id(root);
                if (root_as_id != slot->as_id)
                        continue;
 
-               /*
-                * Take a reference on the root so that it cannot be freed if
-                * this thread releases the MMU lock and yields in this loop.
-                */
-               kvm_mmu_get_root(kvm, root);
-
                spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
                                slot->base_gfn + slot->npages);
-
-               kvm_mmu_put_root(kvm, root);
        }
 
        return spte_set;
@@ -1017,21 +1037,13 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
        int root_as_id;
        bool spte_set = false;
 
-       for_each_tdp_mmu_root(kvm, root) {
+       for_each_tdp_mmu_root_yield_safe(kvm, root) {
                root_as_id = kvm_mmu_page_as_id(root);
                if (root_as_id != slot->as_id)
                        continue;
 
-               /*
-                * Take a reference on the root so that it cannot be freed if
-                * this thread releases the MMU lock and yields in this loop.
-                */
-               kvm_mmu_get_root(kvm, root);
-
                spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
                                slot->base_gfn + slot->npages);
-
-               kvm_mmu_put_root(kvm, root);
        }
        return spte_set;
 }
@@ -1077,21 +1089,13 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
        struct kvm_mmu_page *root;
        int root_as_id;
 
-       for_each_tdp_mmu_root(kvm, root) {
+       for_each_tdp_mmu_root_yield_safe(kvm, root) {
                root_as_id = kvm_mmu_page_as_id(root);
                if (root_as_id != slot->as_id)
                        continue;
 
-               /*
-                * Take a reference on the root so that it cannot be freed if
-                * this thread releases the MMU lock and yields in this loop.
-                */
-               kvm_mmu_get_root(kvm, root);
-
                zap_collapsible_spte_range(kvm, root, slot->base_gfn,
                                           slot->base_gfn + slot->npages);
-
-               kvm_mmu_put_root(kvm, root);
        }
 }
 
@@ -1148,16 +1152,19 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
  */
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+                        int *root_level)
 {
        struct tdp_iter iter;
        struct kvm_mmu *mmu = vcpu->arch.mmu;
-       int leaf = vcpu->arch.mmu->shadow_root_level;
        gfn_t gfn = addr >> PAGE_SHIFT;
+       int leaf = -1;
+
+       *root_level = vcpu->arch.mmu->shadow_root_level;
 
        tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
                leaf = iter.level;
-               sptes[leaf - 1] = iter.old_spte;
+               sptes[leaf] = iter.old_spte;
        }
 
        return leaf;
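With the new out-parameter, callers learn the root level separately and index sptes[] directly by level. A rough caller sketch, modeled on the mmu.c user of this helper (the array bound and the printout are illustrative assumptions):

	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
	int root_level, leaf, level;

	leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root_level);
	if (leaf < 0)
		return;		/* the walk hit no SPTEs at all */

	for (level = root_level; level >= leaf; level--)
		pr_debug("level %d: spte %llx\n", level, sptes[level]);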
index 556e065..cbbdbad 100644 (file)
@@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
                                   struct kvm_memory_slot *slot, gfn_t gfn);
 
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+                        int *root_level);
+
 #endif /* __KVM_X86_MMU_TDP_MMU_H */
index 7f0059a..f472fdb 100644 (file)
@@ -84,12 +84,8 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
        } else
                /* MTRR mask */
                mask |= 0x7ff;
-       if (data & mask) {
-               kvm_inject_gp(vcpu, 0);
-               return false;
-       }
 
-       return true;
+       return (data & mask) == 0;
 }
 EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
 
index 8c55099..0ef84d5 100644 (file)
@@ -233,7 +233,8 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
  */
 static int avic_update_access_page(struct kvm *kvm, bool activate)
 {
-       int ret = 0;
+       void __user *ret;
+       int r = 0;
 
        mutex_lock(&kvm->slots_lock);
        /*
@@ -249,13 +250,15 @@ static int avic_update_access_page(struct kvm *kvm, bool activate)
                                      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
                                      APIC_DEFAULT_PHYS_BASE,
                                      activate ? PAGE_SIZE : 0);
-       if (ret)
+       if (IS_ERR(ret)) {
+               r = PTR_ERR(ret);
                goto out;
+       }
 
        kvm->arch.apic_access_page_done = activate;
 out:
        mutex_unlock(&kvm->slots_lock);
-       return ret;
+       return r;
 }
 
 static int avic_init_backing_page(struct kvm_vcpu *vcpu)
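The rework above follows from __x86_set_memory_region() now returning either the region's userspace address or an errno encoded with the standard <linux/err.h> helpers. A minimal sketch of that convention:

	void __user *hva;

	hva = __x86_set_memory_region(kvm, id, gpa, size);
	if (IS_ERR(hva))
		return PTR_ERR(hva);	/* e.g. -ENOMEM or -EEXIST */
	/* success: hva is the mapping's userspace address */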
index 9e4c226..cb4c6ee 100644 (file)
@@ -199,6 +199,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+
        if (!nested_svm_vmrun_msrpm(svm)) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror =
@@ -254,7 +255,7 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
                    (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK))
                        return false;
        }
-       if (kvm_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
+       if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
                return false;
 
        return nested_vmcb_check_controls(&vmcb12->control);
@@ -381,7 +382,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
        svm->vmcb->save.ds = vmcb12->save.ds;
        svm->vmcb->save.gdtr = vmcb12->save.gdtr;
        svm->vmcb->save.idtr = vmcb12->save.idtr;
-       kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags);
+       kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
        svm_set_efer(&svm->vcpu, vmcb12->save.efer);
        svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
        svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
@@ -394,8 +395,8 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
        svm->vmcb->save.rax = vmcb12->save.rax;
        svm->vmcb->save.rsp = vmcb12->save.rsp;
        svm->vmcb->save.rip = vmcb12->save.rip;
-       svm->vmcb->save.dr7 = vmcb12->save.dr7;
-       svm->vcpu.arch.dr6  = vmcb12->save.dr6;
+       svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
+       svm->vcpu.arch.dr6  = vmcb12->save.dr6 | DR6_FIXED_1 | DR6_RTM;
        svm->vmcb->save.cpl = vmcb12->save.cpl;
 }
 
@@ -595,6 +596,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        svm->nested.vmcb12_gpa = 0;
        WARN_ON_ONCE(svm->nested.nested_run_pending);
 
+       kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
+
        /* in case we halted in L2 */
        svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
@@ -660,13 +663,14 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        svm->vmcb->save.gdtr = hsave->save.gdtr;
        svm->vmcb->save.idtr = hsave->save.idtr;
-       kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
+       kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED);
        svm_set_efer(&svm->vcpu, hsave->save.efer);
        svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
        svm_set_cr4(&svm->vcpu, hsave->save.cr4);
        kvm_rax_write(&svm->vcpu, hsave->save.rax);
        kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
        kvm_rip_write(&svm->vcpu, hsave->save.rip);
-       svm->vmcb->save.dr7 = 0;
+       svm->vmcb->save.dr7 = DR7_FIXED_1;
        svm->vmcb->save.cpl = 0;
        svm->vmcb->control.exit_int_info = 0;
 
@@ -753,6 +757,7 @@ void svm_leave_nested(struct vcpu_svm *svm)
                leave_guest_mode(&svm->vcpu);
                copy_vmcb_control_area(&vmcb->control, &hsave->control);
                nested_svm_uninit_mmu_context(&svm->vcpu);
+               vmcb_mark_all_dirty(svm->vmcb);
        }
 
        kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
@@ -1193,6 +1198,10 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
         * in the registers, the save area of the nested state instead
         * contains saved L1 state.
         */
+
+       svm->nested.nested_run_pending =
+               !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
        copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
        hsave->save = *save;
 
index c0b1410..c8ffdbc 100644 (file)
 #include <linux/psp-sev.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
+#include <linux/processor.h>
+#include <linux/trace_events.h>
+#include <asm/fpu/internal.h>
+
+#include <asm/trapnr.h>
 
 #include "x86.h"
 #include "svm.h"
+#include "cpuid.h"
+#include "trace.h"
+
+#define __ex(x) __kvm_handle_fault_on_reboot(x)
 
+static u8 sev_enc_bit;
 static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
 static DEFINE_MUTEX(sev_bitmap_lock);
@@ -25,7 +35,6 @@ unsigned int max_sev_asid;
 static unsigned int min_sev_asid;
 static unsigned long *sev_asid_bitmap;
 static unsigned long *sev_reclaim_asid_bitmap;
-#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
 
 struct enc_region {
        struct list_head list;
@@ -57,19 +66,19 @@ static int sev_flush_asids(void)
 }
 
 /* Must be called with the sev_bitmap_lock held */
-static bool __sev_recycle_asids(void)
+static bool __sev_recycle_asids(int min_asid, int max_asid)
 {
        int pos;
 
        /* Check if there are any ASIDs to reclaim before performing a flush */
-       pos = find_next_bit(sev_reclaim_asid_bitmap,
-                           max_sev_asid, min_sev_asid - 1);
-       if (pos >= max_sev_asid)
+       pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid);
+       if (pos >= max_asid)
                return false;
 
        if (sev_flush_asids())
                return false;
 
+       /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
        bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
                   max_sev_asid);
        bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
@@ -77,20 +86,23 @@ static bool __sev_recycle_asids(void)
        return true;
 }
 
-static int sev_asid_new(void)
+static int sev_asid_new(struct kvm_sev_info *sev)
 {
+       int pos, min_asid, max_asid;
        bool retry = true;
-       int pos;
 
        mutex_lock(&sev_bitmap_lock);
 
        /*
-        * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid.
+        * SEV-enabled guests must use ASIDs from min_sev_asid to max_sev_asid.
+        * SEV-ES-enabled guests can use ASIDs from 1 to min_sev_asid - 1.
         */
+       min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+       max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
 again:
-       pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
-       if (pos >= max_sev_asid) {
-               if (retry && __sev_recycle_asids()) {
+       pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
+       if (pos >= max_asid) {
+               if (retry && __sev_recycle_asids(min_asid, max_asid)) {
                        retry = false;
                        goto again;
                }
@@ -172,7 +184,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (unlikely(sev->active))
                return ret;
 
-       asid = sev_asid_new();
+       asid = sev_asid_new(sev);
        if (asid < 0)
                return ret;
 
@@ -191,6 +203,16 @@ e_free:
        return ret;
 }
 
+static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+       if (!sev_es)
+               return -ENOTTY;
+
+       to_kvm_svm(kvm)->sev_info.es_active = true;
+
+       return sev_guest_init(kvm, argp);
+}
+
 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
 {
        struct sev_data_activate *data;
@@ -490,6 +512,96 @@ e_free:
        return ret;
 }
 
+static int sev_es_sync_vmsa(struct vcpu_svm *svm)
+{
+       struct vmcb_save_area *save = &svm->vmcb->save;
+
+       /* Check some debug related fields before encrypting the VMSA */
+       if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
+               return -EINVAL;
+
+       /* Sync registers */
+       save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
+       save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
+       save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+       save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
+       save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
+       save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
+       save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
+       save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
+#ifdef CONFIG_X86_64
+       save->r8  = svm->vcpu.arch.regs[VCPU_REGS_R8];
+       save->r9  = svm->vcpu.arch.regs[VCPU_REGS_R9];
+       save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
+       save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
+       save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
+       save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
+       save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
+       save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
+#endif
+       save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
+
+       /* Sync some non-GPR registers before encrypting */
+       save->xcr0 = svm->vcpu.arch.xcr0;
+       save->pkru = svm->vcpu.arch.pkru;
+       save->xss  = svm->vcpu.arch.ia32_xss;
+
+       /*
+        * SEV-ES will use a VMSA that is pointed to by the VMCB, not
+        * the traditional VMSA that is part of the VMCB. Copy the
+        * traditional VMSA as it has been built so far (in prep
+        * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
+        */
+       memcpy(svm->vmsa, save, sizeof(*save));
+
+       return 0;
+}
+
+static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+       struct sev_data_launch_update_vmsa *vmsa;
+       int i, ret = 0;
+
+       if (!sev_es_guest(kvm))
+               return -ENOTTY;
+
+       vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
+       if (!vmsa)
+               return -ENOMEM;
+
+       for (i = 0; i < kvm->created_vcpus; i++) {
+               struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);
+
+               /* Perform some pre-encryption checks against the VMSA */
+               ret = sev_es_sync_vmsa(svm);
+               if (ret)
+                       goto e_free;
+
+               /*
+                * The LAUNCH_UPDATE_VMSA command will perform in-place
+                * encryption of the VMSA memory content (i.e it will write
+                * the same memory region with the guest's key), so invalidate
+                * it first.
+                */
+               clflush_cache_range(svm->vmsa, PAGE_SIZE);
+
+               vmsa->handle = sev->handle;
+               vmsa->address = __sme_pa(svm->vmsa);
+               vmsa->len = PAGE_SIZE;
+               ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
+                                   &argp->error);
+               if (ret)
+                       goto e_free;
+
+               svm->vcpu.arch.guest_state_protected = true;
+       }
+
+e_free:
+       kfree(vmsa);
+       return ret;
+}
+
 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
        void __user *measure = (void __user *)(uintptr_t)argp->data;
@@ -642,8 +754,8 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
         * It's safe to read more than we are asked; the caller should ensure that
         * destination has enough space.
         */
-       src_paddr = round_down(src_paddr, 16);
        offset = src_paddr & 15;
+       src_paddr = round_down(src_paddr, 16);
        sz = round_up(sz + offset, 16);
 
        return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
@@ -932,7 +1044,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
        struct kvm_sev_cmd sev_cmd;
        int r;
 
-       if (!svm_sev_enabled())
+       if (!svm_sev_enabled() || !sev)
                return -ENOTTY;
 
        if (!argp)
@@ -947,12 +1059,18 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
        case KVM_SEV_INIT:
                r = sev_guest_init(kvm, &sev_cmd);
                break;
+       case KVM_SEV_ES_INIT:
+               r = sev_es_guest_init(kvm, &sev_cmd);
+               break;
        case KVM_SEV_LAUNCH_START:
                r = sev_launch_start(kvm, &sev_cmd);
                break;
        case KVM_SEV_LAUNCH_UPDATE_DATA:
                r = sev_launch_update_data(kvm, &sev_cmd);
                break;
+       case KVM_SEV_LAUNCH_UPDATE_VMSA:
+               r = sev_launch_update_vmsa(kvm, &sev_cmd);
+               break;
        case KVM_SEV_LAUNCH_MEASURE:
                r = sev_launch_measure(kvm, &sev_cmd);
                break;
@@ -1125,49 +1243,61 @@ void sev_vm_destroy(struct kvm *kvm)
        sev_asid_free(sev->asid);
 }
 
-int __init sev_hardware_setup(void)
+void __init sev_hardware_setup(void)
 {
-       struct sev_user_data_status *status;
-       int rc;
+       unsigned int eax, ebx, ecx, edx;
+       bool sev_es_supported = false;
+       bool sev_supported = false;
+
+       /* Does the CPU support SEV? */
+       if (!boot_cpu_has(X86_FEATURE_SEV))
+               goto out;
+
+       /* Retrieve SEV CPUID information */
+       cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
+
+       /* Set encryption bit location for SEV-ES guests */
+       sev_enc_bit = ebx & 0x3f;
 
        /* Maximum number of encrypted guests supported simultaneously */
-       max_sev_asid = cpuid_ecx(0x8000001F);
+       max_sev_asid = ecx;
 
        if (!svm_sev_enabled())
-               return 1;
+               goto out;
 
        /* Minimum ASID value that should be used for SEV guest */
-       min_sev_asid = cpuid_edx(0x8000001F);
+       min_sev_asid = edx;
 
        /* Initialize SEV ASID bitmaps */
        sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
        if (!sev_asid_bitmap)
-               return 1;
+               goto out;
 
        sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
        if (!sev_reclaim_asid_bitmap)
-               return 1;
+               goto out;
 
-       status = kmalloc(sizeof(*status), GFP_KERNEL);
-       if (!status)
-               return 1;
+       pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
+       sev_supported = true;
 
-       /*
-        * Check SEV platform status.
-        *
-        * PLATFORM_STATUS can be called in any state, if we failed to query
-        * the PLATFORM status then either PSP firmware does not support SEV
-        * feature or SEV firmware is dead.
-        */
-       rc = sev_platform_status(status, NULL);
-       if (rc)
-               goto err;
+       /* SEV-ES support requested? */
+       if (!sev_es)
+               goto out;
+
+       /* Does the CPU support SEV-ES? */
+       if (!boot_cpu_has(X86_FEATURE_SEV_ES))
+               goto out;
 
-       pr_info("SEV supported\n");
+       /* Has the system been allocated ASIDs for SEV-ES? */
+       if (min_sev_asid == 1)
+               goto out;
 
-err:
-       kfree(status);
-       return rc;
+       pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
+       sev_es_supported = true;
+
+out:
+       sev = sev_supported;
+       sev_es = sev_es_supported;
 }
 
 void sev_hardware_teardown(void)
@@ -1181,13 +1311,330 @@ void sev_hardware_teardown(void)
        sev_flush_asids();
 }
 
+/*
+ * Pages used by hardware to hold guest encrypted state must be flushed before
+ * returning them to the system.
+ */
+static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
+                                  unsigned long len)
+{
+       /*
+        * If hardware enforced cache coherency for encrypted mappings of the
+        * same physical page is supported, nothing to do.
+        */
+       if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
+               return;
+
+       /*
+        * If the VM Page Flush MSR is supported, use it to flush the page
+        * (using the page virtual address and the guest ASID).
+        */
+       if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
+               struct kvm_sev_info *sev;
+               unsigned long va_start;
+               u64 start, stop;
+
+               /* Align start and stop to page boundaries. */
+               va_start = (unsigned long)va;
+               start = (u64)va_start & PAGE_MASK;
+               stop = PAGE_ALIGN((u64)va_start + len);
+
+               if (start < stop) {
+                       sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+                       while (start < stop) {
+                               wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
+                                      start | sev->asid);
+
+                               start += PAGE_SIZE;
+                       }
+
+                       return;
+               }
+
+               WARN(1, "Address overflow, using WBINVD\n");
+       }
+
+       /*
+        * Hardware should always have one of the above features,
+        * but if not, use WBINVD and issue a warning.
+        */
+       WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
+       wbinvd_on_all_cpus();
+}
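The start/stop computation rounds the buffer out to whole pages before the per-page MSR writes. Two worked cases, assuming 4 KiB pages:

	/* va = 0x7f001234, len = 0x20:
	 *   start = 0x7f001000, stop = 0x7f002000 -> one MSR write
	 * va = 0x7f001ff0, len = 0x20:
	 *   start = 0x7f001000, stop = 0x7f003000 -> two MSR writes
	 */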
+
+void sev_free_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm;
+
+       if (!sev_es_guest(vcpu->kvm))
+               return;
+
+       svm = to_svm(vcpu);
+
+       if (vcpu->arch.guest_state_protected)
+               sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
+       __free_page(virt_to_page(svm->vmsa));
+
+       if (svm->ghcb_sa_free)
+               kfree(svm->ghcb_sa);
+}
+
+static void dump_ghcb(struct vcpu_svm *svm)
+{
+       struct ghcb *ghcb = svm->ghcb;
+       unsigned int nbits;
+
+       /* Re-use the dump_invalid_vmcb module parameter */
+       if (!dump_invalid_vmcb) {
+               pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
+               return;
+       }
+
+       nbits = sizeof(ghcb->save.valid_bitmap) * 8;
+
+       pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
+              ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
+              ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
+              ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
+              ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
+       pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+}
+
+static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       struct ghcb *ghcb = svm->ghcb;
+
+       /*
+        * The GHCB protocol so far allows for the following data
+        * to be returned:
+        *   GPRs RAX, RBX, RCX, RDX
+        *
+        * Copy their values to the GHCB if they are dirty.
+        */
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
+               ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
+               ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
+               ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
+               ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
+}
+
+static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
+{
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       struct ghcb *ghcb = svm->ghcb;
+       u64 exit_code;
+
+       /*
+        * The GHCB protocol so far allows for the following data
+        * to be supplied:
+        *   GPRs RAX, RBX, RCX, RDX
+        *   XCR0
+        *   CPL
+        *
+        * VMMCALL allows the guest to provide extra registers. KVM also
+        * expects RSI for hypercalls, so include that, too.
+        *
+        * Copy their values to the appropriate location if supplied.
+        */
+       memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+
+       vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
+
+       svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
+
+       if (ghcb_xcr0_is_valid(ghcb)) {
+               vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
+               kvm_update_cpuid_runtime(vcpu);
+       }
+
+       /* Copy the GHCB exit information into the VMCB fields */
+       exit_code = ghcb_get_sw_exit_code(ghcb);
+       control->exit_code = lower_32_bits(exit_code);
+       control->exit_code_hi = upper_32_bits(exit_code);
+       control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
+       control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
+
+       /* Clear the valid entries fields */
+       memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
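The ghcb_get_*_if_valid() accessors used above come from the GHCB accessor macros in <asm/svm.h>; their effective behavior is roughly the following (a paraphrase for clarity, not the literal macro expansion):

	static inline u64 ghcb_get_rax_if_valid_sketch(struct ghcb *ghcb)
	{
		/* hand back the field only if the guest marked it valid */
		return ghcb_rax_is_valid(ghcb) ? ghcb->save.rax : 0;
	}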
+
+static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu;
+       struct ghcb *ghcb;
+       u64 exit_code = 0;
+
+       ghcb = svm->ghcb;
+
+       /* Only GHCB Usage code 0 is supported */
+       if (ghcb->ghcb_usage)
+               goto vmgexit_err;
+
+       /*
+        * Retrieve the exit code now even though it may not be marked valid
+        * as it could help with debugging.
+        */
+       exit_code = ghcb_get_sw_exit_code(ghcb);
+
+       if (!ghcb_sw_exit_code_is_valid(ghcb) ||
+           !ghcb_sw_exit_info_1_is_valid(ghcb) ||
+           !ghcb_sw_exit_info_2_is_valid(ghcb))
+               goto vmgexit_err;
+
+       switch (ghcb_get_sw_exit_code(ghcb)) {
+       case SVM_EXIT_READ_DR7:
+               break;
+       case SVM_EXIT_WRITE_DR7:
+               if (!ghcb_rax_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_RDTSC:
+               break;
+       case SVM_EXIT_RDPMC:
+               if (!ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_CPUID:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               if (ghcb_get_rax(ghcb) == 0xd)
+                       if (!ghcb_xcr0_is_valid(ghcb))
+                               goto vmgexit_err;
+               break;
+       case SVM_EXIT_INVD:
+               break;
+       case SVM_EXIT_IOIO:
+               if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
+                       if (!ghcb_sw_scratch_is_valid(ghcb))
+                               goto vmgexit_err;
+               } else {
+                       if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
+                               if (!ghcb_rax_is_valid(ghcb))
+                                       goto vmgexit_err;
+               }
+               break;
+       case SVM_EXIT_MSR:
+               if (!ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               if (ghcb_get_sw_exit_info_1(ghcb)) {
+                       if (!ghcb_rax_is_valid(ghcb) ||
+                           !ghcb_rdx_is_valid(ghcb))
+                               goto vmgexit_err;
+               }
+               break;
+       case SVM_EXIT_VMMCALL:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_cpl_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_RDTSCP:
+               break;
+       case SVM_EXIT_WBINVD:
+               break;
+       case SVM_EXIT_MONITOR:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_rcx_is_valid(ghcb) ||
+                   !ghcb_rdx_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_MWAIT:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_VMGEXIT_MMIO_READ:
+       case SVM_VMGEXIT_MMIO_WRITE:
+               if (!ghcb_sw_scratch_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_VMGEXIT_NMI_COMPLETE:
+       case SVM_VMGEXIT_AP_HLT_LOOP:
+       case SVM_VMGEXIT_AP_JUMP_TABLE:
+       case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+               break;
+       default:
+               goto vmgexit_err;
+       }
+
+       return 0;
+
+vmgexit_err:
+       vcpu = &svm->vcpu;
+
+       if (ghcb->ghcb_usage) {
+               vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
+                           ghcb->ghcb_usage);
+       } else {
+               vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
+                           exit_code);
+               dump_ghcb(svm);
+       }
+
+       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+       vcpu->run->internal.ndata = 2;
+       vcpu->run->internal.data[0] = exit_code;
+       vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+       return -EINVAL;
+}
+
+static void pre_sev_es_run(struct vcpu_svm *svm)
+{
+       if (!svm->ghcb)
+               return;
+
+       if (svm->ghcb_sa_free) {
+               /*
+                * The scratch area lives outside the GHCB, so there is a
+                * buffer that, depending on the operation performed, may
+                * need to be synced, then freed.
+                */
+               if (svm->ghcb_sa_sync) {
+                       kvm_write_guest(svm->vcpu.kvm,
+                                       ghcb_get_sw_scratch(svm->ghcb),
+                                       svm->ghcb_sa, svm->ghcb_sa_len);
+                       svm->ghcb_sa_sync = false;
+               }
+
+               kfree(svm->ghcb_sa);
+               svm->ghcb_sa = NULL;
+               svm->ghcb_sa_free = false;
+       }
+
+       trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
+
+       sev_es_sync_to_ghcb(svm);
+
+       kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
+       svm->ghcb = NULL;
+}
+
 void pre_sev_run(struct vcpu_svm *svm, int cpu)
 {
        struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
        int asid = sev_get_asid(svm->vcpu.kvm);
 
+       /* Perform any SEV-ES pre-run actions */
+       pre_sev_es_run(svm);
+
        /* Assign the asid allocated with this SEV guest */
-       svm->vmcb->control.asid = asid;
+       svm->asid = asid;
 
        /*
         * Flush guest TLB:
@@ -1203,3 +1650,415 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
        svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
        vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
 }
+
+#define GHCB_SCRATCH_AREA_LIMIT                (16ULL * PAGE_SIZE)
+static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+{
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct ghcb *ghcb = svm->ghcb;
+       u64 ghcb_scratch_beg, ghcb_scratch_end;
+       u64 scratch_gpa_beg, scratch_gpa_end;
+       void *scratch_va;
+
+       scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+       if (!scratch_gpa_beg) {
+               pr_err("vmgexit: scratch gpa not provided\n");
+               return false;
+       }
+
+       scratch_gpa_end = scratch_gpa_beg + len;
+       if (scratch_gpa_end < scratch_gpa_beg) {
+               pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
+                      len, scratch_gpa_beg);
+               return false;
+       }
+
+       if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
+               /* Scratch area begins within GHCB */
+               ghcb_scratch_beg = control->ghcb_gpa +
+                                  offsetof(struct ghcb, shared_buffer);
+               ghcb_scratch_end = control->ghcb_gpa +
+                                  offsetof(struct ghcb, reserved_1);
+
+               /*
+                * If the scratch area begins within the GHCB, it must be
+                * completely contained in the GHCB shared buffer area.
+                */
+               if (scratch_gpa_beg < ghcb_scratch_beg ||
+                   scratch_gpa_end > ghcb_scratch_end) {
+                       pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
+                              scratch_gpa_beg, scratch_gpa_end);
+                       return false;
+               }
+
+               scratch_va = (void *)svm->ghcb;
+               scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
+       } else {
+               /*
+                * The guest memory must be read into a kernel buffer, so
+                * limit the size
+                */
+               if (len > GHCB_SCRATCH_AREA_LIMIT) {
+                       pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
+                              len, GHCB_SCRATCH_AREA_LIMIT);
+                       return false;
+               }
+               scratch_va = kzalloc(len, GFP_KERNEL);
+               if (!scratch_va)
+                       return false;
+
+               if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
+                       /* Unable to copy scratch area from guest */
+                       pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
+
+                       kfree(scratch_va);
+                       return false;
+               }
+
+               /*
+                * The scratch area is outside the GHCB. The operation will
+                * dictate whether the buffer needs to be synced before running
+                * the vCPU next time (i.e. a read was requested so the data
+                * must be written back to the guest memory).
+                */
+               svm->ghcb_sa_sync = sync;
+               svm->ghcb_sa_free = true;
+       }
+
+       svm->ghcb_sa = scratch_va;
+       svm->ghcb_sa_len = len;
+
+       return true;
+}
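The scratch_gpa_end < scratch_gpa_beg comparison is a wraparound guard against a guest-supplied length overflowing the 64-bit GPA. For example:

	/* scratch_gpa_beg = 0xfffffffffffff000, len = 0x2000:
	 *   scratch_gpa_end wraps to 0x1000 < scratch_gpa_beg -> rejected
	 */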
+
+static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
+                             unsigned int pos)
+{
+       svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
+       svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
+}
+
+static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
+{
+       return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
+}
+
+static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
+{
+       svm->vmcb->control.ghcb_gpa = value;
+}
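Note that the mask argument to these helpers is unshifted; the shift is applied internally. A round-trip with a hypothetical field (names invented for illustration only):

	#define DEMO_FIELD_MASK	0xffULL	/* 8-bit field... */
	#define DEMO_FIELD_POS	12	/* ...starting at GHCB MSR bit 12 */

	set_ghcb_msr_bits(svm, 0x5a, DEMO_FIELD_MASK, DEMO_FIELD_POS);
	WARN_ON(get_ghcb_msr_bits(svm, DEMO_FIELD_MASK, DEMO_FIELD_POS) != 0x5a);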
+
+static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
+{
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       u64 ghcb_info;
+       int ret = 1;
+
+       ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
+
+       trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
+                                            control->ghcb_gpa);
+
+       switch (ghcb_info) {
+       case GHCB_MSR_SEV_INFO_REQ:
+               set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+                                                   GHCB_VERSION_MIN,
+                                                   sev_enc_bit));
+               break;
+       case GHCB_MSR_CPUID_REQ: {
+               u64 cpuid_fn, cpuid_reg, cpuid_value;
+
+               cpuid_fn = get_ghcb_msr_bits(svm,
+                                            GHCB_MSR_CPUID_FUNC_MASK,
+                                            GHCB_MSR_CPUID_FUNC_POS);
+
+               /* Initialize the registers needed by the CPUID intercept */
+               vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
+               vcpu->arch.regs[VCPU_REGS_RCX] = 0;
+
+               ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
+               if (!ret) {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               cpuid_reg = get_ghcb_msr_bits(svm,
+                                             GHCB_MSR_CPUID_REG_MASK,
+                                             GHCB_MSR_CPUID_REG_POS);
+               if (cpuid_reg == 0)
+                       cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
+               else if (cpuid_reg == 1)
+                       cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
+               else if (cpuid_reg == 2)
+                       cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
+               else
+                       cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
+
+               set_ghcb_msr_bits(svm, cpuid_value,
+                                 GHCB_MSR_CPUID_VALUE_MASK,
+                                 GHCB_MSR_CPUID_VALUE_POS);
+
+               set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
+                                 GHCB_MSR_INFO_MASK,
+                                 GHCB_MSR_INFO_POS);
+               break;
+       }
+       case GHCB_MSR_TERM_REQ: {
+               u64 reason_set, reason_code;
+
+               reason_set = get_ghcb_msr_bits(svm,
+                                              GHCB_MSR_TERM_REASON_SET_MASK,
+                                              GHCB_MSR_TERM_REASON_SET_POS);
+               reason_code = get_ghcb_msr_bits(svm,
+                                               GHCB_MSR_TERM_REASON_MASK,
+                                               GHCB_MSR_TERM_REASON_POS);
+               pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
+                       reason_set, reason_code);
+               fallthrough;
+       }
+       default:
+               ret = -EINVAL;
+       }
+
+       trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
+                                           control->ghcb_gpa, ret);
+
+       return ret;
+}
+
+int sev_handle_vmgexit(struct vcpu_svm *svm)
+{
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       u64 ghcb_gpa, exit_code;
+       struct ghcb *ghcb;
+       int ret;
+
+       /* Validate the GHCB */
+       ghcb_gpa = control->ghcb_gpa;
+       if (ghcb_gpa & GHCB_MSR_INFO_MASK)
+               return sev_handle_vmgexit_msr_protocol(svm);
+
+       if (!ghcb_gpa) {
+               vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
+               return -EINVAL;
+       }
+
+       if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+               /* Unable to map GHCB from guest */
+               vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
+                           ghcb_gpa);
+               return -EINVAL;
+       }
+
+       svm->ghcb = svm->ghcb_map.hva;
+       ghcb = svm->ghcb_map.hva;
+
+       trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);
+
+       exit_code = ghcb_get_sw_exit_code(ghcb);
+
+       ret = sev_es_validate_vmgexit(svm);
+       if (ret)
+               return ret;
+
+       sev_es_sync_from_ghcb(svm);
+       ghcb_set_sw_exit_info_1(ghcb, 0);
+       ghcb_set_sw_exit_info_2(ghcb, 0);
+
+       ret = -EINVAL;
+       switch (exit_code) {
+       case SVM_VMGEXIT_MMIO_READ:
+               if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
+                       break;
+
+               ret = kvm_sev_es_mmio_read(&svm->vcpu,
+                                          control->exit_info_1,
+                                          control->exit_info_2,
+                                          svm->ghcb_sa);
+               break;
+       case SVM_VMGEXIT_MMIO_WRITE:
+               if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
+                       break;
+
+               ret = kvm_sev_es_mmio_write(&svm->vcpu,
+                                           control->exit_info_1,
+                                           control->exit_info_2,
+                                           svm->ghcb_sa);
+               break;
+       case SVM_VMGEXIT_NMI_COMPLETE:
+               ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
+               break;
+       case SVM_VMGEXIT_AP_HLT_LOOP:
+               ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
+               break;
+       case SVM_VMGEXIT_AP_JUMP_TABLE: {
+               struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+               switch (control->exit_info_1) {
+               case 0:
+                       /* Set AP jump table address */
+                       sev->ap_jump_table = control->exit_info_2;
+                       break;
+               case 1:
+                       /* Get AP jump table address */
+                       ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+                       break;
+               default:
+                       pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
+                              control->exit_info_1);
+                       ghcb_set_sw_exit_info_1(ghcb, 1);
+                       ghcb_set_sw_exit_info_2(ghcb,
+                                               X86_TRAP_UD |
+                                               SVM_EVTINJ_TYPE_EXEPT |
+                                               SVM_EVTINJ_VALID);
+               }
+
+               ret = 1;
+               break;
+       }
+       case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+               vcpu_unimpl(&svm->vcpu,
+                           "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
+                           control->exit_info_1, control->exit_info_2);
+               break;
+       default:
+               ret = svm_invoke_exit_handler(svm, exit_code);
+       }
+
+       return ret;
+}
+
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
+{
+       if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+               return -EINVAL;
+
+       return kvm_sev_es_string_io(&svm->vcpu, size, port,
+                                   svm->ghcb_sa, svm->ghcb_sa_len, in);
+}
+
+void sev_es_init_vmcb(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+
+       svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
+       svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+
+       /*
+        * An SEV-ES guest requires a VMSA area that is separate from the
+        * VMCB page. Do not include the encryption mask on the VMSA physical
+        * address since hardware will access it using the guest key.
+        */
+       svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
+
+       /* Can't intercept CR register access, HV can't modify CR registers */
+       svm_clr_intercept(svm, INTERCEPT_CR0_READ);
+       svm_clr_intercept(svm, INTERCEPT_CR4_READ);
+       svm_clr_intercept(svm, INTERCEPT_CR8_READ);
+       svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
+       svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
+       svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
+
+       svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);
+
+       /* Track EFER/CR register changes */
+       svm_set_intercept(svm, TRAP_EFER_WRITE);
+       svm_set_intercept(svm, TRAP_CR0_WRITE);
+       svm_set_intercept(svm, TRAP_CR4_WRITE);
+       svm_set_intercept(svm, TRAP_CR8_WRITE);
+
+       /* No support for enable_vmware_backdoor */
+       clr_exception_intercept(svm, GP_VECTOR);
+
+       /* Can't intercept XSETBV, HV can't modify XCR0 directly */
+       svm_clr_intercept(svm, INTERCEPT_XSETBV);
+
+       /* Clear intercepts on selected MSRs */
+       set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+}
+
+void sev_es_create_vcpu(struct vcpu_svm *svm)
+{
+       /*
+        * Set the GHCB MSR value as per the GHCB specification when creating
+        * a vCPU for an SEV-ES guest.
+        */
+       set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+                                           GHCB_VERSION_MIN,
+                                           sev_enc_bit));
+}
+
+void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu)
+{
+       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       struct vmcb_save_area *hostsa;
+       unsigned int i;
+
+       /*
+        * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
+        * of which one step is to perform a VMLOAD. Since hardware does not
+        * perform a VMSAVE on VMRUN, the host save area must be updated.
+        */
+       asm volatile(__ex("vmsave %0") : : "a" (__sme_page_pa(sd->save_area)) : "memory");
+
+       /*
+        * Certain MSRs are restored on VMEXIT, only save ones that aren't
+        * restored.
+        */
+       for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
+               if (host_save_user_msrs[i].sev_es_restored)
+                       continue;
+
+               rdmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
+       }
+
+       /* XCR0 is restored on VMEXIT, save the current host value */
+       hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
+       hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+
+       /* PKRU is restored on VMEXIT, save the current host value */
+       hostsa->pkru = read_pkru();
+
+       /* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
+       hostsa->xss = host_xss;
+}
+
+void sev_es_vcpu_put(struct vcpu_svm *svm)
+{
+       unsigned int i;
+
+       /*
+        * Certain MSRs are restored on VMEXIT and were saved with vmsave in
+        * sev_es_vcpu_load() above. Only restore ones that weren't.
+        */
+       for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
+               if (host_save_user_msrs[i].sev_es_restored)
+                       continue;
+
+               wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
+       }
+}
+
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       /* First SIPI: Use the values as initially set by the VMM */
+       if (!svm->received_first_sipi) {
+               svm->received_first_sipi = true;
+               return;
+       }
+
+       /*
+        * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
+        * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
+        * non-zero value.
+        */
+       ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+}
index 1e81cfe..7ef1717 100644 (file)
@@ -33,9 +33,9 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
-#include <asm/mce.h>
 #include <asm/spec-ctrl.h>
 #include <asm/cpu_device_id.h>
+#include <asm/traps.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -90,7 +90,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);
 
 static const struct svm_direct_access_msrs {
        u32 index;   /* Index of the MSR */
-       bool always; /* True if intercept is always on */
+       bool always; /* True if intercept is initially cleared */
 } direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
        { .index = MSR_STAR,                            .always = true  },
        { .index = MSR_IA32_SYSENTER_CS,                .always = true  },
@@ -108,6 +108,9 @@ static const struct svm_direct_access_msrs {
        { .index = MSR_IA32_LASTBRANCHTOIP,             .always = false },
        { .index = MSR_IA32_LASTINTFROMIP,              .always = false },
        { .index = MSR_IA32_LASTINTTOIP,                .always = false },
+       { .index = MSR_EFER,                            .always = false },
+       { .index = MSR_IA32_CR_PAT,                     .always = false },
+       { .index = MSR_AMD64_SEV_ES_GHCB,               .always = true  },
        { .index = MSR_INVALID,                         .always = false },
 };
 
@@ -187,10 +190,14 @@ static int vgif = true;
 module_param(vgif, int, 0444);
 
 /* enable/disable SEV support */
-static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
 module_param(sev, int, 0444);
 
-static bool __read_mostly dump_invalid_vmcb = 0;
+/* enable/disable SEV-ES support */
+int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+module_param(sev_es, int, 0444);
+
+bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
 static u8 rsm_ins_bytes[] = "\x0f\xaa";
@@ -336,6 +343,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       /*
+        * SEV-ES does not expose the next RIP. The RIP update is controlled by
+        * the type of exit and the #VC handler in the guest.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               goto done;
+
        if (nrips && svm->vmcb->control.next_rip != 0) {
                WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
                svm->next_rip = svm->vmcb->control.next_rip;
@@ -347,6 +361,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
        } else {
                kvm_rip_write(vcpu, svm->next_rip);
        }
+
+done:
        svm_set_interrupt_shadow(vcpu, 0);
 
        return 1;
@@ -484,7 +500,7 @@ static int svm_hardware_enable(void)
 
        wrmsrl(MSR_EFER, efer | EFER_SVME);
 
-       wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+       wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
 
        if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
                wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
@@ -530,12 +546,12 @@ static int svm_hardware_enable(void)
 
 static void svm_cpu_uninit(int cpu)
 {
-       struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
+       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 
        if (!sd)
                return;
 
-       per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+       per_cpu(svm_data, cpu) = NULL;
        kfree(sd->sev_vmcbs);
        __free_page(sd->save_area);
        kfree(sd);
@@ -552,6 +568,7 @@ static int svm_cpu_init(int cpu)
        sd->save_area = alloc_page(GFP_KERNEL);
        if (!sd->save_area)
                goto free_cpu_data;
+       clear_page(page_address(sd->save_area));
 
        if (svm_sev_enabled()) {
                sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
@@ -662,8 +679,8 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
        msrpm[offset] = tmp;
 }
 
-static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
-                                int read, int write)
+void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
+                         int read, int write)
 {
        set_shadow_msr_intercept(vcpu, msr, read, write);
        set_msr_interception_bitmap(vcpu, msrpm, msr, read, write);
@@ -959,15 +976,11 @@ static __init int svm_hardware_setup(void)
                kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
        }
 
-       if (sev) {
-               if (boot_cpu_has(X86_FEATURE_SEV) &&
-                   IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
-                       r = sev_hardware_setup();
-                       if (r)
-                               sev = false;
-               } else {
-                       sev = false;
-               }
+       if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) {
+               sev_hardware_setup();
+       } else {
+               sev = false;
+               sev_es = false;
        }
 
        svm_adjust_mmio_mask();
@@ -1215,6 +1228,7 @@ static void init_vmcb(struct vcpu_svm *svm)
                save->cr4 = 0;
        }
        svm->asid_generation = 0;
+       svm->asid = 0;
 
        svm->nested.vmcb12_gpa = 0;
        svm->vcpu.arch.hflags = 0;
@@ -1252,6 +1266,11 @@ static void init_vmcb(struct vcpu_svm *svm)
        if (sev_guest(svm->vcpu.kvm)) {
                svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
                clr_exception_intercept(svm, UD_VECTOR);
+
+               if (sev_es_guest(svm->vcpu.kvm)) {
+                       /* Perform SEV-ES specific VMCB updates */
+                       sev_es_init_vmcb(svm);
+               }
        }
 
        vmcb_mark_all_dirty(svm->vmcb);
@@ -1288,6 +1307,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm;
        struct page *vmcb_page;
+       struct page *vmsa_page = NULL;
        int err;
 
        BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1298,9 +1318,27 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        if (!vmcb_page)
                goto out;
 
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               /*
+                * SEV-ES guests require a separate VMSA page used to contain
+                * the encrypted register state of the guest.
+                */
+               vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+               if (!vmsa_page)
+                       goto error_free_vmcb_page;
+
+               /*
+                * SEV-ES guests maintain an encrypted version of their FPU
+                * state which is restored and saved on VMRUN and VMEXIT.
+                * Free the fpu structure to prevent KVM from attempting to
+                * access the FPU state.
+                */
+               kvm_free_guest_fpu(vcpu);
+       }
+
        err = avic_init_vcpu(svm);
        if (err)
-               goto error_free_vmcb_page;
+               goto error_free_vmsa_page;
 
        /* We initialize this flag to true to make sure that the is_running
         * bit would be set the first time the vcpu is loaded.
@@ -1309,21 +1347,34 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
                svm->avic_is_running = true;
 
        svm->msrpm = svm_vcpu_alloc_msrpm();
-       if (!svm->msrpm)
-               goto error_free_vmcb_page;
+       if (!svm->msrpm) {
+               err = -ENOMEM;
+               goto error_free_vmsa_page;
+       }
 
        svm_vcpu_init_msrpm(vcpu, svm->msrpm);
 
        svm->vmcb = page_address(vmcb_page);
        svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
+
+       if (vmsa_page)
+               svm->vmsa = page_address(vmsa_page);
+
        svm->asid_generation = 0;
        init_vmcb(svm);
 
        svm_init_osvw(vcpu);
        vcpu->arch.microcode_version = 0x01000065;
 
+       if (sev_es_guest(svm->vcpu.kvm))
+               /* Perform SEV-ES specific VMCB creation updates */
+               sev_es_create_vcpu(svm);
+
        return 0;
 
+error_free_vmsa_page:
+       if (vmsa_page)
+               __free_page(vmsa_page);
 error_free_vmcb_page:
        __free_page(vmcb_page);
 out:
@@ -1351,6 +1402,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 
        svm_free_nested(svm);
 
+       sev_free_vcpu(vcpu);
+
        __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
        __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 }
@@ -1366,15 +1419,20 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                vmcb_mark_all_dirty(svm->vmcb);
        }
 
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               sev_es_vcpu_load(svm, cpu);
+       } else {
 #ifdef CONFIG_X86_64
-       rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
+               rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
 #endif
-       savesegment(fs, svm->host.fs);
-       savesegment(gs, svm->host.gs);
-       svm->host.ldt = kvm_read_ldt();
+               savesegment(fs, svm->host.fs);
+               savesegment(gs, svm->host.gs);
+               svm->host.ldt = kvm_read_ldt();
 
-       for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
-               rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
+               for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
+                       rdmsrl(host_save_user_msrs[i].index,
+                              svm->host_user_msrs[i]);
+       }
 
        if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
                u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
@@ -1402,18 +1460,24 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
        avic_vcpu_put(vcpu);
 
        ++vcpu->stat.host_state_reload;
-       kvm_load_ldt(svm->host.ldt);
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               sev_es_vcpu_put(svm);
+       } else {
+               kvm_load_ldt(svm->host.ldt);
 #ifdef CONFIG_X86_64
-       loadsegment(fs, svm->host.fs);
-       wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
-       load_gs_index(svm->host.gs);
+               loadsegment(fs, svm->host.fs);
+               wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
+               load_gs_index(svm->host.gs);
 #else
 #ifdef CONFIG_X86_32_LAZY_GS
-       loadsegment(gs, svm->host.gs);
+               loadsegment(gs, svm->host.gs);
 #endif
 #endif
-       for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
-               wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
+
+               for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
+                       wrmsrl(host_save_user_msrs[i].index,
+                              svm->host_user_msrs[i]);
+       }
 }
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -1631,9 +1695,18 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
-       ulong gcr0 = svm->vcpu.arch.cr0;
-       u64 *hcr0 = &svm->vmcb->save.cr0;
+       ulong gcr0;
+       u64 *hcr0;
 
+       /*
+        * SEV-ES guests must always keep the CR intercepts cleared. CR
+        * tracking is done using the CR write traps.
+        */
+       if (sev_es_guest(svm->vcpu.kvm))
+               return;
+
+       gcr0 = svm->vcpu.arch.cr0;
+       hcr0 = &svm->vmcb->save.cr0;
        *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
                | (gcr0 & SVM_CR0_SELECTIVE_MASK);
 
@@ -1653,7 +1726,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-       if (vcpu->arch.efer & EFER_LME) {
+       if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
                if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
                        vcpu->arch.efer |= EFER_LMA;
                        svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
@@ -1682,13 +1755,15 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        update_cr0_intercept(svm);
 }
 
-int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
-       unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
+       return true;
+}
 
-       if (cr4 & X86_CR4_VMXE)
-               return 1;
+void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+       unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
+       unsigned long old_cr4 = vcpu->arch.cr4;
 
        if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
                svm_flush_tlb(vcpu);
@@ -1699,7 +1774,9 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        cr4 |= host_cr4_mce;
        to_svm(vcpu)->vmcb->save.cr4 = cr4;
        vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
-       return 0;
+
+       if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+               kvm_update_cpuid_runtime(vcpu);
 }
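This hunk splits CR4 handling into a fallible validity check (svm_is_valid_cr4(), which accepts everything on SVM) and an infallible commit (svm_set_cr4(), now returning void). A minimal sketch of the calling convention this enables in common code; the helper name and wiring are illustrative, not the exact x86.c code:

    static int try_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
    {
            if (!kvm_x86_ops.is_valid_cr4(vcpu, cr4))
                    return 1;       /* reject; the caller injects #GP */

            kvm_x86_ops.set_cr4(vcpu, cr4);  /* commit path cannot fail */
            return 0;
    }

The VMX side of the same split, which rejects CR4.VMXE under SMM, appears further down in this diff.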
 
 static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -1751,18 +1828,20 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
                ++sd->asid_generation;
                sd->next_asid = sd->min_asid;
                svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
+               vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
        }
 
        svm->asid_generation = sd->asid_generation;
-       svm->vmcb->control.asid = sd->next_asid++;
-
-       vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
+       svm->asid = sd->next_asid++;
 }
 
 static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
 {
        struct vmcb *vmcb = svm->vmcb;
 
+       if (svm->vcpu.arch.guest_state_protected)
+               return;
+
        if (unlikely(value != vmcb->save.dr6)) {
                vmcb->save.dr6 = value;
                vmcb_mark_dirty(vmcb, VMCB_DR);
@@ -1773,6 +1852,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        get_debugreg(vcpu->arch.db[0], 0);
        get_debugreg(vcpu->arch.db[1], 1);
        get_debugreg(vcpu->arch.db[2], 2);
@@ -1791,6 +1873,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        svm->vmcb->save.dr7 = value;
        vmcb_mark_dirty(svm->vmcb, VMCB_DR);
 }
@@ -1929,25 +2014,6 @@ static bool is_erratum_383(void)
        return true;
 }
 
-/*
- * Trigger machine check on the host. We assume all the MSRs are already set up
- * by the CPU and that we still run on the same CPU as the MCE occurred on.
- * We pass a fake environment to the machine check handler because we want
- * the guest to be always treated like user space, no matter what context
- * it used internally.
- */
-static void kvm_machine_check(void)
-{
-#if defined(CONFIG_X86_MCE)
-       struct pt_regs regs = {
-               .cs = 3, /* Fake ring 3 no matter what the guest ran on */
-               .flags = X86_EFLAGS_IF,
-       };
-
-       do_machine_check(&regs);
-#endif
-}
-
 static void svm_handle_mce(struct vcpu_svm *svm)
 {
        if (is_erratum_383()) {
@@ -1979,6 +2045,13 @@ static int shutdown_interception(struct vcpu_svm *svm)
        struct kvm_run *kvm_run = svm->vcpu.run;
 
        /*
+        * The VM save area has already been encrypted so it
+        * cannot be reinitialized - just terminate.
+        */
+       if (sev_es_guest(svm->vcpu.kvm))
+               return -EINVAL;
+
+       /*
         * VMCB is undefined after a SHUTDOWN intercept
         * so reinitialize it.
         */
@@ -1999,11 +2072,16 @@ static int io_interception(struct vcpu_svm *svm)
        ++svm->vcpu.stat.io_exits;
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-       if (string)
-               return kvm_emulate_instruction(vcpu, 0);
-
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
+
+       if (string) {
+               if (sev_es_guest(vcpu->kvm))
+                       return sev_es_string_io(svm, size, port, in);
+               else
+                       return kvm_emulate_instruction(vcpu, 0);
+       }
+
        svm->next_rip = svm->vmcb->control.exit_info_2;
 
        return kvm_fast_pio(&svm->vcpu, size, port, in);
@@ -2267,9 +2345,11 @@ static int cpuid_interception(struct vcpu_svm *svm)
 static int iret_interception(struct vcpu_svm *svm)
 {
        ++svm->vcpu.stat.nmi_window_exits;
-       svm_clr_intercept(svm, INTERCEPT_IRET);
        svm->vcpu.arch.hflags |= HF_IRET_MASK;
-       svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+       if (!sev_es_guest(svm->vcpu.kvm)) {
+               svm_clr_intercept(svm, INTERCEPT_IRET);
+               svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+       }
        kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
        return 1;
 }
@@ -2406,6 +2486,41 @@ static int cr_interception(struct vcpu_svm *svm)
        return kvm_complete_insn_gp(&svm->vcpu, err);
 }
 
+static int cr_trap(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       unsigned long old_value, new_value;
+       unsigned int cr;
+       int ret = 0;
+
+       new_value = (unsigned long)svm->vmcb->control.exit_info_1;
+
+       cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
+       switch (cr) {
+       case 0:
+               old_value = kvm_read_cr0(vcpu);
+               svm_set_cr0(vcpu, new_value);
+
+               kvm_post_set_cr0(vcpu, old_value, new_value);
+               break;
+       case 4:
+               old_value = kvm_read_cr4(vcpu);
+               svm_set_cr4(vcpu, new_value);
+
+               kvm_post_set_cr4(vcpu, old_value, new_value);
+               break;
+       case 8:
+               ret = kvm_set_cr8(&svm->vcpu, new_value);
+               break;
+       default:
+               WARN(1, "unhandled CR%d write trap", cr);
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       return kvm_complete_insn_gp(vcpu, ret);
+}
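The CR number is recovered arithmetically, on the assumption that the write-trap exit codes are contiguous, one per control register:

    /* Illustration of the subtraction above (codes assumed contiguous):
     *   exit_code == SVM_EXIT_CR0_WRITE_TRAP + 0  ->  cr == 0
     *   exit_code == SVM_EXIT_CR0_WRITE_TRAP + 4  ->  cr == 4
     *   exit_code == SVM_EXIT_CR0_WRITE_TRAP + 8  ->  cr == 8
     */

Any other CR value falls through to the WARN and queues #UD.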
+
 static int dr_interception(struct vcpu_svm *svm)
 {
        int reg, dr;
@@ -2459,6 +2574,25 @@ static int cr8_write_interception(struct vcpu_svm *svm)
        return 0;
 }
 
+static int efer_trap(struct vcpu_svm *svm)
+{
+       struct msr_data msr_info;
+       int ret;
+
+       /*
+        * Clear the EFER_SVME bit from EFER. The SVM code always sets this
+        * bit in svm_set_efer(), but __kvm_valid_efer() checks it against
+        * whether the guest has X86_FEATURE_SVM - this avoids a failure if
+        * the guest doesn't have X86_FEATURE_SVM.
+        */
+       msr_info.host_initiated = false;
+       msr_info.index = MSR_EFER;
+       msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME;
+       ret = kvm_set_msr_common(&svm->vcpu, &msr_info);
+
+       return kvm_complete_insn_gp(&svm->vcpu, ret);
+}
+
 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 {
        msr->data = 0;
@@ -2541,10 +2675,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+                   !guest_has_spec_ctrl_msr(vcpu))
                        return 1;
 
                msr_info->data = svm->spec_ctrl;
@@ -2582,6 +2713,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        return 0;
 }
 
+static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       if (!sev_es_guest(svm->vcpu.kvm) || !err)
+               return kvm_complete_insn_gp(&svm->vcpu, err);
+
+       ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+       ghcb_set_sw_exit_info_2(svm->ghcb,
+                               X86_TRAP_GP |
+                               SVM_EVTINJ_TYPE_EXEPT |
+                               SVM_EVTINJ_VALID);
+       return 1;
+}
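For an SEV-ES guest a failed emulated MSR access cannot simply raise #GP, since KVM cannot write the guest's encrypted register state; the error is instead reflected through the GHCB. A hedged reading of the two fields set above (constants per their use elsewhere in this diff):

    /*
     * sw_exit_info_1 = 1 marks the MSR access as failed.
     * sw_exit_info_2 describes the event to inject, roughly:
     *   X86_TRAP_GP (vector 13) | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID
     * The guest's #VC handler turns this back into a #GP.
     */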
+
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
        return kvm_emulate_rdmsr(&svm->vcpu);
@@ -2628,10 +2773,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+                   !guest_has_spec_ctrl_msr(vcpu))
                        return 1;
 
                if (kvm_spec_ctrl_test_value(data))
@@ -2656,12 +2798,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                break;
        case MSR_IA32_PRED_CMD:
                if (!msr->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
+                   !guest_has_pred_cmd_msr(vcpu))
                        return 1;
 
                if (data & ~PRED_CMD_IBPB)
                        return 1;
-               if (!boot_cpu_has(X86_FEATURE_AMD_IBPB))
+               if (!boot_cpu_has(X86_FEATURE_IBPB))
                        return 1;
                if (!data)
                        break;
@@ -2803,7 +2945,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
 static int pause_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
-       bool in_kernel = (svm_get_cpl(vcpu) == 0);
+       bool in_kernel;
+
+       /*
+        * CPL is not made available for an SEV-ES guest, therefore
+        * vcpu->arch.preempted_in_kernel can never be true.  Just
+        * set in_kernel to false as well.
+        */
+       in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0;
 
        if (!kvm_pause_in_guest(vcpu->kvm))
                grow_ple_window(vcpu);
@@ -2918,11 +3067,16 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
        [SVM_EXIT_RDPRU]                        = rdpru_interception,
+       [SVM_EXIT_EFER_WRITE_TRAP]              = efer_trap,
+       [SVM_EXIT_CR0_WRITE_TRAP]               = cr_trap,
+       [SVM_EXIT_CR4_WRITE_TRAP]               = cr_trap,
+       [SVM_EXIT_CR8_WRITE_TRAP]               = cr_trap,
        [SVM_EXIT_INVPCID]                      = invpcid_interception,
        [SVM_EXIT_NPF]                          = npf_interception,
        [SVM_EXIT_RSM]                          = rsm_interception,
        [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
        [SVM_EXIT_AVIC_UNACCELERATED_ACCESS]    = avic_unaccelerated_access_interception,
+       [SVM_EXIT_VMGEXIT]                      = sev_handle_vmgexit,
 };
 
 static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -2964,6 +3118,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
        pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
        pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
        pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
+       pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa);
        pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
        pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
        pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
@@ -2971,6 +3126,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
        pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
        pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
        pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
+       pr_err("%-20s%016llx\n", "vmsa_pa:", control->vmsa_pa);
        pr_err("VMCB State Save Area:\n");
        pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
               "es:",
@@ -3043,6 +3199,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
               "excp_to:", save->last_excp_to);
 }
 
+static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+{
+       if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+           svm_exit_handlers[exit_code])
+               return 0;
+
+       vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
+       dump_vmcb(vcpu);
+       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+       vcpu->run->internal.ndata = 2;
+       vcpu->run->internal.data[0] = exit_code;
+       vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+       return -EINVAL;
+}
+
+int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code)
+{
+       if (svm_handle_invalid_exit(&svm->vcpu, exit_code))
+               return 0;
+
+#ifdef CONFIG_RETPOLINE
+       if (exit_code == SVM_EXIT_MSR)
+               return msr_interception(svm);
+       else if (exit_code == SVM_EXIT_VINTR)
+               return interrupt_window_interception(svm);
+       else if (exit_code == SVM_EXIT_INTR)
+               return intr_interception(svm);
+       else if (exit_code == SVM_EXIT_HLT)
+               return halt_interception(svm);
+       else if (exit_code == SVM_EXIT_NPF)
+               return npf_interception(svm);
+#endif
+       return svm_exit_handlers[exit_code](svm);
+}
+
 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
                              u32 *intr_info, u32 *error_code)
 {
@@ -3066,10 +3259,13 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 
        trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
 
-       if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
-               vcpu->arch.cr0 = svm->vmcb->save.cr0;
-       if (npt_enabled)
-               vcpu->arch.cr3 = svm->vmcb->save.cr3;
+       /* SEV-ES guests must use the CR write traps to track CR registers. */
+       if (!sev_es_guest(vcpu->kvm)) {
+               if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
+                       vcpu->arch.cr0 = svm->vmcb->save.cr0;
+               if (npt_enabled)
+                       vcpu->arch.cr3 = svm->vmcb->save.cr3;
+       }
 
        if (is_guest_mode(vcpu)) {
                int vmexit;
@@ -3106,32 +3302,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
        if (exit_fastpath != EXIT_FASTPATH_NONE)
                return 1;
 
-       if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
-           || !svm_exit_handlers[exit_code]) {
-               vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
-               dump_vmcb(vcpu);
-               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-               vcpu->run->internal.suberror =
-                       KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
-               vcpu->run->internal.ndata = 2;
-               vcpu->run->internal.data[0] = exit_code;
-               vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
-               return 0;
-       }
-
-#ifdef CONFIG_RETPOLINE
-       if (exit_code == SVM_EXIT_MSR)
-               return msr_interception(svm);
-       else if (exit_code == SVM_EXIT_VINTR)
-               return interrupt_window_interception(svm);
-       else if (exit_code == SVM_EXIT_INTR)
-               return intr_interception(svm);
-       else if (exit_code == SVM_EXIT_HLT)
-               return halt_interception(svm);
-       else if (exit_code == SVM_EXIT_NPF)
-               return npf_interception(svm);
-#endif
-       return svm_exit_handlers[exit_code](svm);
+       return svm_invoke_exit_handler(svm, exit_code);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
@@ -3160,7 +3331,8 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 
        svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
        vcpu->arch.hflags |= HF_NMI_MASK;
-       svm_set_intercept(svm, INTERCEPT_IRET);
+       if (!sev_es_guest(svm->vcpu.kvm))
+               svm_set_intercept(svm, INTERCEPT_IRET);
        ++vcpu->stat.nmi_injections;
 }
 
@@ -3181,6 +3353,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       /*
+        * SEV-ES guests must always keep the CR intercepts cleared. CR
+        * tracking is done using the CR write traps.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return;
+
        if (nested_svm_virtualize_tpr(vcpu))
                return;
 
@@ -3237,10 +3416,12 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 
        if (masked) {
                svm->vcpu.arch.hflags |= HF_NMI_MASK;
-               svm_set_intercept(svm, INTERCEPT_IRET);
+               if (!sev_es_guest(svm->vcpu.kvm))
+                       svm_set_intercept(svm, INTERCEPT_IRET);
        } else {
                svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
-               svm_clr_intercept(svm, INTERCEPT_IRET);
+               if (!sev_es_guest(svm->vcpu.kvm))
+                       svm_clr_intercept(svm, INTERCEPT_IRET);
        }
 }
 
@@ -3252,7 +3433,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
        if (!gif_set(svm))
                return true;
 
-       if (is_guest_mode(vcpu)) {
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               /*
+                * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
+                * bit to determine the state of the IF flag.
+                */
+               if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
+                       return true;
+       } else if (is_guest_mode(vcpu)) {
                /* As long as interrupts are being delivered...  */
                if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
                    ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
@@ -3411,8 +3599,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
         * If we've made progress since setting HF_IRET_MASK, we've
         * executed an IRET and can allow NMI injection.
         */
-       if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
-           && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
+       if ((svm->vcpu.arch.hflags & HF_IRET_MASK) &&
+           (sev_es_guest(svm->vcpu.kvm) ||
+            kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip)) {
                svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
                kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
        }
@@ -3435,6 +3624,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
                break;
        case SVM_EXITINTINFO_TYPE_EXEPT:
                /*
+                * Never re-inject a #VC exception.
+                */
+               if (vector == X86_TRAP_VC)
+                       break;
+
+               /*
                 * In case of software exceptions, do not reinject the vector,
                 * but re-execute the instruction instead. Rewind RIP first
                 * if we emulated INT3 before.
@@ -3482,8 +3677,6 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
        return EXIT_FASTPATH_NONE;
 }
 
-void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
-
 static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
                                        struct vcpu_svm *svm)
 {
@@ -3507,16 +3700,20 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
        guest_enter_irqoff();
        lockdep_hardirqs_on(CALLER_ADDR0);
 
-       __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               __svm_sev_es_vcpu_run(svm->vmcb_pa);
+       } else {
+               __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
 
 #ifdef CONFIG_X86_64
-       native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+               native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else
-       loadsegment(fs, svm->host.fs);
+               loadsegment(fs, svm->host.fs);
 #ifndef CONFIG_X86_32_LAZY_GS
-       loadsegment(gs, svm->host.gs);
+               loadsegment(gs, svm->host.gs);
 #endif
 #endif
+       }
 
        /*
         * VMEXIT disables interrupts (host state), but tracing and lockdep
@@ -3566,6 +3763,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 
        sync_lapic_to_cr8(vcpu);
 
+       if (unlikely(svm->asid != svm->vmcb->control.asid)) {
+               svm->vmcb->control.asid = svm->asid;
+               vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
+       }
        svm->vmcb->save.cr2 = vcpu->arch.cr2;
 
        /*
@@ -3610,14 +3811,17 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
        if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
                svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
-       reload_tss(vcpu);
+       if (!sev_es_guest(svm->vcpu.kvm))
+               reload_tss(vcpu);
 
        x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
 
-       vcpu->arch.cr2 = svm->vmcb->save.cr2;
-       vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
-       vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
-       vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
+       if (!sev_es_guest(svm->vcpu.kvm)) {
+               vcpu->arch.cr2 = svm->vmcb->save.cr2;
+               vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
+               vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
+               vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
+       }
 
        if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
                kvm_before_interrupt(&svm->vcpu);
@@ -3720,12 +3924,21 @@ static bool svm_cpu_has_accelerated_tpr(void)
        return false;
 }
 
-static bool svm_has_emulated_msr(u32 index)
+/*
+ * The kvm parameter can be NULL (module initialization, or invocation before
+ * VM creation). Be sure to check the kvm parameter before using it.
+ */
+static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
 {
        switch (index) {
        case MSR_IA32_MCG_EXT_CTL:
        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
                return false;
+       case MSR_IA32_SMBASE:
+               /* SEV-ES guests do not support SMM, so report false */
+               if (kvm && sev_es_guest(kvm))
+                       return false;
+               break;
        default:
                break;
        }
@@ -4084,6 +4297,12 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i
        unsigned long cr4;
 
        /*
+        * Emulation is not possible for an SEV-ES guest: its register
+        * state is encrypted and cannot be accessed.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return false;
+
+       /*
         * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
         *
         * Errata:
@@ -4163,6 +4382,14 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
                   (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
 }
 
+static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+       if (!sev_es_guest(vcpu->kvm))
+               return kvm_vcpu_deliver_sipi_vector(vcpu, vector);
+
+       sev_vcpu_deliver_sipi_vector(vcpu, vector);
+}
+
 static void svm_vm_destroy(struct kvm *kvm)
 {
        avic_vm_destroy(kvm);
@@ -4215,6 +4442,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .get_cpl = svm_get_cpl,
        .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
        .set_cr0 = svm_set_cr0,
+       .is_valid_cr4 = svm_is_valid_cr4,
        .set_cr4 = svm_set_cr4,
        .set_efer = svm_set_efer,
        .get_idt = svm_get_idt,
@@ -4303,6 +4531,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
        .msr_filter_changed = svm_msr_filter_changed,
+       .complete_emulated_msr = svm_complete_emulated_msr,
+
+       .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
index 1d853fe..0fe874a 100644
 
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
+#include <linux/bits.h>
 
 #include <asm/svm.h>
 
-static const u32 host_save_user_msrs[] = {
+#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+
+static const struct svm_host_save_msrs {
+       u32 index;              /* Index of the MSR */
+       bool sev_es_restored;   /* True if MSR is restored on SEV-ES VMEXIT */
+} host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
-       MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
-       MSR_FS_BASE,
+       { .index = MSR_STAR,                    .sev_es_restored = true },
+       { .index = MSR_LSTAR,                   .sev_es_restored = true },
+       { .index = MSR_CSTAR,                   .sev_es_restored = true },
+       { .index = MSR_SYSCALL_MASK,            .sev_es_restored = true },
+       { .index = MSR_KERNEL_GS_BASE,          .sev_es_restored = true },
+       { .index = MSR_FS_BASE,                 .sev_es_restored = true },
 #endif
-       MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
-       MSR_TSC_AUX,
+       { .index = MSR_IA32_SYSENTER_CS,        .sev_es_restored = true },
+       { .index = MSR_IA32_SYSENTER_ESP,       .sev_es_restored = true },
+       { .index = MSR_IA32_SYSENTER_EIP,       .sev_es_restored = true },
+       { .index = MSR_TSC_AUX,                 .sev_es_restored = false },
 };
-
 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
 
-#define MAX_DIRECT_ACCESS_MSRS 15
+#define MAX_DIRECT_ACCESS_MSRS 18
 #define MSRPM_OFFSETS  16
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
@@ -61,11 +72,13 @@ enum {
 
 struct kvm_sev_info {
        bool active;            /* SEV enabled guest */
+       bool es_active;         /* SEV-ES enabled guest */
        unsigned int asid;      /* ASID used for this guest */
        unsigned int handle;    /* SEV firmware handle */
        int fd;                 /* SEV device fd */
        unsigned long pages_locked; /* Number of pages locked */
        struct list_head regions_list;  /* List of registered regions */
+       u64 ap_jump_table;      /* SEV-ES AP Jump Table address */
 };
 
 struct kvm_svm {
@@ -106,6 +119,7 @@ struct vcpu_svm {
        struct vmcb *vmcb;
        unsigned long vmcb_pa;
        struct svm_cpu_data *svm_data;
+       u32 asid;
        uint64_t asid_generation;
        uint64_t sysenter_esp;
        uint64_t sysenter_eip;
@@ -166,6 +180,18 @@ struct vcpu_svm {
                DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS);
                DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
        } shadow_msr_intercept;
+
+       /* SEV-ES support */
+       struct vmcb_save_area *vmsa;
+       struct ghcb *ghcb;
+       struct kvm_host_map ghcb_map;
+       bool received_first_sipi;
+
+       /* SEV-ES scratch area support */
+       void *ghcb_sa;
+       u64 ghcb_sa_len;
+       bool ghcb_sa_sync;
+       bool ghcb_sa_free;
 };
 
 struct svm_cpu_data {
@@ -193,6 +219,28 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
        return container_of(kvm, struct kvm_svm, kvm);
 }
 
+static inline bool sev_guest(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+       return sev->active;
+#else
+       return false;
+#endif
+}
+
+static inline bool sev_es_guest(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+       return sev_guest(kvm) && sev->es_active;
+#else
+       return false;
+#endif
+}
+
 static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
 {
        vmcb->control.clean = 0;
@@ -244,21 +292,24 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm)
 {
        struct vmcb *vmcb = get_host_vmcb(svm);
 
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+       if (!sev_es_guest(svm->vcpu.kvm)) {
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+       }
+
        vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
        vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
 
        recalc_intercepts(svm);
@@ -270,6 +321,12 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
 
        vmcb->control.intercepts[INTERCEPT_DR] = 0;
 
+       /* DR7 access must remain intercepted for an SEV-ES guest */
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+       }
+
        recalc_intercepts(svm);
 }
 
@@ -351,6 +408,10 @@ static inline bool gif_set(struct vcpu_svm *svm)
 #define MSR_CR3_LONG_MBZ_MASK                  0xfff0000000000000U
 #define MSR_INVALID                            0xffffffffU
 
+extern int sev;
+extern int sev_es;
+extern bool dump_invalid_vmcb;
+
 u32 svm_msrpm_offset(u32 msr);
 u32 *svm_vcpu_alloc_msrpm(void);
 void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
@@ -358,13 +419,16 @@ void svm_vcpu_free_msrpm(u32 *msrpm);
 
 int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void svm_flush_tlb(struct kvm_vcpu *vcpu);
 void disable_nmi_singlestep(struct vcpu_svm *svm);
 bool svm_smi_blocked(struct kvm_vcpu *vcpu);
 bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
 bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
 void svm_set_gif(struct vcpu_svm *svm, bool value);
+int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code);
+void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
+                         int read, int write);
 
 /* nested.c */
 
@@ -470,18 +534,42 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 
 /* sev.c */
 
-extern unsigned int max_sev_asid;
+#define GHCB_VERSION_MAX               1ULL
+#define GHCB_VERSION_MIN               1ULL
+
+#define GHCB_MSR_INFO_POS              0
+#define GHCB_MSR_INFO_MASK             (BIT_ULL(12) - 1)
+
+#define GHCB_MSR_SEV_INFO_RESP         0x001
+#define GHCB_MSR_SEV_INFO_REQ          0x002
+#define GHCB_MSR_VER_MAX_POS           48
+#define GHCB_MSR_VER_MAX_MASK          0xffff
+#define GHCB_MSR_VER_MIN_POS           32
+#define GHCB_MSR_VER_MIN_MASK          0xffff
+#define GHCB_MSR_CBIT_POS              24
+#define GHCB_MSR_CBIT_MASK             0xff
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)                           \
+       ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |   \
+        (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |   \
+        (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |        \
+        GHCB_MSR_SEV_INFO_RESP)
+
+#define GHCB_MSR_CPUID_REQ             0x004
+#define GHCB_MSR_CPUID_RESP            0x005
+#define GHCB_MSR_CPUID_FUNC_POS                32
+#define GHCB_MSR_CPUID_FUNC_MASK       0xffffffff
+#define GHCB_MSR_CPUID_VALUE_POS       32
+#define GHCB_MSR_CPUID_VALUE_MASK      0xffffffff
+#define GHCB_MSR_CPUID_REG_POS         30
+#define GHCB_MSR_CPUID_REG_MASK                0x3
+
+#define GHCB_MSR_TERM_REQ              0x100
+#define GHCB_MSR_TERM_REASON_SET_POS   12
+#define GHCB_MSR_TERM_REASON_SET_MASK  0xf
+#define GHCB_MSR_TERM_REASON_POS       16
+#define GHCB_MSR_TERM_REASON_MASK      0xff
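The GHCB MSR protocol multiplexes requests through the low 12 bits of the GHCB MSR value. A minimal sketch of how a request would be identified using the masks above (hypothetical helper, not part of this patch):

    static inline u64 ghcb_msr_info(u64 ghcb_msr)
    {
            /* Low 12 bits select the request; the upper bits are payload. */
            return (ghcb_msr >> GHCB_MSR_INFO_POS) & GHCB_MSR_INFO_MASK;
    }

A value of GHCB_MSR_SEV_INFO_REQ, for instance, would be answered with the GHCB_MSR_SEV_INFO() response built in sev_es_create_vcpu().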
 
-static inline bool sev_guest(struct kvm *kvm)
-{
-#ifdef CONFIG_KVM_AMD_SEV
-       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
-       return sev->active;
-#else
-       return false;
-#endif
-}
+extern unsigned int max_sev_asid;
 
 static inline bool svm_sev_enabled(void)
 {
@@ -495,7 +583,20 @@ int svm_register_enc_region(struct kvm *kvm,
 int svm_unregister_enc_region(struct kvm *kvm,
                              struct kvm_enc_region *range);
 void pre_sev_run(struct vcpu_svm *svm, int cpu);
-int __init sev_hardware_setup(void);
+void __init sev_hardware_setup(void);
 void sev_hardware_teardown(void);
+void sev_free_vcpu(struct kvm_vcpu *vcpu);
+int sev_handle_vmgexit(struct vcpu_svm *svm);
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
+void sev_es_init_vmcb(struct vcpu_svm *svm);
+void sev_es_create_vcpu(struct vcpu_svm *svm);
+void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu);
+void sev_es_vcpu_put(struct vcpu_svm *svm);
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
+
+/* vmenter.S */
+
+void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);
+void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
 
 #endif
index 1ec1ac4..6feb8c0 100644
@@ -168,3 +168,53 @@ SYM_FUNC_START(__svm_vcpu_run)
        pop %_ASM_BP
        ret
 SYM_FUNC_END(__svm_vcpu_run)
+
+/**
+ * __svm_sev_es_vcpu_run - Run an SEV-ES vCPU via a transition to SVM guest mode
+ * @vmcb_pa:   unsigned long, physical address of the VMCB
+ */
+SYM_FUNC_START(__svm_sev_es_vcpu_run)
+       push %_ASM_BP
+#ifdef CONFIG_X86_64
+       push %r15
+       push %r14
+       push %r13
+       push %r12
+#else
+       push %edi
+       push %esi
+#endif
+       push %_ASM_BX
+
+       /* Enter guest mode */
+       mov %_ASM_ARG1, %_ASM_AX
+       sti
+
+1:     vmrun %_ASM_AX
+       jmp 3f
+2:     cmpb $0, kvm_rebooting
+       jne 3f
+       ud2
+       _ASM_EXTABLE(1b, 2b)
+
+3:     cli
+
+#ifdef CONFIG_RETPOLINE
+       /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+       FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+#endif
+
+       pop %_ASM_BX
+
+#ifdef CONFIG_X86_64
+       pop %r12
+       pop %r13
+       pop %r14
+       pop %r15
+#else
+       pop %esi
+       pop %edi
+#endif
+       pop %_ASM_BP
+       ret
+SYM_FUNC_END(__svm_sev_es_vcpu_run)
index aef960f..2de30c2 100644
@@ -1578,6 +1578,103 @@ TRACE_EVENT(kvm_hv_syndbg_get_msr,
                  __entry->vcpu_id, __entry->vp_index, __entry->msr,
                  __entry->data)
 );
+
+/*
+ * Tracepoint for the start of VMGEXIT processing
+ */
+TRACE_EVENT(kvm_vmgexit_enter,
+       TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb),
+       TP_ARGS(vcpu_id, ghcb),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, vcpu_id)
+               __field(u64, exit_reason)
+               __field(u64, info1)
+               __field(u64, info2)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id     = vcpu_id;
+               __entry->exit_reason = ghcb->save.sw_exit_code;
+               __entry->info1       = ghcb->save.sw_exit_info_1;
+               __entry->info2       = ghcb->save.sw_exit_info_2;
+       ),
+
+       TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx",
+                 __entry->vcpu_id, __entry->exit_reason,
+                 __entry->info1, __entry->info2)
+);
+
+/*
+ * Tracepoint for the end of VMGEXIT processing
+ */
+TRACE_EVENT(kvm_vmgexit_exit,
+       TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb),
+       TP_ARGS(vcpu_id, ghcb),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, vcpu_id)
+               __field(u64, exit_reason)
+               __field(u64, info1)
+               __field(u64, info2)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id     = vcpu_id;
+               __entry->exit_reason = ghcb->save.sw_exit_code;
+               __entry->info1       = ghcb->save.sw_exit_info_1;
+               __entry->info2       = ghcb->save.sw_exit_info_2;
+       ),
+
+       TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx",
+                 __entry->vcpu_id, __entry->exit_reason,
+                 __entry->info1, __entry->info2)
+);
+
+/*
+ * Tracepoint for the start of VMGEXIT MSR protocol processing
+ */
+TRACE_EVENT(kvm_vmgexit_msr_protocol_enter,
+       TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa),
+       TP_ARGS(vcpu_id, ghcb_gpa),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, vcpu_id)
+               __field(u64, ghcb_gpa)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu_id;
+               __entry->ghcb_gpa = ghcb_gpa;
+       ),
+
+       TP_printk("vcpu %u, ghcb_gpa %016llx",
+                 __entry->vcpu_id, __entry->ghcb_gpa)
+);
+
+/*
+ * Tracepoint for the end of VMGEXIT MSR protocol processing
+ */
+TRACE_EVENT(kvm_vmgexit_msr_protocol_exit,
+       TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa, int result),
+       TP_ARGS(vcpu_id, ghcb_gpa, result),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, vcpu_id)
+               __field(u64, ghcb_gpa)
+               __field(int, result)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu_id;
+               __entry->ghcb_gpa = ghcb_gpa;
+               __entry->result   = result;
+       ),
+
+       TP_printk("vcpu %u, ghcb_gpa %016llx, result %d",
+                 __entry->vcpu_id, __entry->ghcb_gpa, __entry->result)
+);
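Each TRACE_EVENT() above generates a trace_<name>() helper. A hedged example of the entry tracepoint as it would be invoked from the VMGEXIT handler (the actual call site is in sev.c, which this hunk does not show):

    trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, svm->ghcb);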
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
index f3199bb..41f2466 100644
@@ -326,7 +326,6 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
 
 uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
 {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
        /*
         * vmcs_version represents the range of supported Enlightened VMCS
         * versions: lower 8 bits is the minimal version, higher 8 bits is the
@@ -334,7 +333,7 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
         * KVM_EVMCS_VERSION.
         */
        if (kvm_cpu_cap_get(X86_FEATURE_VMX) &&
-           vmx->nested.enlightened_vmcs_enabled)
+           (!vcpu || to_vmx(vcpu)->nested.enlightened_vmcs_enabled))
                return (KVM_EVMCS_VERSION << 8) | 1;
 
        return 0;
index 89af692..0fbb469 100644
@@ -2952,7 +2952,8 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
 static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
 {
        if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
-              vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT))
+              vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT &&
+              vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI))
                return -EINVAL;
 
        return 0;
@@ -3559,19 +3560,29 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
         */
        nested_cache_shadow_vmcs12(vcpu, vmcs12);
 
-       /*
-        * If we're entering a halted L2 vcpu and the L2 vcpu won't be
-        * awakened by event injection or by an NMI-window VM-exit or
-        * by an interrupt-window VM-exit, halt the vcpu.
-        */
-       if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
-           !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
-           !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) &&
-           !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) &&
-             (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
+       switch (vmcs12->guest_activity_state) {
+       case GUEST_ACTIVITY_HLT:
+               /*
+                * If we're entering a halted L2 vcpu and the L2 vcpu won't be
+                * awakened by event injection or by an NMI-window VM-exit or
+                * by an interrupt-window VM-exit, halt the vcpu.
+                */
+               if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
+                   !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) &&
+                   !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
+                     (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
+                       vmx->nested.nested_run_pending = 0;
+                       return kvm_vcpu_halt(vcpu);
+               }
+               break;
+       case GUEST_ACTIVITY_WAIT_SIPI:
                vmx->nested.nested_run_pending = 0;
-               return kvm_vcpu_halt(vcpu);
+               vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+               break;
+       default:
+               break;
        }
+
        return 1;
 
 vmentry_failed:
@@ -3797,7 +3808,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
                        return -EBUSY;
                nested_vmx_update_pending_dbg(vcpu);
                clear_bit(KVM_APIC_INIT, &apic->pending_events);
-               nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
+               if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED)
+                       nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
+               return 0;
+       }
+
+       if (lapic_in_kernel(vcpu) &&
+           test_bit(KVM_APIC_SIPI, &apic->pending_events)) {
+               if (block_nested_events)
+                       return -EBUSY;
+
+               clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+               if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
+                       nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0,
+                                               apic->sipi_vector & 0xFFUL);
                return 0;
        }
 
@@ -4036,6 +4060,8 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
        if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
                vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
+       else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
+               vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI;
        else
                vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
 
@@ -4416,6 +4442,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
        /* trying to cancel vmlaunch/vmresume is a bug */
        WARN_ON_ONCE(vmx->nested.nested_run_pending);
 
+       kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+
        /* Service the TLB flush request for L2 before switching to L1. */
        if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
                kvm_vcpu_flush_tlb_current(vcpu);
@@ -4814,7 +4842,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
        /*
         * The Intel VMX Instruction Reference lists a bunch of bits that are
         * prerequisite to running VMXON, most notably cr4.VMXE must be set to
-        * 1 (see vmx_set_cr4() for when we allow the guest to set this).
+        * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
         * Otherwise, we should fail with #UD.  But most faulting conditions
         * have already been checked by hardware, prior to the VM-exit for
         * VMXON.  We do test guest cr4.VMXE because processor CR4 always has
@@ -6483,7 +6511,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
        msrs->misc_low |=
                MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
                VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
-               VMX_MISC_ACTIVITY_HLT;
+               VMX_MISC_ACTIVITY_HLT |
+               VMX_MISC_ACTIVITY_WAIT_SIPI;
        msrs->misc_high = 0;
 
        /*
index 90ad7a6..e85aa5f 100644
@@ -132,7 +132,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
        mov (%_ASM_SP), %_ASM_AX
 
        /* Check if vmlaunch or vmresume is needed */
-       cmpb $0, %bl
+       testb %bl, %bl
 
        /* Load guest registers.  Don't clobber flags. */
        mov VCPU_RCX(%_ASM_AX), %_ASM_CX
index 47b8357..2af05d3 100644
@@ -40,7 +40,6 @@
 #include <asm/irq_remapping.h>
 #include <asm/kexec.h>
 #include <asm/perf_event.h>
-#include <asm/mce.h>
 #include <asm/mmu_context.h>
 #include <asm/mshyperv.h>
 #include <asm/mwait.h>
@@ -1826,7 +1825,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+                   !guest_has_spec_ctrl_msr(vcpu))
                        return 1;
 
                msr_info->data = to_vmx(vcpu)->spec_ctrl;
@@ -2028,7 +2027,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+                   !guest_has_spec_ctrl_msr(vcpu))
                        return 1;
 
                if (kvm_spec_ctrl_test_value(data))
@@ -2063,12 +2062,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                goto find_uret_msr;
        case MSR_IA32_PRED_CMD:
                if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+                   !guest_has_pred_cmd_msr(vcpu))
                        return 1;
 
                if (data & ~PRED_CMD_IBPB)
                        return 1;
-               if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+               if (!boot_cpu_has(X86_FEATURE_IBPB))
                        return 1;
                if (!data)
                        break;
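
The new guest_has_spec_ctrl_msr()/guest_has_pred_cmd_msr() helpers exist because several AMD-only CPUID bits also imply the MSR is present, so checking X86_FEATURE_SPEC_CTRL alone rejected valid guests. Their shape is roughly as below (the authoritative bit lists live in arch/x86/kvm/cpuid.h; guest_has_pred_cmd_msr() is analogous, folding in the AMD IBPB bit):

static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
{
	/* Any of these CPUID bits implies MSR_IA32_SPEC_CTRL exists. */
	return guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
	       guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) ||
	       guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) ||
	       guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD);
}
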
@@ -3095,8 +3094,25 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd,
                vmcs_writel(GUEST_CR3, guest_cr3);
 }
 
-int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
+       /*
+        * We operate under the default treatment of SMM, so VMX cannot be
+        * enabled under SMM.  Note, whether or not VMXE is allowed at all is
+        * handled by kvm_is_valid_cr4().
+        */
+       if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu))
+               return false;
+
+       if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
+               return false;
+
+       return true;
+}
+
+void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+       unsigned long old_cr4 = vcpu->arch.cr4;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        /*
         * Pass through host's Machine Check Enable value to hw_cr4, which
@@ -3123,21 +3139,6 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                }
        }
 
-       if (cr4 & X86_CR4_VMXE) {
-               /*
-                * To use VMXON (and later other VMX instructions), a guest
-                * must first be able to turn on cr4.VMXE (see handle_vmon()).
-                * So basically the check on whether to allow nested VMX
-                * is here.  We operate under the default treatment of SMM,
-                * so VMX cannot be enabled under SMM.
-                */
-               if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
-                       return 1;
-       }
-
-       if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
-               return 1;
-
        vcpu->arch.cr4 = cr4;
        kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
 
@@ -3168,7 +3169,9 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 
        vmcs_writel(CR4_READ_SHADOW, cr4);
        vmcs_writel(GUEST_CR4, hw_cr4);
-       return 0;
+
+       if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+               kvm_update_cpuid_runtime(vcpu);
 }
 
 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
@@ -3515,42 +3518,33 @@ bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu)
        return true;
 }
 
-static int init_rmode_tss(struct kvm *kvm)
+static int init_rmode_tss(struct kvm *kvm, void __user *ua)
 {
-       gfn_t fn;
-       u16 data = 0;
-       int idx, r;
+       const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+       u16 data;
+       int i;
+
+       for (i = 0; i < 3; i++) {
+               if (__copy_to_user(ua + PAGE_SIZE * i, zero_page, PAGE_SIZE))
+                       return -EFAULT;
+       }
 
-       idx = srcu_read_lock(&kvm->srcu);
-       fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
-       r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
-       if (r < 0)
-               goto out;
        data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
-       r = kvm_write_guest_page(kvm, fn++, &data,
-                       TSS_IOPB_BASE_OFFSET, sizeof(u16));
-       if (r < 0)
-               goto out;
-       r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
-       if (r < 0)
-               goto out;
-       r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
-       if (r < 0)
-               goto out;
+       if (__copy_to_user(ua + TSS_IOPB_BASE_OFFSET, &data, sizeof(u16)))
+               return -EFAULT;
+
        data = ~0;
-       r = kvm_write_guest_page(kvm, fn, &data,
-                                RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
-                                sizeof(u8));
-out:
-       srcu_read_unlock(&kvm->srcu, idx);
-       return r;
+       if (__copy_to_user(ua + RMODE_TSS_SIZE - 1, &data, sizeof(u8)))
+               return -EFAULT;
+
+       return 0;
 }
 
 static int init_rmode_identity_map(struct kvm *kvm)
 {
        struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
        int i, r = 0;
-       kvm_pfn_t identity_map_pfn;
+       void __user *uaddr;
        u32 tmp;
 
        /* Protect kvm_vmx->ept_identity_pagetable_done. */
@@ -3561,24 +3555,24 @@ static int init_rmode_identity_map(struct kvm *kvm)
 
        if (!kvm_vmx->ept_identity_map_addr)
                kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
-       identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
 
-       r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
-                                   kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
-       if (r < 0)
+       uaddr = __x86_set_memory_region(kvm,
+                                       IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+                                       kvm_vmx->ept_identity_map_addr,
+                                       PAGE_SIZE);
+       if (IS_ERR(uaddr)) {
+               r = PTR_ERR(uaddr);
                goto out;
+       }
 
-       r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
-       if (r < 0)
-               goto out;
        /* Set up identity-mapping pagetable for EPT in real mode */
        for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
                tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
                        _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
-               r = kvm_write_guest_page(kvm, identity_map_pfn,
-                               &tmp, i * sizeof(tmp), sizeof(tmp));
-               if (r < 0)
+               if (__copy_to_user(uaddr + i * sizeof(tmp), &tmp, sizeof(tmp))) {
+                       r = -EFAULT;
                        goto out;
+               }
        }
        kvm_vmx->ept_identity_pagetable_done = true;
 
@@ -3605,19 +3599,22 @@ static void seg_setup(int seg)
 static int alloc_apic_access_page(struct kvm *kvm)
 {
        struct page *page;
-       int r = 0;
+       void __user *hva;
+       int ret = 0;
 
        mutex_lock(&kvm->slots_lock);
        if (kvm->arch.apic_access_page_done)
                goto out;
-       r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
-                                   APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
-       if (r)
+       hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+                                     APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
+       if (IS_ERR(hva)) {
+               ret = PTR_ERR(hva);
                goto out;
+       }
 
        page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
        if (is_error_page(page)) {
-               r = -EFAULT;
+               ret = -EFAULT;
                goto out;
        }
 
@@ -3629,7 +3626,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
        kvm->arch.apic_access_page_done = true;
 out:
        mutex_unlock(&kvm->slots_lock);
-       return r;
+       return ret;
 }
 
 int allocate_vpid(void)
@@ -4638,7 +4635,7 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
-       int ret;
+       void __user *ret;
 
        if (enable_unrestricted_guest)
                return 0;
@@ -4648,10 +4645,12 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
                                      PAGE_SIZE * 3);
        mutex_unlock(&kvm->slots_lock);
 
-       if (ret)
-               return ret;
+       if (IS_ERR(ret))
+               return PTR_ERR(ret);
+
        to_kvm_vmx(kvm)->tss_addr = addr;
-       return init_rmode_tss(kvm);
+
+       return init_rmode_tss(kvm, ret);
 }
 
 static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
@@ -4716,25 +4715,6 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
        return 1;
 }
 
-/*
- * Trigger machine check on the host. We assume all the MSRs are already set up
- * by the CPU and that we still run on the same CPU as the MCE occurred on.
- * We pass a fake environment to the machine check handler because we want
- * the guest to be always treated like user space, no matter what context
- * it used internally.
- */
-static void kvm_machine_check(void)
-{
-#if defined(CONFIG_X86_MCE)
-       struct pt_regs regs = {
-               .cs = 3, /* Fake ring 3 no matter what the guest ran on */
-               .flags = X86_EFLAGS_IF,
-       };
-
-       do_machine_check(&regs);
-#endif
-}
-
 static int handle_machine_check(struct kvm_vcpu *vcpu)
 {
        /* handled by vmx_vcpu_run() */
@@ -6399,7 +6379,11 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
                handle_exception_nmi_irqoff(vmx);
 }
 
-static bool vmx_has_emulated_msr(u32 index)
+/*
+ * The kvm parameter can be NULL (module initialization, or invocation before
+ * VM creation). Be sure to check the kvm parameter before using it.
+ */
+static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
 {
        switch (index) {
        case MSR_IA32_SMBASE:
@@ -7558,7 +7542,7 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
 
 static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
 {
-       return to_vmx(vcpu)->nested.vmxon;
+       return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu);
 }
 
 static void vmx_migrate_timers(struct kvm_vcpu *vcpu)
@@ -7587,6 +7571,11 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit)
        return supported & BIT(bit);
 }
 
+static int vmx_cpu_dirty_log_size(void)
+{
+       return enable_pml ? PML_ENTITY_NUM : 0;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .hardware_unsetup = hardware_unsetup,
 
@@ -7616,6 +7605,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .get_cpl = vmx_get_cpl,
        .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
        .set_cr0 = vmx_set_cr0,
+       .is_valid_cr4 = vmx_is_valid_cr4,
        .set_cr4 = vmx_set_cr4,
        .set_efer = vmx_set_efer,
        .get_idt = vmx_get_idt,
@@ -7715,6 +7705,10 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .migrate_timers = vmx_migrate_timers,
 
        .msr_filter_changed = vmx_msr_filter_changed,
+       .complete_emulated_msr = kvm_complete_insn_gp,
+       .cpu_dirty_log_size = vmx_cpu_dirty_log_size,
+
+       .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 };
 
 static __init int hardware_setup(void)
@@ -7832,6 +7826,7 @@ static __init int hardware_setup(void)
                vmx_x86_ops.slot_disable_log_dirty = NULL;
                vmx_x86_ops.flush_log_dirty = NULL;
                vmx_x86_ops.enable_log_dirty_pt_masked = NULL;
+               vmx_x86_ops.cpu_dirty_log_size = NULL;
        }
 
        if (!cpu_has_vmx_preemption_timer())
index f6f66e5..9d3a557 100644
@@ -321,7 +321,7 @@ u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
 void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
 int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
 void ept_save_pdptrs(struct kvm_vcpu *vcpu);
 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
index 078a39d..0287840 100644
@@ -197,7 +197,8 @@ EXPORT_SYMBOL_GPL(host_efer);
 bool __read_mostly allow_smaller_maxphyaddr = 0;
 EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
 
-static u64 __read_mostly host_xss;
+u64 __read_mostly host_xss;
+EXPORT_SYMBOL_GPL(host_xss);
 u64 __read_mostly supported_xss;
 EXPORT_SYMBOL_GPL(supported_xss);
 
@@ -804,11 +805,29 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(pdptrs_changed);
 
+void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
+{
+       unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
+
+       if ((cr0 ^ old_cr0) & X86_CR0_PG) {
+               kvm_clear_async_pf_completion_queue(vcpu);
+               kvm_async_pf_hash_reset(vcpu);
+       }
+
+       if ((cr0 ^ old_cr0) & update_bits)
+               kvm_mmu_reset_context(vcpu);
+
+       if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
+           kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
+           !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+               kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
+}
+EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
+
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        unsigned long old_cr0 = kvm_read_cr0(vcpu);
        unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
-       unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
 
        cr0 |= X86_CR0_ET;
 
@@ -847,18 +866,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
        kvm_x86_ops.set_cr0(vcpu, cr0);
 
-       if ((cr0 ^ old_cr0) & X86_CR0_PG) {
-               kvm_clear_async_pf_completion_queue(vcpu);
-               kvm_async_pf_hash_reset(vcpu);
-       }
-
-       if ((cr0 ^ old_cr0) & update_bits)
-               kvm_mmu_reset_context(vcpu);
-
-       if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
-           kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
-           !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
-               kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
+       kvm_post_set_cr0(vcpu, old_cr0, cr0);
 
        return 0;
 }
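
Factoring kvm_post_set_cr0() out lets code that writes CR0 without going through kvm_set_cr0() (e.g. restoring saved nested state) still trigger the side effects. A sketch of the intended caller pattern; the function and new_cr0 are placeholders:

static void restore_cr0_directly(struct kvm_vcpu *vcpu, unsigned long new_cr0)
{
	unsigned long old_cr0 = kvm_read_cr0(vcpu);

	vcpu->arch.cr0 = new_cr0;		  /* bypasses kvm_set_cr0()'s checks */
	kvm_post_set_cr0(vcpu, old_cr0, new_cr0); /* async-PF reset, MMU reset, CD zap */
}
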
@@ -872,6 +880,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw);
 
 void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
 {
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
 
                if (vcpu->arch.xcr0 != host_xcr0)
@@ -892,6 +903,9 @@ EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
 
 void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
 {
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        if (static_cpu_has(X86_FEATURE_PKU) &&
            (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
             (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
@@ -964,26 +978,36 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 }
 EXPORT_SYMBOL_GPL(kvm_set_xcr);
 
-int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        if (cr4 & cr4_reserved_bits)
-               return -EINVAL;
+               return false;
 
        if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
-               return -EINVAL;
+               return false;
 
-       return 0;
+       return kvm_x86_ops.is_valid_cr4(vcpu, cr4);
+}
+EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
+
+void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
+{
+       unsigned long mmu_role_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+                                     X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
+
+       if (((cr4 ^ old_cr4) & mmu_role_bits) ||
+           (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
+               kvm_mmu_reset_context(vcpu);
 }
-EXPORT_SYMBOL_GPL(kvm_valid_cr4);
+EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
 
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        unsigned long old_cr4 = kvm_read_cr4(vcpu);
        unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
                                   X86_CR4_SMEP;
-       unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
 
-       if (kvm_valid_cr4(vcpu, cr4))
+       if (!kvm_is_valid_cr4(vcpu, cr4))
                return 1;
 
        if (is_long_mode(vcpu)) {
@@ -1006,15 +1030,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                        return 1;
        }
 
-       if (kvm_x86_ops.set_cr4(vcpu, cr4))
-               return 1;
+       kvm_x86_ops.set_cr4(vcpu, cr4);
 
-       if (((cr4 ^ old_cr4) & mmu_role_bits) ||
-           (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
-               kvm_mmu_reset_context(vcpu);
-
-       if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
-               kvm_update_cpuid_runtime(vcpu);
+       kvm_post_set_cr4(vcpu, old_cr4, cr4);
 
        return 0;
 }
@@ -1638,27 +1656,20 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_msr);
 
-static int complete_emulated_msr(struct kvm_vcpu *vcpu, bool is_read)
+static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
 {
-       if (vcpu->run->msr.error) {
-               kvm_inject_gp(vcpu, 0);
-               return 1;
-       } else if (is_read) {
+       int err = vcpu->run->msr.error;
+       if (!err) {
                kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
                kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
        }
 
-       return kvm_skip_emulated_instruction(vcpu);
-}
-
-static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
-{
-       return complete_emulated_msr(vcpu, true);
+       return kvm_x86_ops.complete_emulated_msr(vcpu, err);
 }
 
 static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
 {
-       return complete_emulated_msr(vcpu, false);
+       return kvm_x86_ops.complete_emulated_msr(vcpu, vcpu->run->msr.error);
 }
 
 static u64 kvm_msr_reason(int r)
@@ -1721,18 +1732,16 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
                return 0;
        }
 
-       /* MSR read failed? Inject a #GP */
-       if (r) {
+       if (!r) {
+               trace_kvm_msr_read(ecx, data);
+
+               kvm_rax_write(vcpu, data & -1u);
+               kvm_rdx_write(vcpu, (data >> 32) & -1u);
+       } else {
                trace_kvm_msr_read_ex(ecx);
-               kvm_inject_gp(vcpu, 0);
-               return 1;
        }
 
-       trace_kvm_msr_read(ecx, data);
-
-       kvm_rax_write(vcpu, data & -1u);
-       kvm_rdx_write(vcpu, (data >> 32) & -1u);
-       return kvm_skip_emulated_instruction(vcpu);
+       return kvm_x86_ops.complete_emulated_msr(vcpu, r);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
 
@@ -1753,15 +1762,12 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
        if (r < 0)
                return r;
 
-       /* MSR write failed? Inject a #GP */
-       if (r > 0) {
+       if (!r)
+               trace_kvm_msr_write(ecx, data);
+       else
                trace_kvm_msr_write_ex(ecx, data);
-               kvm_inject_gp(vcpu, 0);
-               return 1;
-       }
 
-       trace_kvm_msr_write(ecx, data);
-       return kvm_skip_emulated_instruction(vcpu);
+       return kvm_x86_ops.complete_emulated_msr(vcpu, r);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
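
Both completion paths now funnel through the .complete_emulated_msr hook, which VMX wires to kvm_complete_insn_gp() (see the vmx_x86_ops hunk above); that preserves the old behavior of injecting #GP on error and otherwise skipping the instruction. Roughly:

int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
	if (err) {
		kvm_inject_gp(vcpu, 0);	/* failed MSR access -> #GP */
		return 1;
	}
	return kvm_skip_emulated_instruction(vcpu);
}
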
 
@@ -3678,6 +3684,27 @@ static inline bool kvm_can_mwait_in_guest(void)
                boot_cpu_has(X86_FEATURE_ARAT);
 }
 
+static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
+                                           struct kvm_cpuid2 __user *cpuid_arg)
+{
+       struct kvm_cpuid2 cpuid;
+       int r;
+
+       r = -EFAULT;
+       if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
+               return r;
+
+       r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries);
+       if (r)
+               return r;
+
+       r = -EFAULT;
+       if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
+               return r;
+
+       return 0;
+}
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
        int r = 0;
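
The shared helper also backs the new system-scoped variant (KVM_CAP_SYS_HYPERV_CPUID), so userspace can query Hyper-V CPUID leaves before any VM exists. A hedged userspace sketch, error handling elided; kvm_fd is assumed to be an open /dev/kvm:

	struct kvm_cpuid2 *cpuid;

	cpuid = calloc(1, sizeof(*cpuid) + 64 * sizeof(struct kvm_cpuid_entry2));
	cpuid->nent = 64;
	if (ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid) < 0)
		perror("KVM_GET_SUPPORTED_HV_CPUID");
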
@@ -3714,6 +3741,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_HYPERV_TLBFLUSH:
        case KVM_CAP_HYPERV_SEND_IPI:
        case KVM_CAP_HYPERV_CPUID:
+       case KVM_CAP_SYS_HYPERV_CPUID:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
        case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3762,7 +3790,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                 * fringe case that is not enabled except via specific settings
                 * of the module parameters.
                 */
-               r = kvm_x86_ops.has_emulated_msr(MSR_IA32_SMBASE);
+               r = kvm_x86_ops.has_emulated_msr(kvm, MSR_IA32_SMBASE);
                break;
        case KVM_CAP_VAPIC:
                r = !kvm_x86_ops.cpu_has_accelerated_tpr();
@@ -3899,6 +3927,9 @@ long kvm_arch_dev_ioctl(struct file *filp,
        case KVM_GET_MSRS:
                r = msr_io(NULL, argp, do_get_msr_feature, 1);
                break;
+       case KVM_GET_SUPPORTED_HV_CPUID:
+               r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
+               break;
        default:
                r = -EINVAL;
                break;
@@ -3997,7 +4028,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
        int idx;
 
-       if (vcpu->preempted)
+       if (vcpu->preempted && !vcpu->arch.guest_state_protected)
                vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
 
        /*
@@ -4051,21 +4082,23 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 
 static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
 {
+       /*
+        * We can accept userspace's request for interrupt injection
+        * as long as we have a place to store the interrupt number.
+        * The actual injection will happen when the CPU is able to
+        * deliver the interrupt.
+        */
+       if (kvm_cpu_has_extint(vcpu))
+               return false;
+
+       /* Acknowledging ExtINT does not happen if LINT0 is masked.  */
        return (!lapic_in_kernel(vcpu) ||
                kvm_apic_accept_pic_intr(vcpu));
 }
 
-/*
- * if userspace requested an interrupt window, check that the
- * interrupt window is open.
- *
- * No need to exit to userspace if we already have an interrupt queued.
- */
 static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
 {
        return kvm_arch_interrupt_allowed(vcpu) &&
-               !kvm_cpu_has_interrupt(vcpu) &&
-               !kvm_event_needs_reinjection(vcpu) &&
                kvm_cpu_accept_dm_intr(vcpu);
 }
 
@@ -4479,6 +4512,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                                         struct kvm_xsave *guest_xsave)
 {
+       if (!vcpu->arch.guest_fpu)
+               return;
+
        if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                memset(guest_xsave, 0, sizeof(struct kvm_xsave));
                fill_xsave((u8 *) guest_xsave->region, vcpu);
@@ -4496,9 +4532,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
                                        struct kvm_xsave *guest_xsave)
 {
-       u64 xstate_bv =
-               *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
-       u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
+       u64 xstate_bv;
+       u32 mxcsr;
+
+       if (!vcpu->arch.guest_fpu)
+               return 0;
+
+       xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
+       mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
 
        if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                /*
@@ -4975,25 +5016,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        }
-       case KVM_GET_SUPPORTED_HV_CPUID: {
-               struct kvm_cpuid2 __user *cpuid_arg = argp;
-               struct kvm_cpuid2 cpuid;
-
-               r = -EFAULT;
-               if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
-                       goto out;
-
-               r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
-                                               cpuid_arg->entries);
-               if (r)
-                       goto out;
-
-               r = -EFAULT;
-               if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
-                       goto out;
-               r = 0;
+       case KVM_GET_SUPPORTED_HV_CPUID:
+               r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
                break;
-       }
        default:
                r = -EINVAL;
        }
@@ -5774,7 +5799,7 @@ static void kvm_init_msr_list(void)
        }
 
        for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
-               if (!kvm_x86_ops.has_emulated_msr(emulated_msrs_all[i]))
+               if (!kvm_x86_ops.has_emulated_msr(NULL, emulated_msrs_all[i]))
                        continue;
 
                emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
@@ -7951,17 +7976,22 @@ void kvm_arch_exit(void)
        kmem_cache_destroy(x86_fpu_cache);
 }
 
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
 {
        ++vcpu->stat.halt_exits;
        if (lapic_in_kernel(vcpu)) {
-               vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+               vcpu->arch.mp_state = state;
                return 1;
        } else {
-               vcpu->run->exit_reason = KVM_EXIT_HLT;
+               vcpu->run->exit_reason = reason;
                return 0;
        }
 }
+
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+{
+       return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
+}
 EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
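
kvm_emulate_ap_reset_hold() chains "skip the instruction" with "park the vCPU"; the && keeps the vCPU in the kernel only if both steps agree. Without an in-kernel LAPIC the hold surfaces to userspace as a new exit reason. A sketch of the VMM side, with hypothetical handler names:

static void handle_halt_exits(struct kvm_run *run)
{
	switch (run->exit_reason) {
	case KVM_EXIT_HLT:
		wait_for_interrupt();		/* ordinary HLT */
		break;
	case KVM_EXIT_AP_RESET_HOLD:
		park_until_init_sipi();		/* SEV-ES AP reset hold */
		break;
	}
}
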
@@ -7975,6 +8005,14 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
+{
+       int ret = kvm_skip_emulated_instruction(vcpu);
+
+       return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret;
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
+
 #ifdef CONFIG_X86_64
 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
                                unsigned long clock_type)
@@ -8156,7 +8194,14 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *kvm_run = vcpu->run;
 
-       kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+       /*
+        * if_flag is obsolete and useless, so do not bother
+        * setting it for SEV-ES guests.  Userspace can just
+        * use kvm_run->ready_for_interrupt_injection.
+        */
+       kvm_run->if_flag = !vcpu->arch.guest_state_protected
+               && (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+
        kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
@@ -8746,9 +8791,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        bool req_immediate_exit = false;
 
+       /* Forbid vmenter if vcpu dirty ring is soft-full */
+       if (unlikely(vcpu->kvm->dirty_ring_size &&
+                    kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
+               vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
+               trace_kvm_dirty_ring_exit(vcpu);
+               r = 0;
+               goto out;
+       }
+
        if (kvm_request_pending(vcpu)) {
                if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
-                       if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
+                       if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
+                               ;
+                       else if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
                                r = 0;
                                goto out;
                        }
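
When the soft-full exit fires, userspace is expected to harvest the per-vCPU rings and reset them before re-entering. A sketch of the VMM run loop's reaction (the harvest helper is hypothetical; KVM_RESET_DIRTY_RINGS is the VM ioctl from the dirty-ring API):

	if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
		harvest_dirty_rings(vm);		/* collect dirty GFNs */
		ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0);	/* make entries reusable */
	}
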
@@ -9053,6 +9109,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
        kvm_apic_accept_events(vcpu);
        switch(vcpu->arch.mp_state) {
        case KVM_MP_STATE_HALTED:
+       case KVM_MP_STATE_AP_RESET_HOLD:
                vcpu->arch.pv.pv_unhalted = false;
                vcpu->arch.mp_state =
                        KVM_MP_STATE_RUNNABLE;
@@ -9221,9 +9278,14 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
        kvm_save_current_fpu(vcpu->arch.user_fpu);
 
-       /* PKRU is separately restored in kvm_x86_ops.run.  */
-       __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
-                               ~XFEATURE_MASK_PKRU);
+       /*
+        * Guests with protected state can't have it set by the hypervisor,
+        * so skip trying to set it.
+        */
+       if (vcpu->arch.guest_fpu)
+               /* PKRU is separately restored in kvm_x86_ops.run. */
+               __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
+                                       ~XFEATURE_MASK_PKRU);
 
        fpregs_mark_activate();
        fpregs_unlock();
@@ -9236,7 +9298,12 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
        fpregs_lock();
 
-       kvm_save_current_fpu(vcpu->arch.guest_fpu);
+       /*
+        * Guests with protected state can't have it read by the hypervisor,
+        * so skip trying to save it.
+        */
+       if (vcpu->arch.guest_fpu)
+               kvm_save_current_fpu(vcpu->arch.guest_fpu);
 
        copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
 
@@ -9415,6 +9482,9 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
        struct desc_ptr dt;
 
+       if (vcpu->arch.guest_state_protected)
+               goto skip_protected_regs;
+
        kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
        kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -9432,9 +9502,11 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        sregs->gdt.limit = dt.size;
        sregs->gdt.base = dt.address;
 
-       sregs->cr0 = kvm_read_cr0(vcpu);
        sregs->cr2 = vcpu->arch.cr2;
        sregs->cr3 = kvm_read_cr3(vcpu);
+
+skip_protected_regs:
+       sregs->cr0 = kvm_read_cr0(vcpu);
        sregs->cr4 = kvm_read_cr4(vcpu);
        sregs->cr8 = kvm_get_cr8(vcpu);
        sregs->efer = vcpu->arch.efer;
@@ -9464,8 +9536,9 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                kvm_load_guest_fpu(vcpu);
 
        kvm_apic_accept_events(vcpu);
-       if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
-                                       vcpu->arch.pv.pv_unhalted)
+       if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED ||
+            vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) &&
+           vcpu->arch.pv.pv_unhalted)
                mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
        else
                mp_state->mp_state = vcpu->arch.mp_state;
@@ -9533,7 +9606,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
-static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
        if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
                /*
@@ -9541,31 +9614,29 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
                 * 64-bit mode (though maybe in a 32-bit code segment).
                 * CR4.PAE and EFER.LMA must be set.
                 */
-               if (!(sregs->cr4 & X86_CR4_PAE)
-                   || !(sregs->efer & EFER_LMA))
-                       return -EINVAL;
+               if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
+                       return false;
        } else {
                /*
                 * Not in 64-bit mode: EFER.LMA is clear and the code
                 * segment cannot be 64-bit.
                 */
                if (sregs->efer & EFER_LMA || sregs->cs.l)
-                       return -EINVAL;
+                       return false;
        }
 
-       return kvm_valid_cr4(vcpu, sregs->cr4);
+       return kvm_is_valid_cr4(vcpu, sregs->cr4);
 }
 
 static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
        struct msr_data apic_base_msr;
        int mmu_reset_needed = 0;
-       int cpuid_update_needed = 0;
        int pending_vec, max_bits, idx;
        struct desc_ptr dt;
        int ret = -EINVAL;
 
-       if (kvm_valid_sregs(vcpu, sregs))
+       if (!kvm_is_valid_sregs(vcpu, sregs))
                goto out;
 
        apic_base_msr.data = sregs->apic_base;
@@ -9573,6 +9644,9 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        if (kvm_set_apic_base(vcpu, &apic_base_msr))
                goto out;
 
+       if (vcpu->arch.guest_state_protected)
+               goto skip_protected_regs;
+
        dt.size = sregs->idt.limit;
        dt.address = sregs->idt.base;
        kvm_x86_ops.set_idt(vcpu, &dt);
@@ -9595,11 +9669,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        vcpu->arch.cr0 = sregs->cr0;
 
        mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
-       cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
-                               (X86_CR4_OSXSAVE | X86_CR4_PKE));
        kvm_x86_ops.set_cr4(vcpu, sregs->cr4);
-       if (cpuid_update_needed)
-               kvm_update_cpuid_runtime(vcpu);
 
        idx = srcu_read_lock(&vcpu->kvm->srcu);
        if (is_pae_paging(vcpu)) {
@@ -9611,14 +9681,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
 
-       max_bits = KVM_NR_INTERRUPTS;
-       pending_vec = find_first_bit(
-               (const unsigned long *)sregs->interrupt_bitmap, max_bits);
-       if (pending_vec < max_bits) {
-               kvm_queue_interrupt(vcpu, pending_vec, false);
-               pr_debug("Set back pending irq %d\n", pending_vec);
-       }
-
        kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
        kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -9637,6 +9699,15 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
            !is_protmode(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
+skip_protected_regs:
+       max_bits = KVM_NR_INTERRUPTS;
+       pending_vec = find_first_bit(
+               (const unsigned long *)sregs->interrupt_bitmap, max_bits);
+       if (pending_vec < max_bits) {
+               kvm_queue_interrupt(vcpu, pending_vec, false);
+               pr_debug("Set back pending irq %d\n", pending_vec);
+       }
+
        kvm_make_request(KVM_REQ_EVENT, vcpu);
 
        ret = 0;
@@ -9661,6 +9732,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
        unsigned long rflags;
        int i, r;
 
+       if (vcpu->arch.guest_state_protected)
+               return -EINVAL;
+
        vcpu_load(vcpu);
 
        if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
@@ -9740,6 +9814,9 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        struct fxregs_state *fxsave;
 
+       if (!vcpu->arch.guest_fpu)
+               return 0;
+
        vcpu_load(vcpu);
 
        fxsave = &vcpu->arch.guest_fpu->state.fxsave;
@@ -9760,6 +9837,9 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        struct fxregs_state *fxsave;
 
+       if (!vcpu->arch.guest_fpu)
+               return 0;
+
        vcpu_load(vcpu);
 
        fxsave = &vcpu->arch.guest_fpu->state.fxsave;
@@ -9818,6 +9898,9 @@ static int sync_regs(struct kvm_vcpu *vcpu)
 
 static void fx_init(struct kvm_vcpu *vcpu)
 {
+       if (!vcpu->arch.guest_fpu)
+               return;
+
        fpstate_init(&vcpu->arch.guest_fpu->state);
        if (boot_cpu_has(X86_FEATURE_XSAVES))
                vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
@@ -9831,6 +9914,15 @@ static void fx_init(struct kvm_vcpu *vcpu)
        vcpu->arch.cr0 |= X86_CR0_ET;
 }
 
+void kvm_free_guest_fpu(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->arch.guest_fpu) {
+               kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
+               vcpu->arch.guest_fpu = NULL;
+       }
+}
+EXPORT_SYMBOL_GPL(kvm_free_guest_fpu);
+
 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 {
        if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
@@ -9926,7 +10018,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        return 0;
 
 free_guest_fpu:
-       kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
+       kvm_free_guest_fpu(vcpu);
 free_user_fpu:
        kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
 free_emulate_ctxt:
@@ -9980,7 +10072,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
        kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
        free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
        kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
-       kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
+       kvm_free_guest_fpu(vcpu);
 
        kvm_hv_vcpu_uninit(vcpu);
        kvm_pmu_destroy(vcpu);
@@ -10028,7 +10120,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        kvm_async_pf_hash_reset(vcpu);
        vcpu->arch.apf.halted = false;
 
-       if (kvm_mpx_supported()) {
+       if (vcpu->arch.guest_fpu && kvm_mpx_supported()) {
                void *mpx_state_buffer;
 
                /*
@@ -10077,6 +10169,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
        kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
        kvm_rip_write(vcpu, 0);
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
 
 int kvm_arch_hardware_enable(void)
 {
@@ -10347,7 +10440,32 @@ void kvm_arch_sync_events(struct kvm *kvm)
        kvm_free_pit(kvm);
 }
 
-int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
+#define  ERR_PTR_USR(e)  ((void __user *)ERR_PTR(e))
+
+/**
+ * __x86_set_memory_region: Setup KVM internal memory slot
+ *
+ * @kvm: the kvm pointer to the VM.
+ * @id: the slot ID to setup.
+ * @gpa: the GPA to install the slot (unused when @size == 0).
+ * @size: the size of the slot. Set to zero to uninstall a slot.
+ *
+ * This function sets up a KVM internal memory slot.  Specify
+ * @size > 0 to install a new slot, or @size == 0 to uninstall a
+ * slot.  The return value is one of the following:
+ *
+ *   HVA:           on success (uninstall will return a bogus HVA)
+ *   -errno:        on error
+ *
+ * The caller should always use IS_ERR() to check the return value
+ * before use.  Note, the KVM internal memory slots are guaranteed to
+ * remain valid and unchanged until the VM is destroyed, i.e., the
+ * GPA->HVA translation will not change.  However, the HVA is a user
+ * address, i.e. its accessibility is not guaranteed, and must be
+ * accessed via __copy_{to,from}_user().
+ */
+void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
+                                     u32 size)
 {
        int i, r;
        unsigned long hva, old_npages;
@@ -10356,12 +10474,12 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 
        /* Called with kvm->slots_lock held.  */
        if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
-               return -EINVAL;
+               return ERR_PTR_USR(-EINVAL);
 
        slot = id_to_memslot(slots, id);
        if (size) {
                if (slot && slot->npages)
-                       return -EEXIST;
+                       return ERR_PTR_USR(-EEXIST);
 
                /*
                 * MAP_SHARED to prevent internal slot pages from being moved
@@ -10370,7 +10488,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
                hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
                              MAP_SHARED | MAP_ANONYMOUS, 0);
                if (IS_ERR((void *)hva))
-                       return PTR_ERR((void *)hva);
+                       return (void __user *)hva;
        } else {
                if (!slot || !slot->npages)
                        return 0;
@@ -10389,13 +10507,13 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
                m.memory_size = size;
                r = __kvm_set_memory_region(kvm, &m);
                if (r < 0)
-                       return r;
+                       return ERR_PTR_USR(r);
        }
 
        if (!size)
                vm_munmap(hva, old_npages * PAGE_SIZE);
 
-       return 0;
+       return (void __user *)hva;
 }
 EXPORT_SYMBOL_GPL(__x86_set_memory_region);
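
Per the kernel-doc above, the returned HVA must be treated as an untrusted user address. A sketch of the calling convention; the function name, slot ID, and buffer are placeholders:

static int write_private_slot(struct kvm *kvm, gpa_t gpa,
			      const void *buf, size_t len)
{
	void __user *hva;

	hva = __x86_set_memory_region(kvm, MY_PRIVATE_MEMSLOT, gpa, PAGE_SIZE);
	if (IS_ERR(hva))
		return PTR_ERR(hva);
	if (__copy_to_user(hva, buf, len))	/* user address: never deref directly */
		return -EFAULT;
	return 0;
}
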
 
@@ -10752,6 +10870,10 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
 {
+       /* Can't read the RIP when guest state is protected, just return 0 */
+       if (vcpu->arch.guest_state_protected)
+               return 0;
+
        if (is_64_bit_mode(vcpu))
                return kvm_rip_read(vcpu);
        return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
@@ -11261,6 +11383,179 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 }
 EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
 
+static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_mmio_fragment *frag;
+       unsigned int len;
+
+       BUG_ON(!vcpu->mmio_needed);
+
+       /* Complete previous fragment */
+       frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
+       len = min(8u, frag->len);
+       if (!vcpu->mmio_is_write)
+               memcpy(frag->data, run->mmio.data, len);
+
+       if (frag->len <= 8) {
+               /* Switch to the next fragment. */
+               frag++;
+               vcpu->mmio_cur_fragment++;
+       } else {
+               /* Go forward to the next mmio piece. */
+               frag->data += len;
+               frag->gpa += len;
+               frag->len -= len;
+       }
+
+       if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
+               vcpu->mmio_needed = 0;
+
+               // VMG change, at this point, we're always done
+               // RIP has already been advanced
+               return 1;
+       }
+
+       // More MMIO is needed
+       run->mmio.phys_addr = frag->gpa;
+       run->mmio.len = min(8u, frag->len);
+       run->mmio.is_write = vcpu->mmio_is_write;
+       if (run->mmio.is_write)
+               memcpy(run->mmio.data, frag->data, min(8u, frag->len));
+       run->exit_reason = KVM_EXIT_MMIO;
+
+       vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
+
+       return 0;
+}
+
+int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
+                         void *data)
+{
+       int handled;
+       struct kvm_mmio_fragment *frag;
+
+       if (!data)
+               return -EINVAL;
+
+       handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data);
+       if (handled == bytes)
+               return 1;
+
+       bytes -= handled;
+       gpa += handled;
+       data += handled;
+
+       /* TODO: Check whether the number of fragments needs to be incremented */
+       frag = vcpu->mmio_fragments;
+       vcpu->mmio_nr_fragments = 1;
+       frag->len = bytes;
+       frag->gpa = gpa;
+       frag->data = data;
+
+       vcpu->mmio_needed = 1;
+       vcpu->mmio_cur_fragment = 0;
+
+       vcpu->run->mmio.phys_addr = gpa;
+       vcpu->run->mmio.len = min(8u, frag->len);
+       vcpu->run->mmio.is_write = 1;
+       memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
+       vcpu->run->exit_reason = KVM_EXIT_MMIO;
+
+       vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write);
+
+int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
+                        void *data)
+{
+       int handled;
+       struct kvm_mmio_fragment *frag;
+
+       if (!data)
+               return -EINVAL;
+
+       handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data);
+       if (handled == bytes)
+               return 1;
+
+       bytes -= handled;
+       gpa += handled;
+       data += handled;
+
+       /* TODO: Check whether the number of fragments needs to be incremented */
+       frag = vcpu->mmio_fragments;
+       vcpu->mmio_nr_fragments = 1;
+       frag->len = bytes;
+       frag->gpa = gpa;
+       frag->data = data;
+
+       vcpu->mmio_needed = 1;
+       vcpu->mmio_cur_fragment = 0;
+
+       vcpu->run->mmio.phys_addr = gpa;
+       vcpu->run->mmio.len = min(8u, frag->len);
+       vcpu->run->mmio.is_write = 0;
+       vcpu->run->exit_reason = KVM_EXIT_MMIO;
+
+       vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
+
+static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+{
+       memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data,
+              vcpu->arch.pio.count * vcpu->arch.pio.size);
+       vcpu->arch.pio.count = 0;
+
+       return 1;
+}
+
+static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
+                          unsigned int port, void *data,  unsigned int count)
+{
+       int ret;
+
+       ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port,
+                                       data, count);
+       if (ret)
+               return ret;
+
+       vcpu->arch.pio.count = 0;
+
+       return 0;
+}
+
+static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
+                         unsigned int port, void *data, unsigned int count)
+{
+       int ret;
+
+       ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port,
+                                      data, count);
+       if (ret) {
+               vcpu->arch.pio.count = 0;
+       } else {
+               vcpu->arch.guest_ins_data = data;
+               vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
+       }
+
+       return 0;
+}
+
+int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
+                        unsigned int port, void *data,  unsigned int count,
+                        int in)
+{
+       return in ? kvm_sev_es_ins(vcpu, size, port, data, count)
+                 : kvm_sev_es_outs(vcpu, size, port, data, count);
+}
+EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
@@ -11283,3 +11578,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
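
For SEV-ES, each KVM_EXIT_MMIO carries at most eight bytes, and complete_sev_es_emulated_mmio() queues the next fragment on re-entry, so the VMM's MMIO loop is unchanged from ordinary emulated MMIO. A userspace sketch; mmio_read()/mmio_write() are hypothetical VMM helpers:

	do {
		if (run->mmio.is_write)
			mmio_write(run->mmio.phys_addr, run->mmio.data,
				   run->mmio.len);
		else
			mmio_read(run->mmio.phys_addr, run->mmio.data,
				  run->mmio.len);
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)	/* next fragment, if any */
			break;
	} while (run->exit_reason == KVM_EXIT_MMIO);
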
index e7ca622..c5ee0f5 100644
@@ -3,6 +3,7 @@
 #define ARCH_X86_KVM_X86_H
 
 #include <linux/kvm_host.h>
+#include <asm/mce.h>
 #include <asm/pvclock.h>
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
@@ -278,6 +279,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
 
 extern u64 host_xcr0;
 extern u64 supported_xcr0;
+extern u64 host_xss;
 extern u64 supported_xss;
 
 static inline bool kvm_mpx_supported(void)
@@ -366,10 +368,29 @@ static inline bool kvm_dr6_valid(u64 data)
        return !(data >> 32);
 }
 
+/*
+ * Trigger machine check on the host. We assume all the MSRs are already set up
+ * by the CPU and that we still run on the same CPU as the MCE occurred on.
+ * We pass a fake environment to the machine check handler because we want
+ * the guest to be always treated like user space, no matter what context
+ * it used internally.
+ */
+static inline void kvm_machine_check(void)
+{
+#if defined(CONFIG_X86_MCE)
+       struct pt_regs regs = {
+               .cs = 3, /* Fake ring 3 no matter what the guest ran on */
+               .flags = X86_EFLAGS_IF,
+       };
+
+       do_machine_check(&regs);
+#endif
+}
+
 void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
 void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
 int kvm_spec_ctrl_test_value(u64 value);
-int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu);
 int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
                              struct x86_exception *e);
@@ -407,4 +428,12 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
        __reserved_bits;                                \
 })
 
+int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes,
+                         void *dst);
+int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes,
+                        void *dst);
+int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
+                        unsigned int port, void *data,  unsigned int count,
+                        int in);
+
 #endif
index 58f7fb9..4229950 100644
@@ -63,13 +63,12 @@ static bool is_string_insn(struct insn *insn)
  */
 bool insn_has_rep_prefix(struct insn *insn)
 {
+       insn_byte_t p;
        int i;
 
        insn_get_prefixes(insn);
 
-       for (i = 0; i < insn->prefixes.nbytes; i++) {
-               insn_byte_t p = insn->prefixes.bytes[i];
-
+       for_each_insn_prefix(insn, i, p) {
                if (p == 0xf2 || p == 0xf3)
                        return true;
        }
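
The conversion matters because insn->prefixes.nbytes can exceed the four-byte bytes[] array when a prefix is repeated, so indexing by nbytes could read past the valid entries. for_each_insn_prefix() bounds the walk and stops at the first zero byte; it is defined roughly as:

#define for_each_insn_prefix(insn, idx, prefix)				\
	for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) &&	\
		      (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
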
@@ -95,14 +94,15 @@ static int get_seg_reg_override_idx(struct insn *insn)
 {
        int idx = INAT_SEG_REG_DEFAULT;
        int num_overrides = 0, i;
+       insn_byte_t p;
 
        insn_get_prefixes(insn);
 
        /* Look for any segment override prefixes. */
-       for (i = 0; i < insn->prefixes.nbytes; i++) {
+       for_each_insn_prefix(insn, i, p) {
                insn_attr_t attr;
 
-               attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]);
+               attr = inat_get_opcode_attribute(p);
                switch (attr) {
                case INAT_MAKE_PREFIX(INAT_PFX_CS):
                        idx = INAT_SEG_REG_CS;
index 733b983..6c5eb6f 100644
@@ -45,8 +45,8 @@
 #define PMD_FLAGS_LARGE                (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
 
 #define PMD_FLAGS_DEC          PMD_FLAGS_LARGE
-#define PMD_FLAGS_DEC_WP       ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
-                                (_PAGE_PAT | _PAGE_PWT))
+#define PMD_FLAGS_DEC_WP       ((PMD_FLAGS_DEC & ~_PAGE_LARGE_CACHE_MASK) | \
+                                (_PAGE_PAT_LARGE | _PAGE_PWT))
 
 #define PMD_FLAGS_ENC          (PMD_FLAGS_LARGE | _PAGE_ENC)
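
The _LARGE variants are needed because the PAT bit sits at a different position in huge-page entries: bit 7 is PSE there, so PAT moves to bit 12. For reference, the relevant bit numbers as defined in arch/x86/include/asm/pgtable_types.h:

#define _PAGE_BIT_PAT		7	/* PAT in a 4K PTE */
#define _PAGE_BIT_PSE		7	/* ...the same bit is PSE in a PMD/PUD */
#define _PAGE_BIT_PAT_LARGE	12	/* PAT in a 2M/1G mapping */
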
 
index 4414869..5eb4dc2 100644
@@ -938,6 +938,7 @@ int phys_to_target_node(phys_addr_t start)
 
        return meminfo_to_nid(&numa_reserved_meminfo, start);
 }
+EXPORT_SYMBOL_GPL(phys_to_target_node);
 
 int memory_add_physaddr_to_nid(u64 start)
 {
@@ -947,4 +948,5 @@ int memory_add_physaddr_to_nid(u64 start)
                nid = numa_meminfo.blk[0].nid;
        return nid;
 }
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
index 11666ba..569ac1d 100644
@@ -474,8 +474,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
        /*
         * The membarrier system call requires a full memory barrier and
         * core serialization before returning to user-space, after
-        * storing to rq->curr. Writing to CR3 provides that full
-        * memory barrier and core serializing instruction.
+        * storing to rq->curr, when changing mm.  This is because
+        * membarrier() sends IPIs to all CPUs that are in the target mm
+        * to make them issue memory barriers.  However, if another CPU
+        * switches to/from the target mm concurrently with
+        * membarrier(), it can cause that CPU not to receive an IPI
+        * when it really should issue a memory barrier.  Writing to CR3
+        * provides that full memory barrier and core serializing
+        * instruction.
         */
        if (real_prev == next) {
                VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
index 8f5759d..e1e8d4e 100644
@@ -78,28 +78,30 @@ int __init efi_alloc_page_tables(void)
        gfp_mask = GFP_KERNEL | __GFP_ZERO;
        efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
        if (!efi_pgd)
-               return -ENOMEM;
+               goto fail;
 
        pgd = efi_pgd + pgd_index(EFI_VA_END);
        p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
-       if (!p4d) {
-               free_page((unsigned long)efi_pgd);
-               return -ENOMEM;
-       }
+       if (!p4d)
+               goto free_pgd;
 
        pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
-       if (!pud) {
-               if (pgtable_l5_enabled())
-                       free_page((unsigned long) pgd_page_vaddr(*pgd));
-               free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
-               return -ENOMEM;
-       }
+       if (!pud)
+               goto free_p4d;
 
        efi_mm.pgd = efi_pgd;
        mm_init_cpumask(&efi_mm);
        init_new_context(NULL, &efi_mm);
 
        return 0;
+
+free_p4d:
+       if (pgtable_l5_enabled())
+               free_page((unsigned long)pgd_page_vaddr(*pgd));
+free_pgd:
+       free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
+fail:
+       return -ENOMEM;
 }
 
 /*
index 799f4eb..043c73d 100644
@@ -93,10 +93,20 @@ void xen_init_lock_cpu(int cpu)
 
 void xen_uninit_lock_cpu(int cpu)
 {
+       int irq;
+
        if (!xen_pvspin)
                return;
 
-       unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
+       /*
+        * When booting the kernel with 'mitigations=auto,nosmt', the secondary
+        * CPUs are not activated, and lock_kicker_irq is not initialized.
+        */
+       irq = per_cpu(lock_kicker_irq, cpu);
+       if (irq == -1)
+               return;
+
+       unbind_from_irqhandler(irq, NULL);
        per_cpu(lock_kicker_irq, cpu) = -1;
        kfree(per_cpu(irq_name, cpu));
        per_cpu(irq_name, cpu) = NULL;
index fa054a1..4dc04e6 100644
@@ -69,7 +69,7 @@
  */
 #define VMALLOC_START          (XCHAL_KSEG_CACHED_VADDR - 0x10000000)
 #define VMALLOC_END            (VMALLOC_START + 0x07FEFFFF)
-#define TLBTEMP_BASE_1         (VMALLOC_END + 1)
+#define TLBTEMP_BASE_1         (VMALLOC_START + 0x08000000)
 #define TLBTEMP_BASE_2         (TLBTEMP_BASE_1 + DCACHE_WAY_SIZE)
 #if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE
 #define TLBTEMP_SIZE           (2 * DCACHE_WAY_SIZE)
index b975811..5c9fb80 100644
@@ -302,7 +302,7 @@ strncpy_from_user(char *dst, const char __user *src, long count)
        return -EFAULT;
 }
 #else
-long strncpy_from_user(char *dst, const char *src, long count);
+long strncpy_from_user(char *dst, const char __user *src, long count);
 #endif
 
 /*
index 5835406..085b8c7 100644
@@ -70,8 +70,10 @@ static inline void kmap_invalidate_coherent(struct page *page,
                        kvaddr = TLBTEMP_BASE_1 +
                                (page_to_phys(page) & DCACHE_ALIAS_MASK);
 
+                       preempt_disable();
                        __invalidate_dcache_page_alias(kvaddr,
                                                       page_to_phys(page));
+                       preempt_enable();
                }
        }
 }
@@ -156,6 +158,7 @@ void flush_dcache_page(struct page *page)
                if (!alias && !mapping)
                        return;
 
+               preempt_disable();
                virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);
                __flush_invalidate_dcache_page_alias(virt, phys);
 
@@ -166,6 +169,7 @@ void flush_dcache_page(struct page *page)
 
                if (mapping)
                        __invalidate_icache_page_alias(virt, phys);
+               preempt_enable();
        }
 
        /* There shouldn't be an entry in the cache for this page anymore. */
@@ -199,8 +203,10 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address,
        unsigned long phys = page_to_phys(pfn_to_page(pfn));
        unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK);
 
+       preempt_disable();
        __flush_invalidate_dcache_page_alias(virt, phys);
        __invalidate_icache_page_alias(virt, phys);
+       preempt_enable();
 }
 EXPORT_SYMBOL(local_flush_cache_page);
 
@@ -227,11 +233,13 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
                unsigned long phys = page_to_phys(page);
                unsigned long tmp;
 
+               preempt_disable();
                tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);
                __flush_invalidate_dcache_page_alias(tmp, phys);
                tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK);
                __flush_invalidate_dcache_page_alias(tmp, phys);
                __invalidate_icache_page_alias(tmp, phys);
+               preempt_enable();
 
                clear_bit(PG_arch_1, &page->flags);
        }
@@ -265,7 +273,9 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 
        if (alias) {
                unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
+               preempt_disable();
                __flush_invalidate_dcache_page_alias(t, phys);
+               preempt_enable();
        }
 
        /* Copy data */
@@ -280,9 +290,11 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
        if (alias) {
                unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
 
+               preempt_disable();
                __flush_invalidate_dcache_range((unsigned long) dst, len);
                if ((vma->vm_flags & VM_EXEC) != 0)
                        __invalidate_icache_page_alias(t, phys);
+               preempt_enable();
 
        } else if ((vma->vm_flags & VM_EXEC) != 0) {
                __flush_dcache_range((unsigned long)dst,len);
@@ -304,7 +316,9 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 
        if (alias) {
                unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
+               preempt_disable();
                __flush_invalidate_dcache_page_alias(t, phys);
+               preempt_enable();
        }
 
        memcpy(dst, src, len);
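
All of these hunks wrap the same critical region: the temporary mapping at TLBTEMP_BASE_1 is selected by the page's cache color, and the TLB entry backing it is per-CPU state, so the task must not migrate between computing the alias address and flushing through it. A runnable sketch of the color arithmetic; the cache geometry and base address are hypothetical, while the mask expression mirrors the kernel's DCACHE_ALIAS_MASK definition:

#include <stdio.h>

#define PAGE_SIZE	   4096UL
#define DCACHE_WAY_SIZE	   0x8000UL	/* hypothetical: 32 KiB way */
#define DCACHE_ALIAS_MASK  ((DCACHE_WAY_SIZE - 1) & ~(PAGE_SIZE - 1))
#define TLBTEMP_BASE_1	   0xC8000000UL	/* stand-in for the kernel constant */

int main(void)
{
	unsigned long phys = 0x12345000UL;	/* arbitrary page-aligned address */

	/* Two pages alias in the cache iff their colors match. */
	unsigned long color = phys & DCACHE_ALIAS_MASK;
	unsigned long kvaddr = TLBTEMP_BASE_1 + color;

	printf("color %#lx -> temp mapping at %#lx\n", color, kvaddr);
	return 0;
}
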
index c68bdf5..54fbe1e 100644 (file)
@@ -849,6 +849,7 @@ static void blkcg_fill_root_iostats(void)
                        blkg_iostat_set(&blkg->iostat.cur, &tmp);
                        u64_stats_update_end(&blkg->iostat.sync);
                }
+               disk_put_part(part);
        }
 }
 
index e32958f..fd5cee9 100644 (file)
@@ -225,13 +225,18 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
        /* release the tag's ownership to the req cloned from */
        spin_lock_irqsave(&fq->mq_flush_lock, flags);
 
-       WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE);
        if (!refcount_dec_and_test(&flush_rq->ref)) {
                fq->rq_status = error;
                spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
                return;
        }
 
+       /*
+        * The flush request must only be marked IDLE once it has really
+        * ended, because its .end_io() is also called from the timeout code
+        * path; marking it IDLE too early can lead to a use-after-free.
+        */
+       WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE);
        if (fq->rq_status != BLK_STS_OK)
                error = fq->rq_status;
 
index bcf5e45..97b7c28 100644 (file)
@@ -144,7 +144,7 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
 static inline unsigned get_max_io_size(struct request_queue *q,
                                       struct bio *bio)
 {
-       unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+       unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0);
        unsigned max_sectors = sectors;
        unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
        unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
index 9741d1d..659cdb8 100644 (file)
@@ -547,7 +547,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
        t->io_min = max(t->io_min, b->io_min);
        t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
-       t->chunk_sectors = lcm_not_zero(t->chunk_sectors, b->chunk_sectors);
+
+       /* Set non-power-of-2 compatible chunk_sectors boundary */
+       if (b->chunk_sectors)
+               t->chunk_sectors = gcd(t->chunk_sectors, b->chunk_sectors);
 
        /* Physical block size a multiple of the logical block size? */
        if (t->physical_block_size & (t->logical_block_size - 1)) {
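
Switching from lcm_not_zero() to gcd() keeps the stacked chunk_sectors at a boundary both devices honor, and no longer requires chunk_sectors to be a power of two; gcd(0, b) also degenerates to b, so an unset top limit simply inherits the bottom's. A runnable sketch with hypothetical limits; gcd() here is a local stand-in for the kernel helper:

#include <stdio.h>

static unsigned int gcd(unsigned int a, unsigned int b)
{
	while (b) {
		unsigned int t = a % b;
		a = b;
		b = t;
	}
	return a;
}

int main(void)
{
	/* Hypothetical stack: top already at 256 sectors, bottom reports 192. */
	unsigned int t_chunk = 256, b_chunk = 192;

	/* lcm(256, 192) = 768, a boundary neither device asked for; every
	 * multiple of gcd(256, 192) = 64 is a boundary for both. */
	if (b_chunk)
		t_chunk = gcd(t_chunk, b_chunk);

	printf("stacked chunk_sectors = %u\n", t_chunk);	/* 64 */
	return 0;
}
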
index 35abcb1..86f8195 100644 (file)
@@ -103,6 +103,13 @@ int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots)
        spin_lock_init(&ksm->idle_slots_lock);
 
        slot_hashtable_size = roundup_pow_of_two(num_slots);
+       /*
+        * hash_ptr() assumes bits != 0, so ensure the hash table has at least 2
+        * buckets.  This only makes a difference when there is only 1 keyslot.
+        */
+       if (slot_hashtable_size < 2)
+               slot_hashtable_size = 2;
+
        ksm->log_slot_ht_size = ilog2(slot_hashtable_size);
        ksm->slot_hashtable = kvmalloc_array(slot_hashtable_size,
                                             sizeof(ksm->slot_hashtable[0]),
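
The clamp matters only in the one-keyslot case: roundup_pow_of_two(1) is 1, ilog2(1) is 0, and hash_ptr() misbehaves when given zero bits. A runnable sketch with local stand-ins for the kernel helpers:

#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int n)
{
	unsigned int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

static unsigned int ilog2(unsigned int n)
{
	unsigned int l = 0;

	while (n >>= 1)
		l++;
	return l;
}

int main(void)
{
	unsigned int num_slots = 1;	/* the problematic single-keyslot case */
	unsigned int size = roundup_pow_of_two(num_slots);

	if (size < 2)			/* hash_ptr() needs bits != 0 */
		size = 2;

	printf("buckets=%u log2=%u\n", size, ilog2(size));	/* 2, 1 */
	return 0;
}
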
index c0cd1b9..5762280 100644 (file)
@@ -145,6 +145,7 @@ obj-$(CONFIG_OF)            += of/
 obj-$(CONFIG_SSB)              += ssb/
 obj-$(CONFIG_BCMA)             += bcma/
 obj-$(CONFIG_VHOST_RING)       += vhost/
+obj-$(CONFIG_VHOST_IOTLB)      += vhost/
 obj-$(CONFIG_VHOST)            += vhost/
 obj-$(CONFIG_VLYNQ)            += vlynq/
 obj-$(CONFIG_GREYBUS)          += greybus/
index ecc3998..6284aff 100644 (file)
@@ -47,9 +47,12 @@ static int spk_ttyio_ldisc_open(struct tty_struct *tty)
 {
        struct spk_ldisc_data *ldisc_data;
 
+       if (tty != speakup_tty)
+               /* Somebody tried to use this line discipline outside speakup */
+               return -ENODEV;
+
        if (!tty->ops->write)
                return -EOPNOTSUPP;
-       speakup_tty = tty;
 
        ldisc_data = kmalloc(sizeof(*ldisc_data), GFP_KERNEL);
        if (!ldisc_data)
@@ -57,7 +60,7 @@ static int spk_ttyio_ldisc_open(struct tty_struct *tty)
 
        init_completion(&ldisc_data->completion);
        ldisc_data->buf_free = true;
-       speakup_tty->disc_data = ldisc_data;
+       tty->disc_data = ldisc_data;
 
        return 0;
 }
@@ -181,9 +184,25 @@ static int spk_ttyio_initialise_ldisc(struct spk_synth *synth)
 
        tty_unlock(tty);
 
+       mutex_lock(&speakup_tty_mutex);
+       speakup_tty = tty;
        ret = tty_set_ldisc(tty, N_SPEAKUP);
        if (ret)
-               pr_err("speakup: Failed to set N_SPEAKUP on tty\n");
+               speakup_tty = NULL;
+       mutex_unlock(&speakup_tty_mutex);
+
+       if (!ret)
+               /* Success */
+               return 0;
+
+       pr_err("speakup: Failed to set N_SPEAKUP on tty\n");
+
+       tty_lock(tty);
+       if (tty->ops->close)
+               tty->ops->close(tty, NULL);
+       tty_unlock(tty);
+
+       tty_kclose(tty);
 
        return ret;
 }
index 552fd9f..3294cc8 100644 (file)
@@ -633,6 +633,10 @@ int apei_map_generic_address(struct acpi_generic_address *reg)
        if (rc)
                return rc;
 
+       /* IO space doesn't need mapping */
+       if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO)
+               return 0;
+
        if (!acpi_os_map_generic_address(reg))
                return -ENXIO;
 
index 9929ff5..770d840 100644 (file)
@@ -44,7 +44,7 @@ static DEFINE_SPINLOCK(iort_fwnode_lock);
  * iort_set_fwnode() - Create iort_fwnode and use it to register
  *                    iommu data in the iort_fwnode_list
  *
- * @node: IORT table node associated with the IOMMU
+ * @iort_node: IORT table node associated with the IOMMU
  * @fwnode: fwnode associated with the IORT node
  *
  * Returns: 0 on success
@@ -673,7 +673,8 @@ static int iort_dev_find_its_id(struct device *dev, u32 id,
 /**
  * iort_get_device_domain() - Find MSI domain related to a device
  * @dev: The device.
- * @req_id: Requester ID for the device.
+ * @id: Requester ID for the device.
+ * @bus_token: irq domain bus token.
  *
  * Returns: the MSI domain for this device, NULL otherwise
  */
@@ -1136,7 +1137,7 @@ static int rc_dma_get_range(struct device *dev, u64 *size)
  *
  * @dev: device to configure
  * @dma_addr: device DMA address result pointer
- * @size: DMA range size result pointer
+ * @dma_size: DMA range size result pointer
  */
 void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
 {
@@ -1526,6 +1527,7 @@ static __init const struct iort_dev_config *iort_get_dev_cfg(
 /**
  * iort_add_platform_device() - Allocate a platform device for IORT node
  * @node: Pointer to device ACPI IORT node
+ * @ops: Pointer to IORT device config struct
  *
  * Returns: 0 on success, <0 failure
  */
index 48354f8..66c3983 100644 (file)
@@ -352,6 +352,7 @@ static int acpi_fan_get_fps(struct acpi_device *device)
                struct acpi_fan_fps *fps = &fan->fps[i];
 
                snprintf(fps->name, ACPI_FPS_NAME_LEN, "state%d", i);
+               sysfs_attr_init(&fps->dev_attr.attr);
                fps->dev_attr.show = show_state;
                fps->dev_attr.store = NULL;
                fps->dev_attr.attr.name = fps->name;
index 7af74fb..09ad733 100644 (file)
@@ -1706,6 +1706,8 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb)
 
        if (push_scqe(card, vc, scq, &scqe, skb) != 0) {
                atomic_inc(&vcc->stats->tx_err);
+               dma_unmap_single(&card->pcidev->dev, NS_PRV_DMA(skb), skb->len,
+                                DMA_TO_DEVICE);
                dev_kfree_skb_any(skb);
                return -EIO;
        }
index 501e9da..9ebf539 100644 (file)
@@ -132,73 +132,12 @@ module_param(log_stats, int, 0644);
 
 #define BLKBACK_INVALID_HANDLE (~0)
 
-/* Number of free pages to remove on each call to gnttab_free_pages */
-#define NUM_BATCH_FREE_PAGES 10
-
 static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt)
 {
        return pgrant_timeout && (jiffies - persistent_gnt->last_used >=
                        HZ * pgrant_timeout);
 }
 
-static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&ring->free_pages_lock, flags);
-       if (list_empty(&ring->free_pages)) {
-               BUG_ON(ring->free_pages_num != 0);
-               spin_unlock_irqrestore(&ring->free_pages_lock, flags);
-               return gnttab_alloc_pages(1, page);
-       }
-       BUG_ON(ring->free_pages_num == 0);
-       page[0] = list_first_entry(&ring->free_pages, struct page, lru);
-       list_del(&page[0]->lru);
-       ring->free_pages_num--;
-       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
-
-       return 0;
-}
-
-static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
-                                  int num)
-{
-       unsigned long flags;
-       int i;
-
-       spin_lock_irqsave(&ring->free_pages_lock, flags);
-       for (i = 0; i < num; i++)
-               list_add(&page[i]->lru, &ring->free_pages);
-       ring->free_pages_num += num;
-       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
-}
-
-static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
-{
-       /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
-       struct page *page[NUM_BATCH_FREE_PAGES];
-       unsigned int num_pages = 0;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ring->free_pages_lock, flags);
-       while (ring->free_pages_num > num) {
-               BUG_ON(list_empty(&ring->free_pages));
-               page[num_pages] = list_first_entry(&ring->free_pages,
-                                                  struct page, lru);
-               list_del(&page[num_pages]->lru);
-               ring->free_pages_num--;
-               if (++num_pages == NUM_BATCH_FREE_PAGES) {
-                       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
-                       gnttab_free_pages(num_pages, page);
-                       spin_lock_irqsave(&ring->free_pages_lock, flags);
-                       num_pages = 0;
-               }
-       }
-       spin_unlock_irqrestore(&ring->free_pages_lock, flags);
-       if (num_pages != 0)
-               gnttab_free_pages(num_pages, page);
-}
-
 #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
 
 static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags);
@@ -331,7 +270,8 @@ static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *ro
                        unmap_data.count = segs_to_unmap;
                        BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 
-                       put_free_pages(ring, pages, segs_to_unmap);
+                       gnttab_page_cache_put(&ring->free_pages, pages,
+                                             segs_to_unmap);
                        segs_to_unmap = 0;
                }
 
@@ -371,7 +311,8 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
                if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
                        unmap_data.count = segs_to_unmap;
                        BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
-                       put_free_pages(ring, pages, segs_to_unmap);
+                       gnttab_page_cache_put(&ring->free_pages, pages,
+                                             segs_to_unmap);
                        segs_to_unmap = 0;
                }
                kfree(persistent_gnt);
@@ -379,7 +320,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
        if (segs_to_unmap > 0) {
                unmap_data.count = segs_to_unmap;
                BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
-               put_free_pages(ring, pages, segs_to_unmap);
+               gnttab_page_cache_put(&ring->free_pages, pages, segs_to_unmap);
        }
 }
 
@@ -664,9 +605,10 @@ purge_gnt_list:
 
                /* Shrink the free pages pool if it is too large. */
                if (time_before(jiffies, blkif->buffer_squeeze_end))
-                       shrink_free_pagepool(ring, 0);
+                       gnttab_page_cache_shrink(&ring->free_pages, 0);
                else
-                       shrink_free_pagepool(ring, max_buffer_pages);
+                       gnttab_page_cache_shrink(&ring->free_pages,
+                                                max_buffer_pages);
 
                if (log_stats && time_after(jiffies, ring->st_print))
                        print_stats(ring);
@@ -697,7 +639,7 @@ void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
        ring->persistent_gnt_c = 0;
 
        /* Since we are shutting down remove all pages from the buffer */
-       shrink_free_pagepool(ring, 0 /* All */);
+       gnttab_page_cache_shrink(&ring->free_pages, 0 /* All */);
 }
 
 static unsigned int xen_blkbk_unmap_prepare(
@@ -736,7 +678,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
           but is this the best way to deal with this? */
        BUG_ON(result);
 
-       put_free_pages(ring, data->pages, data->count);
+       gnttab_page_cache_put(&ring->free_pages, data->pages, data->count);
        make_response(ring, pending_req->id,
                      pending_req->operation, pending_req->status);
        free_req(ring, pending_req);
@@ -803,7 +745,8 @@ static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
                if (invcount) {
                        ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
                        BUG_ON(ret);
-                       put_free_pages(ring, unmap_pages, invcount);
+                       gnttab_page_cache_put(&ring->free_pages, unmap_pages,
+                                             invcount);
                }
                pages += batch;
                num -= batch;
@@ -850,7 +793,8 @@ again:
                        pages[i]->page = persistent_gnt->page;
                        pages[i]->persistent_gnt = persistent_gnt;
                } else {
-                       if (get_free_page(ring, &pages[i]->page))
+                       if (gnttab_page_cache_get(&ring->free_pages,
+                                                 &pages[i]->page))
                                goto out_of_memory;
                        addr = vaddr(pages[i]->page);
                        pages_to_gnt[segs_to_map] = pages[i]->page;
@@ -883,7 +827,8 @@ again:
                        BUG_ON(new_map_idx >= segs_to_map);
                        if (unlikely(map[new_map_idx].status != 0)) {
                                pr_debug("invalid buffer -- could not remap it\n");
-                               put_free_pages(ring, &pages[seg_idx]->page, 1);
+                               gnttab_page_cache_put(&ring->free_pages,
+                                                     &pages[seg_idx]->page, 1);
                                pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
                                ret |= 1;
                                goto next;
@@ -944,7 +889,7 @@ next:
 
 out_of_memory:
        pr_alert("%s: out of memory\n", __func__);
-       put_free_pages(ring, pages_to_gnt, segs_to_map);
+       gnttab_page_cache_put(&ring->free_pages, pages_to_gnt, segs_to_map);
        for (i = last_map; i < num; i++)
                pages[i]->handle = BLKBACK_INVALID_HANDLE;
        return -ENOMEM;
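
The whole conversion replaces the ring-local free list with the shared gnttab_page_cache helpers. A hedged kernel-style sketch of the cache lifecycle, using only the calls visible in this hunk; the signatures are inferred from the call sites above, and xen_blkif_ring is assumed to embed a struct gnttab_page_cache named free_pages, as the header change below makes explicit:

#include <linux/errno.h>
#include <xen/grant_table.h>

static int free_pages_example(struct xen_blkif_ring *ring)
{
	struct page *page;

	gnttab_page_cache_init(&ring->free_pages);

	/* Nonzero on failure, as at the call sites above. */
	if (gnttab_page_cache_get(&ring->free_pages, &page))
		return -ENOMEM;

	/* ... map grant refs into the page and service the request ... */

	gnttab_page_cache_put(&ring->free_pages, &page, 1);

	/* Bound the pool, or pass 0 to drain it entirely on shutdown. */
	gnttab_page_cache_shrink(&ring->free_pages, 0);
	return 0;
}
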
index c6ea5d3..a1b9df2 100644 (file)
@@ -288,9 +288,7 @@ struct xen_blkif_ring {
        struct work_struct      persistent_purge_work;
 
        /* Buffer of free pages to map grant refs. */
-       spinlock_t              free_pages_lock;
-       int                     free_pages_num;
-       struct list_head        free_pages;
+       struct gnttab_page_cache free_pages;
 
        struct work_struct      free_work;
        /* Thread shutdown wait queue. */
index f570556..76912c5 100644 (file)
@@ -144,8 +144,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
                INIT_LIST_HEAD(&ring->pending_free);
                INIT_LIST_HEAD(&ring->persistent_purge_list);
                INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
-               spin_lock_init(&ring->free_pages_lock);
-               INIT_LIST_HEAD(&ring->free_pages);
+               gnttab_page_cache_init(&ring->free_pages);
 
                spin_lock_init(&ring->pending_free_lock);
                init_waitqueue_head(&ring->pending_free_wq);
@@ -317,8 +316,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
                BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
                BUG_ON(!list_empty(&ring->persistent_purge_list));
                BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
-               BUG_ON(!list_empty(&ring->free_pages));
-               BUG_ON(ring->free_pages_num != 0);
+               BUG_ON(ring->free_pages.num_pages != 0);
                BUG_ON(ring->persistent_gnt_c != 0);
                WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
                ring->active = false;
index efb088d..92ecf1a 100644 (file)
@@ -227,6 +227,9 @@ static int sysc_wait_softreset(struct sysc *ddata)
        u32 sysc_mask, syss_done, rstval;
        int syss_offset, error = 0;
 
+       if (ddata->cap->regbits->srst_shift < 0)
+               return 0;
+
        syss_offset = ddata->offsets[SYSC_SYSSTATUS];
        sysc_mask = BIT(ddata->cap->regbits->srst_shift);
 
@@ -970,9 +973,15 @@ static int sysc_enable_module(struct device *dev)
                        return error;
                }
        }
-       error = sysc_wait_softreset(ddata);
-       if (error)
-               dev_warn(ddata->dev, "OCP softreset timed out\n");
+       /*
+        * Some modules, such as i2c and hdq1w, report an unusable reset
+        * status unless the module reset quirk is enabled. Skip the status
+        * check on enable.
+        */
+       if (!(ddata->cfg.quirks & SYSC_MODULE_QUIRK_ENA_RESETDONE)) {
+               error = sysc_wait_softreset(ddata);
+               if (error)
+                       dev_warn(ddata->dev, "OCP softreset timed out\n");
+       }
        if (ddata->cfg.quirks & SYSC_QUIRK_OPT_CLKS_IN_RESET)
                sysc_disable_opt_clocks(ddata);
 
@@ -1373,17 +1382,17 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
        SYSC_QUIRK("hdmi", 0, 0, 0x10, -ENODEV, 0x50030200, 0xffffffff,
                   SYSC_QUIRK_OPT_CLKS_NEEDED),
        SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x00000006, 0xffffffff,
-                  SYSC_MODULE_QUIRK_HDQ1W),
+                  SYSC_MODULE_QUIRK_HDQ1W | SYSC_MODULE_QUIRK_ENA_RESETDONE),
        SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x0000000a, 0xffffffff,
-                  SYSC_MODULE_QUIRK_HDQ1W),
+                  SYSC_MODULE_QUIRK_HDQ1W | SYSC_MODULE_QUIRK_ENA_RESETDONE),
        SYSC_QUIRK("i2c", 0, 0, 0x20, 0x10, 0x00000036, 0x000000ff,
-                  SYSC_MODULE_QUIRK_I2C),
+                  SYSC_MODULE_QUIRK_I2C | SYSC_MODULE_QUIRK_ENA_RESETDONE),
        SYSC_QUIRK("i2c", 0, 0, 0x20, 0x10, 0x0000003c, 0x000000ff,
-                  SYSC_MODULE_QUIRK_I2C),
+                  SYSC_MODULE_QUIRK_I2C | SYSC_MODULE_QUIRK_ENA_RESETDONE),
        SYSC_QUIRK("i2c", 0, 0, 0x20, 0x10, 0x00000040, 0x000000ff,
-                  SYSC_MODULE_QUIRK_I2C),
+                  SYSC_MODULE_QUIRK_I2C | SYSC_MODULE_QUIRK_ENA_RESETDONE),
        SYSC_QUIRK("i2c", 0, 0, 0x10, 0x90, 0x5040000a, 0xfffff0f0,
-                  SYSC_MODULE_QUIRK_I2C),
+                  SYSC_MODULE_QUIRK_I2C | SYSC_MODULE_QUIRK_ENA_RESETDONE),
        SYSC_QUIRK("gpu", 0x50000000, 0x14, -ENODEV, -ENODEV, 0x00010201, 0xffffffff, 0),
        SYSC_QUIRK("gpu", 0x50000000, 0xfe00, 0xfe10, -ENODEV, 0x40000000 , 0xffffffff,
                   SYSC_MODULE_QUIRK_SGX),
@@ -2880,7 +2889,7 @@ static int sysc_check_active_timer(struct sysc *ddata)
 
        if ((ddata->cfg.quirks & SYSC_QUIRK_NO_RESET_ON_INIT) &&
            (ddata->cfg.quirks & SYSC_QUIRK_NO_IDLE))
-               return -EBUSY;
+               return -ENXIO;
 
        return 0;
 }
index 3b393cb..3061896 100644 (file)
@@ -5,8 +5,8 @@ config MXC_CLK
        depends on ARCH_MXC || COMPILE_TEST
 
 config MXC_CLK_SCU
-       tristate "IMX SCU clock"
-       depends on ARCH_MXC || COMPILE_TEST
+       tristate
+       depends on ARCH_MXC
        depends on IMX_SCU && HAVE_ARM_SMCCC
 
 config CLK_IMX1
index d900f6b..892e91b 100644 (file)
@@ -55,7 +55,7 @@ struct r9a06g032_clkdesc {
                        u16 sel, g1, r1, g2, r2;
                } dual;
        };
-} __packed;
+};
 
 #define I_GATE(_clk, _rst, _rdy, _midle, _scon, _mirack, _mistat) \
        { .gate = _clk, .reset = _rst, \
index e27771d..a60aee1 100644 (file)
@@ -368,7 +368,7 @@ static const struct regmap_config ti_eqep_regmap32_config = {
        .reg_bits = 32,
        .val_bits = 32,
        .reg_stride = 4,
-       .max_register = 0x24,
+       .max_register = QUPRD,
 };
 
 static const struct regmap_config ti_eqep_regmap16_config = {
@@ -376,7 +376,7 @@ static const struct regmap_config ti_eqep_regmap16_config = {
        .reg_bits = 16,
        .val_bits = 16,
        .reg_stride = 2,
-       .max_register = 0x1e,
+       .max_register = QCPRDLAT,
 };
 
 static int ti_eqep_probe(struct platform_device *pdev)
index e855e86..8286205 100644 (file)
@@ -8,6 +8,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/clk-provider.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/cpumask.h>
@@ -228,15 +229,22 @@ static struct cpufreq_driver scmi_cpufreq_driver = {
 static int scmi_cpufreq_probe(struct scmi_device *sdev)
 {
        int ret;
+       struct device *dev = &sdev->dev;
 
        handle = sdev->handle;
 
        if (!handle || !handle->perf_ops)
                return -ENODEV;
 
+#ifdef CONFIG_COMMON_CLK
+       /* dummy clock provider as needed by OPP if clocks property is used */
+       if (of_find_property(dev->of_node, "#clock-cells", NULL))
+               devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, NULL);
+#endif
+
        ret = cpufreq_register_driver(&scmi_cpufreq_driver);
        if (ret) {
-               dev_err(&sdev->dev, "%s: registering cpufreq failed, err: %d\n",
+               dev_err(dev, "%s: registering cpufreq failed, err: %d\n",
                        __func__, ret);
        }
 
index 4b4079f..7eb2c56 100644 (file)
@@ -42,6 +42,8 @@ static const struct tegra186_cpufreq_cluster_info tegra186_clusters[] = {
 struct tegra186_cpufreq_cluster {
        const struct tegra186_cpufreq_cluster_info *info;
        struct cpufreq_frequency_table *table;
+       u32 ref_clk_khz;
+       u32 div;
 };
 
 struct tegra186_cpufreq_data {
@@ -94,7 +96,7 @@ static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy,
 
 static unsigned int tegra186_cpufreq_get(unsigned int cpu)
 {
-       struct cpufreq_frequency_table *tbl;
+       struct tegra186_cpufreq_data *data = cpufreq_get_driver_data();
        struct cpufreq_policy *policy;
        void __iomem *edvd_reg;
        unsigned int i, freq = 0;
@@ -104,17 +106,23 @@ static unsigned int tegra186_cpufreq_get(unsigned int cpu)
        if (!policy)
                return 0;
 
-       tbl = policy->freq_table;
        edvd_reg = policy->driver_data;
        ndiv = readl(edvd_reg) & EDVD_CORE_VOLT_FREQ_F_MASK;
 
-       for (i = 0; tbl[i].frequency != CPUFREQ_TABLE_END; i++) {
-               if ((tbl[i].driver_data & EDVD_CORE_VOLT_FREQ_F_MASK) == ndiv) {
-                       freq = tbl[i].frequency;
-                       break;
+       for (i = 0; i < data->num_clusters; i++) {
+               struct tegra186_cpufreq_cluster *cluster = &data->clusters[i];
+               int core;
+
+               for (core = 0; core < ARRAY_SIZE(cluster->info->cpus); core++) {
+                       if (cluster->info->cpus[core] != policy->cpu)
+                               continue;
+
+                       freq = (cluster->ref_clk_khz * ndiv) / cluster->div;
+                       goto out;
                }
        }
 
+out:
        cpufreq_cpu_put(policy);
 
        return freq;
@@ -133,7 +141,7 @@ static struct cpufreq_driver tegra186_cpufreq_driver = {
 
 static struct cpufreq_frequency_table *init_vhint_table(
        struct platform_device *pdev, struct tegra_bpmp *bpmp,
-       unsigned int cluster_id)
+       struct tegra186_cpufreq_cluster *cluster)
 {
        struct cpufreq_frequency_table *table;
        struct mrq_cpu_vhint_request req;
@@ -152,7 +160,7 @@ static struct cpufreq_frequency_table *init_vhint_table(
 
        memset(&req, 0, sizeof(req));
        req.addr = phys;
-       req.cluster_id = cluster_id;
+       req.cluster_id = cluster->info->bpmp_cluster_id;
 
        memset(&msg, 0, sizeof(msg));
        msg.mrq = MRQ_CPU_VHINT;
@@ -185,6 +193,9 @@ static struct cpufreq_frequency_table *init_vhint_table(
                goto free;
        }
 
+       cluster->ref_clk_khz = data->ref_clk_hz / 1000;
+       cluster->div = data->pdiv * data->mdiv;
+
        for (i = data->vfloor, j = 0; i <= data->vceil; i++) {
                struct cpufreq_frequency_table *point;
                u16 ndiv = data->ndiv[i];
@@ -202,8 +213,7 @@ static struct cpufreq_frequency_table *init_vhint_table(
 
                point = &table[j++];
                point->driver_data = edvd_val;
-               point->frequency = data->ref_clk_hz * ndiv / data->pdiv /
-                       data->mdiv / 1000;
+               point->frequency = (cluster->ref_clk_khz * ndiv) / cluster->div;
        }
 
        table[j].frequency = CPUFREQ_TABLE_END;
@@ -245,8 +255,7 @@ static int tegra186_cpufreq_probe(struct platform_device *pdev)
                struct tegra186_cpufreq_cluster *cluster = &data->clusters[i];
 
                cluster->info = &tegra186_clusters[i];
-               cluster->table = init_vhint_table(
-                       pdev, bpmp, cluster->info->bpmp_cluster_id);
+               cluster->table = init_vhint_table(pdev, bpmp, cluster);
                if (IS_ERR(cluster->table)) {
                        err = PTR_ERR(cluster->table);
                        goto put_bpmp;
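
With the per-cluster ref_clk_khz and div cached at probe time, the get() path no longer walks the frequency table: it reduces to one multiply and one divide per read of the EDVD ndiv field. A runnable sketch of that arithmetic with hypothetical values, not taken from real hardware:

#include <stdio.h>

int main(void)
{
	/* Hypothetical BPMP-provided values, for illustration only. */
	unsigned int ref_clk_khz = 408000;	/* 408 MHz reference clock */
	unsigned int pdiv = 1, mdiv = 1;
	unsigned int div = pdiv * mdiv;
	unsigned int ndiv = 5;			/* read back from the EDVD register */

	/* Same arithmetic as the rewritten tegra186_cpufreq_get() above. */
	unsigned int freq_khz = (ref_clk_khz * ndiv) / div;

	printf("cpu frequency: %u kHz\n", freq_khz);	/* 2040000 */
	return 0;
}
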
index e895670..191966d 100644 (file)
@@ -189,7 +189,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
        }
 
        local_fiq_disable();
-       tegra_pm_set_cpu_in_lp2();
+       RCU_NONIDLE(tegra_pm_set_cpu_in_lp2());
        cpu_pm_enter();
 
        switch (index) {
@@ -207,7 +207,7 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
        }
 
        cpu_pm_exit();
-       tegra_pm_clear_cpu_in_lp2();
+       RCU_NONIDLE(tegra_pm_clear_cpu_in_lp2());
        local_fiq_enable();
 
        return err ?: index;
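
The LP2 helpers can hit tracepoints, and by this point in the cpuidle path RCU has already been told the CPU is idle; RCU_NONIDLE() marks the CPU non-idle for just the wrapped expression. A hedged kernel-style sketch, where my_lp2_hook() is a hypothetical stand-in for the Tegra helpers:

#include <linux/rcupdate.h>

static void my_lp2_hook(void)		/* hypothetical; may contain tracepoints */
{
}

static void idle_enter_path(void)
{
	/* Inside the idle loop RCU is not watching; RCU_NONIDLE() makes it
	 * watch again for the duration of this one call. */
	RCU_NONIDLE(my_lp2_hook());
}
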
index 567428e..d2834c2 100644 (file)
@@ -50,7 +50,6 @@ config DEV_DAX_HMEM
          Say M if unsure.
 
 config DEV_DAX_HMEM_DEVICES
-       depends on NUMA_KEEP_MEMINFO # for phys_to_target_node()
        depends on DEV_DAX_HMEM && DAX=y
        def_bool y
 
index 7974fa0..962cbb5 100644 (file)
@@ -1039,16 +1039,15 @@ static int get_dma_id(struct dma_device *device)
 static int __dma_async_device_channel_register(struct dma_device *device,
                                               struct dma_chan *chan)
 {
-       int rc = 0;
+       int rc;
 
        chan->local = alloc_percpu(typeof(*chan->local));
        if (!chan->local)
-               goto err_out;
+               return -ENOMEM;
        chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
        if (!chan->dev) {
-               free_percpu(chan->local);
-               chan->local = NULL;
-               goto err_out;
+               rc = -ENOMEM;
+               goto err_free_local;
        }
 
        /*
@@ -1061,7 +1060,8 @@ static int __dma_async_device_channel_register(struct dma_device *device,
        if (chan->chan_id < 0) {
                pr_err("%s: unable to alloc ida for chan: %d\n",
                       __func__, chan->chan_id);
-               goto err_out;
+               rc = chan->chan_id;
+               goto err_free_dev;
        }
 
        chan->dev->device.class = &dma_devclass;
@@ -1082,9 +1082,10 @@ static int __dma_async_device_channel_register(struct dma_device *device,
        mutex_lock(&device->chan_mutex);
        ida_free(&device->chan_ida, chan->chan_id);
        mutex_unlock(&device->chan_mutex);
- err_out:
-       free_percpu(chan->local);
+ err_free_dev:
        kfree(chan->dev);
+ err_free_local:
+       free_percpu(chan->local);
        return rc;
 }
 
index 200b910..6633449 100644 (file)
@@ -271,7 +271,7 @@ int idxd_wq_map_portal(struct idxd_wq *wq)
        resource_size_t start;
 
        start = pci_resource_start(pdev, IDXD_WQ_BAR);
-       start = start + wq->id * IDXD_PORTAL_SIZE;
+       start += idxd_get_wq_portal_full_offset(wq->id, IDXD_PORTAL_LIMITED);
 
        wq->dportal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE);
        if (!wq->dportal)
@@ -295,7 +295,7 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq)
        int i, wq_offset;
 
        lockdep_assert_held(&idxd->dev_lock);
-       memset(&wq->wqcfg, 0, sizeof(wq->wqcfg));
+       memset(wq->wqcfg, 0, idxd->wqcfg_size);
        wq->type = IDXD_WQT_NONE;
        wq->size = 0;
        wq->group = NULL;
@@ -304,8 +304,8 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq)
        clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
        memset(wq->name, 0, WQ_NAME_SIZE);
 
-       for (i = 0; i < 8; i++) {
-               wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32);
+       for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+               wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
                iowrite32(0, idxd->reg_base + wq_offset);
                dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n",
                        wq->id, i, wq_offset,
@@ -539,10 +539,10 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
        if (!wq->group)
                return 0;
 
-       memset(&wq->wqcfg, 0, sizeof(union wqcfg));
+       memset(wq->wqcfg, 0, idxd->wqcfg_size);
 
        /* byte 0-3 */
-       wq->wqcfg.wq_size = wq->size;
+       wq->wqcfg->wq_size = wq->size;
 
        if (wq->size == 0) {
                dev_warn(dev, "Incorrect work queue size: 0\n");
@@ -550,22 +550,21 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
        }
 
        /* bytes 4-7 */
-       wq->wqcfg.wq_thresh = wq->threshold;
+       wq->wqcfg->wq_thresh = wq->threshold;
 
        /* byte 8-11 */
-       wq->wqcfg.priv = !!(wq->type == IDXD_WQT_KERNEL);
-       wq->wqcfg.mode = 1;
-
-       wq->wqcfg.priority = wq->priority;
+       wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL);
+       wq->wqcfg->mode = 1;
+       wq->wqcfg->priority = wq->priority;
 
        /* bytes 12-15 */
-       wq->wqcfg.max_xfer_shift = ilog2(wq->max_xfer_bytes);
-       wq->wqcfg.max_batch_shift = ilog2(wq->max_batch_size);
+       wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes);
+       wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size);
 
        dev_dbg(dev, "WQ %d CFGs\n", wq->id);
-       for (i = 0; i < 8; i++) {
-               wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32);
-               iowrite32(wq->wqcfg.bits[i], idxd->reg_base + wq_offset);
+       for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+               wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
+               iowrite32(wq->wqcfg->bits[i], idxd->reg_base + wq_offset);
                dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n",
                        wq->id, i, wq_offset,
                        ioread32(idxd->reg_base + wq_offset));
index c64df19..d48f193 100644 (file)
@@ -103,7 +103,7 @@ struct idxd_wq {
        u32 priority;
        enum idxd_wq_state state;
        unsigned long flags;
-       union wqcfg wqcfg;
+       union wqcfg *wqcfg;
        u32 vec_ptr;            /* interrupt steering */
        struct dsa_hw_desc **hw_descs;
        int num_descs;
@@ -183,6 +183,7 @@ struct idxd_device {
        int max_wq_size;
        int token_limit;
        int nr_tokens;          /* non-reserved tokens */
+       unsigned int wqcfg_size;
 
        union sw_err_reg sw_err;
        wait_queue_head_t cmd_waitq;
index 11e5ce1..0a4432b 100644 (file)
@@ -178,6 +178,9 @@ static int idxd_setup_internals(struct idxd_device *idxd)
                wq->idxd_cdev.minor = -1;
                wq->max_xfer_bytes = idxd->max_xfer_bytes;
                wq->max_batch_size = idxd->max_batch_size;
+               wq->wqcfg = devm_kzalloc(dev, idxd->wqcfg_size, GFP_KERNEL);
+               if (!wq->wqcfg)
+                       return -ENOMEM;
        }
 
        for (i = 0; i < idxd->max_engines; i++) {
@@ -251,6 +254,8 @@ static void idxd_read_caps(struct idxd_device *idxd)
        dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size);
        idxd->max_wqs = idxd->hw.wq_cap.num_wqs;
        dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs);
+       idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
+       dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size);
 
        /* reading operation capabilities */
        for (i = 0; i < 4; i++) {
index a39e7ae..5439033 100644 (file)
@@ -8,7 +8,7 @@
 
 #define IDXD_MMIO_BAR          0
 #define IDXD_WQ_BAR            2
-#define IDXD_PORTAL_SIZE       0x4000
+#define IDXD_PORTAL_SIZE       PAGE_SIZE
 
 /* MMIO Device BAR0 Registers */
 #define IDXD_VER_OFFSET                        0x00
@@ -43,7 +43,8 @@ union wq_cap_reg {
        struct {
                u64 total_wq_size:16;
                u64 num_wqs:8;
-               u64 rsvd:24;
+               u64 wqcfg_size:4;
+               u64 rsvd:20;
                u64 shared_mode:1;
                u64 dedicated_mode:1;
                u64 rsvd2:1;
@@ -55,6 +56,7 @@ union wq_cap_reg {
        u64 bits;
 } __packed;
 #define IDXD_WQCAP_OFFSET              0x20
+#define IDXD_WQCFG_MIN                 5
 
 union group_cap_reg {
        struct {
@@ -333,4 +335,23 @@ union wqcfg {
        };
        u32 bits[8];
 } __packed;
+
+/*
+ * This macro calculates the offset into the WQCFG register block
+ * idxd - struct idxd *
+ * n - wq id
+ * ofs - the index of the 32b dword for the config register
+ *
+ * The WQCFG register block is divided into per-WQ groups. The n index
+ * selects the register group for that particular wq. Each register is
+ * 32 bits; ofs gives the index of the register to access within the group.
+ */
+#define WQCFG_OFFSET(_idxd_dev, n, ofs) \
+({\
+       typeof(_idxd_dev) __idxd_dev = (_idxd_dev);     \
+       (__idxd_dev)->wqcfg_offset + (n) * (__idxd_dev)->wqcfg_size + sizeof(u32) * (ofs);      \
+})
+
+#define WQCFG_STRIDES(_idxd_dev) ((_idxd_dev)->wqcfg_size / sizeof(u32))
+
 #endif
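
WQCFG_OFFSET() and WQCFG_STRIDES() reduce every WQCFG access to base + n * stride + 4 * ofs, with the stride read from the WQCAP register instead of hardcoded as 32 bytes. A runnable sketch of the arithmetic with hypothetical capability values:

#include <stdio.h>

#define IDXD_WQCFG_MIN 5	/* from the header above: minimum 32-byte stride */

int main(void)
{
	/* Hypothetical capability values, for illustration only. */
	unsigned int wqcfg_size_field = 0;	/* wq_cap.wqcfg_size */
	unsigned int wqcfg_offset = 0x400;	/* base of the WQCFG block */

	unsigned int wqcfg_size = 1u << (wqcfg_size_field + IDXD_WQCFG_MIN);
	unsigned int strides = wqcfg_size / sizeof(unsigned int);

	/* WQCFG_OFFSET(idxd, 2, 3): group for wq 2, fourth 32-bit register. */
	unsigned int off = wqcfg_offset + 2 * wqcfg_size +
			   3 * sizeof(unsigned int);

	printf("size=%u strides=%u offset=%#x\n", wqcfg_size, strides, off);
	return 0;				/* size=32 strides=8 offset=0x44c */
}
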
index 156a1ee..417048e 100644 (file)
@@ -74,7 +74,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
        if (idxd->state != IDXD_DEV_ENABLED)
                return -EIO;
 
-       portal = wq->dportal + idxd_get_wq_portal_offset(IDXD_PORTAL_UNLIMITED);
+       portal = wq->dportal;
        /*
         * The wmb() flushes writes to coherent DMA data before possibly
         * triggering a DMA read. The wmb() is necessary even on UP because
index 0be3855..289c59e 100644 (file)
 #define DCA2_TAG_MAP_BYTE3 0x82
 #define DCA2_TAG_MAP_BYTE4 0x82
 
-/* verify if tag map matches expected values */
-static inline int dca2_tag_map_valid(u8 *tag_map)
-{
-       return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
-               (tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
-               (tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
-               (tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
-               (tag_map[4] == DCA2_TAG_MAP_BYTE4));
-}
-
 /*
  * "Legacy" DCA systems do not implement the DCA register set in the
  * I/OAT device.  Software needs direct support for their tag mappings.
index e9f0101..0f5c193 100644 (file)
@@ -2799,7 +2799,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
         * If burst size is smaller than bus width then make sure we only
         * transfer one at a time to avoid a burst straddling an MFIFO entry.
         */
-       if (desc->rqcfg.brst_size * 8 < pl330->pcfg.data_bus_width)
+       if (burst * 8 < pl330->pcfg.data_bus_width)
                desc->rqcfg.brst_len = 1;
 
        desc->bytes_requested = len;
index aa24e55..8563a39 100644 (file)
@@ -83,7 +83,7 @@ EXPORT_SYMBOL(xudma_rflow_is_gp);
 #define XUDMA_GET_PUT_RESOURCE(res)                                    \
 struct udma_##res *xudma_##res##_get(struct udma_dev *ud, int id)      \
 {                                                                      \
-       return __udma_reserve_##res(ud, false, id);                     \
+       return __udma_reserve_##res(ud, UDMA_TP_NORMAL, id);            \
 }                                                                      \
 EXPORT_SYMBOL(xudma_##res##_get);                                      \
                                                                        \
index c9fe5e3..268a080 100644 (file)
@@ -1522,29 +1522,38 @@ static void omap_dma_free(struct omap_dmadev *od)
        }
 }
 
+/* Currently used by omap2 & 3 to block deeper SoC idle states */
+static bool omap_dma_busy(struct omap_dmadev *od)
+{
+       struct omap_chan *c;
+       int lch = -1;
+
+       while (1) {
+               lch = find_next_bit(od->lch_bitmap, od->lch_count, lch + 1);
+               if (lch >= od->lch_count)
+                       break;
+               c = od->lch_map[lch];
+               if (!c)
+                       continue;
+               if (omap_dma_chan_read(c, CCR) & CCR_ENABLE)
+                       return true;
+       }
+
+       return false;
+}
+
 /* Currently only used for omap2. For omap1, also a check for lcd_dma is needed */
 static int omap_dma_busy_notifier(struct notifier_block *nb,
                                  unsigned long cmd, void *v)
 {
        struct omap_dmadev *od;
-       struct omap_chan *c;
-       int lch = -1;
 
        od = container_of(nb, struct omap_dmadev, nb);
 
        switch (cmd) {
        case CPU_CLUSTER_PM_ENTER:
-               while (1) {
-                       lch = find_next_bit(od->lch_bitmap, od->lch_count,
-                                           lch + 1);
-                       if (lch >= od->lch_count)
-                               break;
-                       c = od->lch_map[lch];
-                       if (!c)
-                               continue;
-                       if (omap_dma_chan_read(c, CCR) & CCR_ENABLE)
-                               return NOTIFY_BAD;
-               }
+               if (omap_dma_busy(od))
+                       return NOTIFY_BAD;
                break;
        case CPU_CLUSTER_PM_ENTER_FAILED:
        case CPU_CLUSTER_PM_EXIT:
@@ -1595,6 +1604,8 @@ static int omap_dma_context_notifier(struct notifier_block *nb,
 
        switch (cmd) {
        case CPU_CLUSTER_PM_ENTER:
+               if (omap_dma_busy(od))
+                       return NOTIFY_BAD;
                omap_dma_context_save(od);
                break;
        case CPU_CLUSTER_PM_ENTER_FAILED:
index ecff354..22faea6 100644 (file)
@@ -517,8 +517,8 @@ struct xilinx_dma_device {
 #define to_dma_tx_descriptor(tx) \
        container_of(tx, struct xilinx_dma_tx_descriptor, async_tx)
 #define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \
-       readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \
-                          cond, delay_us, timeout_us)
+       readl_poll_timeout_atomic(chan->xdev->regs + chan->ctrl_offset + reg, \
+                                 val, cond, delay_us, timeout_us)
 
 /* IO accessors */
 static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg)
@@ -948,8 +948,10 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan,
 {
        struct xilinx_cdma_tx_segment *cdma_seg;
        struct xilinx_axidma_tx_segment *axidma_seg;
+       struct xilinx_aximcdma_tx_segment *aximcdma_seg;
        struct xilinx_cdma_desc_hw *cdma_hw;
        struct xilinx_axidma_desc_hw *axidma_hw;
+       struct xilinx_aximcdma_desc_hw *aximcdma_hw;
        struct list_head *entry;
        u32 residue = 0;
 
@@ -961,13 +963,23 @@ static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan,
                        cdma_hw = &cdma_seg->hw;
                        residue += (cdma_hw->control - cdma_hw->status) &
                                   chan->xdev->max_buffer_len;
-               } else {
+               } else if (chan->xdev->dma_config->dmatype ==
+                          XDMA_TYPE_AXIDMA) {
                        axidma_seg = list_entry(entry,
                                                struct xilinx_axidma_tx_segment,
                                                node);
                        axidma_hw = &axidma_seg->hw;
                        residue += (axidma_hw->control - axidma_hw->status) &
                                   chan->xdev->max_buffer_len;
+               } else {
+                       aximcdma_seg =
+                               list_entry(entry,
+                                          struct xilinx_aximcdma_tx_segment,
+                                          node);
+                       aximcdma_hw = &aximcdma_seg->hw;
+                       residue +=
+                               (aximcdma_hw->control - aximcdma_hw->status) &
+                               chan->xdev->max_buffer_len;
                }
        }
 
@@ -1135,7 +1147,7 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan)
                        upper_32_bits(chan->seg_p + sizeof(*chan->seg_mv) *
                                ((i + 1) % XILINX_DMA_NUM_DESCS));
                        chan->seg_mv[i].phys = chan->seg_p +
-                               sizeof(*chan->seg_v) * i;
+                               sizeof(*chan->seg_mv) * i;
                        list_add_tail(&chan->seg_mv[i].node,
                                      &chan->free_seg_list);
                }
@@ -1560,7 +1572,7 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
 static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan)
 {
        struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
-       struct xilinx_axidma_tx_segment *tail_segment;
+       struct xilinx_aximcdma_tx_segment *tail_segment;
        u32 reg;
 
        /*
@@ -1582,7 +1594,7 @@ static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan)
        tail_desc = list_last_entry(&chan->pending_list,
                                    struct xilinx_dma_tx_descriptor, node);
        tail_segment = list_last_entry(&tail_desc->segments,
-                                      struct xilinx_axidma_tx_segment, node);
+                                      struct xilinx_aximcdma_tx_segment, node);
 
        reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest));
 
@@ -1864,6 +1876,7 @@ static void append_desc_queue(struct xilinx_dma_chan *chan,
        struct xilinx_vdma_tx_segment *tail_segment;
        struct xilinx_dma_tx_descriptor *tail_desc;
        struct xilinx_axidma_tx_segment *axidma_tail_segment;
+       struct xilinx_aximcdma_tx_segment *aximcdma_tail_segment;
        struct xilinx_cdma_tx_segment *cdma_tail_segment;
 
        if (list_empty(&chan->pending_list))
@@ -1885,11 +1898,17 @@ static void append_desc_queue(struct xilinx_dma_chan *chan,
                                                struct xilinx_cdma_tx_segment,
                                                node);
                cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
-       } else {
+       } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
                axidma_tail_segment = list_last_entry(&tail_desc->segments,
                                               struct xilinx_axidma_tx_segment,
                                               node);
                axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+       } else {
+               aximcdma_tail_segment =
+                       list_last_entry(&tail_desc->segments,
+                                       struct xilinx_aximcdma_tx_segment,
+                                       node);
+               aximcdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
        }
 
        /*
@@ -2836,10 +2855,11 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
                chan->stop_transfer = xilinx_dma_stop_transfer;
        }
 
-       /* check if SG is enabled (only for AXIDMA and CDMA) */
+       /* check if SG is enabled (only for AXIDMA, AXIMCDMA, and CDMA) */
        if (xdev->dma_config->dmatype != XDMA_TYPE_VDMA) {
-               if (dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) &
-                   XILINX_DMA_DMASR_SG_MASK)
+               if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA ||
+                   dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) &
+                           XILINX_DMA_DMASR_SG_MASK)
                        chan->has_sg = true;
                dev_dbg(chan->dev, "ch %d: SG %s\n", chan->id,
                        chan->has_sg ? "enabled" : "disabled");
index 36ec1f7..d989549 100644 (file)
@@ -270,7 +270,7 @@ config EFI_DEV_PATH_PARSER
 
 config EFI_EARLYCON
        def_bool y
-       depends on SERIAL_EARLYCON && !ARM && !IA64
+       depends on EFI && SERIAL_EARLYCON && !ARM && !IA64
        select FONT_SUPPORT
        select ARCH_USE_MEMREMAP_PROT
 
index 5e5480a..6c6eec0 100644 (file)
@@ -390,10 +390,10 @@ static int __init efisubsys_init(void)
 
        if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE |
                                      EFI_RT_SUPPORTED_GET_NEXT_VARIABLE_NAME)) {
-               efivar_ssdt_load();
                error = generic_ops_register();
                if (error)
                        goto err_put;
+               efivar_ssdt_load();
                platform_device_register_simple("efivars", 0, NULL, 0);
        }
 
index efb8a66..fd95ede 100644 (file)
 #include <linux/of_platform.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/hashtable.h>
 
 #include <linux/firmware/xlnx-zynqmp.h>
 #include "zynqmp-debug.h"
 
+/* Max HashMap Order for PM API feature check (1<<7 = 128) */
+#define PM_API_FEATURE_CHECK_MAX_ORDER  7
+
 static bool feature_check_enabled;
-static u32 zynqmp_pm_features[PM_API_MAX];
+static DEFINE_HASHTABLE(pm_api_features_map, PM_API_FEATURE_CHECK_MAX_ORDER);
+
+/**
+ * struct pm_api_feature_data - PM API Feature data
+ * @pm_api_id:         PM API Id, used as key to index into hashmap
+ * @feature_status:    status of PM API feature: valid, invalid
+ * @hentry:            hlist_node that hooks this entry into hashtable
+ */
+struct pm_api_feature_data {
+       u32 pm_api_id;
+       int feature_status;
+       struct hlist_node hentry;
+};
 
 static const struct mfd_cell firmware_devs[] = {
        {
@@ -142,29 +158,37 @@ static int zynqmp_pm_feature(u32 api_id)
        int ret;
        u32 ret_payload[PAYLOAD_ARG_CNT];
        u64 smc_arg[2];
+       struct pm_api_feature_data *feature_data;
 
        if (!feature_check_enabled)
                return 0;
 
-       /* Return value if feature is already checked */
-       if (api_id > ARRAY_SIZE(zynqmp_pm_features))
-               return PM_FEATURE_INVALID;
+       /* Check for existing entry in hash table for given api */
+       hash_for_each_possible(pm_api_features_map, feature_data, hentry,
+                              api_id) {
+               if (feature_data->pm_api_id == api_id)
+                       return feature_data->feature_status;
+       }
 
-       if (zynqmp_pm_features[api_id] != PM_FEATURE_UNCHECKED)
-               return zynqmp_pm_features[api_id];
+       /* Add new entry if not present */
+       feature_data = kmalloc(sizeof(*feature_data), GFP_KERNEL);
+       if (!feature_data)
+               return -ENOMEM;
 
+       feature_data->pm_api_id = api_id;
        smc_arg[0] = PM_SIP_SVC | PM_FEATURE_CHECK;
        smc_arg[1] = api_id;
 
        ret = do_fw_call(smc_arg[0], smc_arg[1], 0, ret_payload);
-       if (ret) {
-               zynqmp_pm_features[api_id] = PM_FEATURE_INVALID;
-               return PM_FEATURE_INVALID;
-       }
+       if (ret)
+               ret = -EOPNOTSUPP;
+       else
+               ret = ret_payload[1];
 
-       zynqmp_pm_features[api_id] = ret_payload[1];
+       feature_data->feature_status = ret;
+       hash_add(pm_api_features_map, &feature_data->hentry, api_id);
 
-       return zynqmp_pm_features[api_id];
+       return ret;
 }
 
 /**
@@ -200,9 +224,12 @@ int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1,
         * Make sure to stay in x0 register
         */
        u64 smc_arg[4];
+       int ret;
 
-       if (zynqmp_pm_feature(pm_api_id) == PM_FEATURE_INVALID)
-               return -ENOTSUPP;
+       /* Check if feature is supported or not */
+       ret = zynqmp_pm_feature(pm_api_id);
+       if (ret < 0)
+               return ret;
 
        smc_arg[0] = PM_SIP_SVC | pm_api_id;
        smc_arg[1] = ((u64)arg1 << 32) | arg0;
@@ -615,7 +642,7 @@ EXPORT_SYMBOL_GPL(zynqmp_pm_set_sd_tapdelay);
  */
 int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type)
 {
-       return zynqmp_pm_invoke_fn(PM_IOCTL, node_id, IOCTL_SET_SD_TAPDELAY,
+       return zynqmp_pm_invoke_fn(PM_IOCTL, node_id, IOCTL_SD_DLL_RESET,
                                   type, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(zynqmp_pm_sd_dll_reset);
@@ -1252,9 +1279,17 @@ static int zynqmp_firmware_probe(struct platform_device *pdev)
 
 static int zynqmp_firmware_remove(struct platform_device *pdev)
 {
+       struct pm_api_feature_data *feature_data;
+       int i;
+
        mfd_remove_devices(&pdev->dev);
        zynqmp_pm_api_debugfs_exit();
 
+       hash_for_each(pm_api_features_map, i, feature_data, hentry) {
+               hash_del(&feature_data->hentry);
+               kfree(feature_data);
+       }
+
        return 0;
 }
 
index 7cd5a29..5645226 100644 (file)
@@ -142,6 +142,7 @@ config FPGA_DFL
        tristate "FPGA Device Feature List (DFL) support"
        select FPGA_BRIDGE
        select FPGA_REGION
+       depends on HAS_IOMEM
        help
          Device Feature List (DFL) defines a feature list structure that
          creates a linked list of feature headers within the MMIO space
index 5bda38e..2bc173c 100644 (file)
@@ -192,6 +192,7 @@ static int arizona_gpio_probe(struct platform_device *pdev)
        ret = devm_gpiochip_add_data(&pdev->dev, &arizona_gpio->gpio_chip,
                                     arizona_gpio);
        if (ret < 0) {
+               pm_runtime_disable(&pdev->dev);
                dev_err(&pdev->dev, "Could not register gpiochip, %d\n",
                        ret);
                return ret;
index 2a9046c..4275c18 100644 (file)
@@ -724,6 +724,8 @@ static int dwapb_gpio_probe(struct platform_device *pdev)
                        return err;
        }
 
+       platform_set_drvdata(pdev, gpio);
+
        return 0;
 }
 
index ad61daf..865ab2b 100644 (file)
@@ -598,7 +598,7 @@ static int sprd_eic_probe(struct platform_device *pdev)
                 */
                res = platform_get_resource(pdev, IORESOURCE_MEM, i);
                if (!res)
-                       continue;
+                       break;
 
                sprd_eic->base[i] = devm_ioremap_resource(&pdev->dev, res);
                if (IS_ERR(sprd_eic->base[i]))
index 433e2c3..2f24559 100644 (file)
@@ -1197,6 +1197,13 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
 
        devm_gpiochip_add_data(&pdev->dev, &mvchip->chip, mvchip);
 
+       /* Some MVEBU SoCs have simple PWM support for GPIO lines */
+       if (IS_ENABLED(CONFIG_PWM)) {
+               err = mvebu_pwm_probe(pdev, mvchip, id);
+               if (err)
+                       return err;
+       }
+
        /* Some gpio controllers do not provide irq support */
        if (!have_irqs)
                return 0;
@@ -1206,7 +1213,8 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
        if (!mvchip->domain) {
                dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n",
                        mvchip->chip.label);
-               return -ENODEV;
+               err = -ENODEV;
+               goto err_pwm;
        }
 
        err = irq_alloc_domain_generic_chips(
@@ -1254,14 +1262,12 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
                                                 mvchip);
        }
 
-       /* Some MVEBU SoCs have simple PWM support for GPIO lines */
-       if (IS_ENABLED(CONFIG_PWM))
-               return mvebu_pwm_probe(pdev, mvchip, id);
-
        return 0;
 
 err_domain:
        irq_domain_remove(mvchip->domain);
+err_pwm:
+       pwmchip_remove(&mvchip->mvpwm->chip);
 
        return err;
 }
index 0b5a17a..3521c1d 100644 (file)
@@ -574,7 +574,7 @@ static int zynq_gpio_irq_reqres(struct irq_data *d)
        struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
        int ret;
 
-       ret = pm_runtime_get_sync(chip->parent);
+       ret = pm_runtime_resume_and_get(chip->parent);
        if (ret < 0)
                return ret;
 
@@ -942,7 +942,7 @@ static int zynq_gpio_probe(struct platform_device *pdev)
 
        pm_runtime_set_active(&pdev->dev);
        pm_runtime_enable(&pdev->dev);
-       ret = pm_runtime_get_sync(&pdev->dev);
+       ret = pm_runtime_resume_and_get(&pdev->dev);
        if (ret < 0)
                goto err_pm_dis;
 
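
Unlike pm_runtime_get_sync(), which bumps the usage count even when the resume fails, pm_runtime_resume_and_get() drops the count itself on failure; that is what makes the bare error returns above leak-free. A hedged kernel-style sketch of the calling convention:

#include <linux/pm_runtime.h>

static int do_io(struct device *dev)
{
	int ret;

	ret = pm_runtime_resume_and_get(dev);
	if (ret < 0)
		return ret;	/* usage count already dropped for us */

	/* ... touch the hardware ... */

	pm_runtime_put(dev);
	return 0;
}
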
index 089ddca..6e3c4d7 100644 (file)
@@ -1806,6 +1806,11 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_request);
  */
 void gpiochip_generic_free(struct gpio_chip *gc, unsigned offset)
 {
+#ifdef CONFIG_PINCTRL
+       if (list_empty(&gc->gpiodev->pin_ranges))
+               return;
+#endif
+
        pinctrl_gpio_free(gc->gpiodev->base + offset);
 }
 EXPORT_SYMBOL_GPL(gpiochip_generic_free);
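
The list_empty() guard assumes that chips routing their lines through pinctrl have populated gpiodev->pin_ranges at probe time; chips that never did now skip pinctrl_gpio_free() instead of poking a pinctrl device that does not exist. For reference, a hypothetical driver fragment that would populate the list (controller name and offsets are made up):

        /* Map GPIOs 0..31 of this chip onto pins 32..63 of "foo-pinctrl",
         * adding an entry to gpiodev->pin_ranges. */
        ret = gpiochip_add_pin_range(&chip, "foo-pinctrl", 0, 32, 32);
        if (ret)
                return ret;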
index e3783f5..026789b 100644 (file)
@@ -4852,7 +4852,7 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
        if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
                return -ENOTSUPP;
 
-       if (ras && ras->supported)
+       if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
                adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
 
        return amdgpu_dpm_baco_enter(adev);
@@ -4871,7 +4871,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
        if (ret)
                return ret;
 
-       if (ras && ras->supported)
+       if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
                adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
 
        return 0;
index 9579349..1b56dbc 100644 (file)
@@ -459,6 +459,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct amdgpu_bo *bo;
        struct amdgpu_bo_param bp;
+       struct drm_gem_object *gobj;
        int ret;
 
        memset(&bp, 0, sizeof(bp));
@@ -469,17 +470,20 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
        bp.type = ttm_bo_type_sg;
        bp.resv = resv;
        dma_resv_lock(resv, NULL);
-       ret = amdgpu_bo_create(adev, &bp, &bo);
+       ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
+                       AMDGPU_GEM_DOMAIN_CPU,
+                       0, ttm_bo_type_sg, resv, &gobj);
        if (ret)
                goto error;
 
+       bo = gem_to_amdgpu_bo(gobj);
        bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
        bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
        if (dma_buf->ops != &amdgpu_dmabuf_ops)
                bo->prime_shared_count = 1;
 
        dma_resv_unlock(resv);
-       return &bo->tbo.base;
+       return gobj;
 
 error:
        dma_resv_unlock(resv);
index 42d9748..8e988f0 100644 (file)
@@ -1055,10 +1055,10 @@ static const struct pci_device_id pciidlist[] = {
        {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
        {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
        /* Arcturus */
-       {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
-       {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},
+       {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+       {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+       {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+       {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
        /* Navi10 */
        {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
        {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
index 7e8265d..e8c76bd 100644 (file)
@@ -66,26 +66,12 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
        bp.type = type;
        bp.resv = resv;
        bp.preferred_domain = initial_domain;
-retry:
        bp.flags = flags;
        bp.domain = initial_domain;
        r = amdgpu_bo_create(adev, &bp, &bo);
-       if (r) {
-               if (r != -ERESTARTSYS) {
-                       if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
-                               flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-                               goto retry;
-                       }
-
-                       if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
-                               initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
-                               goto retry;
-                       }
-                       DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n",
-                                 size, initial_domain, alignment, r);
-               }
+       if (r)
                return r;
-       }
+
        *obj = &bo->tbo.base;
 
        return 0;
@@ -225,7 +211,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
        uint64_t size = args->in.bo_size;
        struct dma_resv *resv = NULL;
        struct drm_gem_object *gobj;
-       uint32_t handle;
+       uint32_t handle, initial_domain;
        int r;
 
        /* reject invalid gem flags */
@@ -269,9 +255,28 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
                resv = vm->root.base.bo->tbo.base.resv;
        }
 
+retry:
+       initial_domain = (u32)(0xffffffff & args->in.domains);
        r = amdgpu_gem_object_create(adev, size, args->in.alignment,
-                                    (u32)(0xffffffff & args->in.domains),
+                                    initial_domain,
                                     flags, ttm_bo_type_device, resv, &gobj);
+       if (r) {
+               if (r != -ERESTARTSYS) {
+                       if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+                               flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+                               goto retry;
+                       }
+
+                       if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
+                               initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
+                               goto retry;
+                       }
+                       DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
+                                 size, initial_domain, args->in.alignment, r);
+               }
+               return r;
+       }
+
        if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
                if (!r) {
                        struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
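
With the retry loop hoisted into the ioctl, the fallback policy reads top to bottom: drop the CPU-access requirement first, then widen a pure-VRAM request to VRAM|GTT, and never retry -ERESTARTSYS so signals still abort the call. An illustrative failure sequence (a sketch, not traced output):

        /*
         * Attempt 1: VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED  -> -ENOMEM
         * Attempt 2: VRAM, CPU_ACCESS_REQUIRED cleared            -> -ENOMEM
         * Attempt 3: VRAM | GTT                                   -> 0 (GTT placement)
         */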
index 36604d7..3e4892b 100644 (file)
@@ -499,6 +499,9 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
        else
                size = amdgpu_gmc_get_vbios_fb_size(adev);
 
+       if (adev->mman.keep_stolen_vga_memory)
+               size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
+
        /* set to 0 if the pre-OS buffer uses up most of vram */
        if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
                size = 0;
index 4e36551..82cd8e5 100644 (file)
@@ -1172,7 +1172,7 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
                        con->dir, &con->disable_ras_err_cnt_harvest);
 }
 
-void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
+static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
                struct ras_fs_if *head)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1194,7 +1194,6 @@ void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
 
 void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
 {
-#if defined(CONFIG_DEBUG_FS)
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_manager *obj;
        struct ras_fs_if fs_info;
@@ -1203,7 +1202,7 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
         * it won't be called in the resume path, so there is no need to
         * check suspend and gpu reset status
         */
-       if (!con)
+       if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con)
                return;
 
        amdgpu_ras_debugfs_create_ctrl_node(adev);
@@ -1217,10 +1216,9 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
                        amdgpu_ras_debugfs_create(adev, &fs_info);
                }
        }
-#endif
 }
 
-void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
+static void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
                struct ras_common_if *head)
 {
        struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
@@ -1234,7 +1232,6 @@ void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
 
 static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
 {
-#if defined(CONFIG_DEBUG_FS)
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_manager *obj, *tmp;
 
@@ -1243,7 +1240,6 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
        }
 
        con->dir = NULL;
-#endif
 }
 /* debugfs end */
 
@@ -1291,7 +1287,8 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
 
 static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
 {
-       amdgpu_ras_debugfs_remove_all(adev);
+       if (IS_ENABLED(CONFIG_DEBUG_FS))
+               amdgpu_ras_debugfs_remove_all(adev);
        amdgpu_ras_sysfs_remove_all(adev);
        return 0;
 }
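
Dropping the #if defined(CONFIG_DEBUG_FS) blocks in favour of IS_ENABLED() checks keeps the debugfs code compiling (and type-checked) in every configuration while still letting the compiler discard it. A minimal sketch of the pattern, with a hypothetical function name:

        static void foo_debugfs_init(struct amdgpu_device *adev)
        {
                /* IS_ENABLED(CONFIG_DEBUG_FS) is a compile-time 0 or 1,
                 * so this branch is folded away when debugfs is off. */
                if (!IS_ENABLED(CONFIG_DEBUG_FS))
                        return;

                /* ... create debugfs nodes ... */
        }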
index 6b8d7bb..ec398ed 100644 (file)
@@ -607,14 +607,8 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
 int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
                struct ras_common_if *head);
 
-void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
-               struct ras_fs_if *head);
-
 void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
 
-void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
-               struct ras_common_if *head);
-
 int amdgpu_ras_error_query(struct amdgpu_device *adev,
                struct ras_query_if *info);
 
index 8039d23..a0248d7 100644 (file)
@@ -69,10 +69,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,
 
 static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
                                    unsigned int type,
-                                   uint64_t size)
+                                   uint64_t size_in_page)
 {
        return ttm_range_man_init(&adev->mman.bdev, type,
-                                 false, size >> PAGE_SHIFT);
+                                 false, size_in_page);
 }
 
 /**
index 5eb6328..edbb819 100644 (file)
@@ -67,6 +67,7 @@ struct amdgpu_uvd {
        unsigned                harvest_config;
        /* store image width to adjust nb memory state */
        unsigned                decode_image_width;
+       uint32_t                keyselect;
 };
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
index 3579565..55f4b8c 100644 (file)
@@ -3105,6 +3105,8 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
index 9f39527..2a48505 100644 (file)
@@ -186,7 +186,7 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
                        if (err)
                                goto out;
 
-                       err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]);
+                       err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[i]);
                        if (err)
                                goto out;
                }
index 7cf4b11..41800fc 100644 (file)
@@ -277,15 +277,8 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev)
  */
 static int uvd_v3_1_fw_validate(struct amdgpu_device *adev)
 {
-       void *ptr;
-       uint32_t ucode_len, i;
-       uint32_t keysel;
-
-       ptr = adev->uvd.inst[0].cpu_addr;
-       ptr += 192 + 16;
-       memcpy(&ucode_len, ptr, 4);
-       ptr += ucode_len;
-       memcpy(&keysel, ptr, 4);
+       int i;
+       uint32_t keysel = adev->uvd.keyselect;
 
        WREG32(mmUVD_FW_START, keysel);
 
@@ -550,6 +543,8 @@ static int uvd_v3_1_sw_init(void *handle)
        struct amdgpu_ring *ring;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;
+       void *ptr;
+       uint32_t ucode_len;
 
        /* UVD TRAP */
        r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
@@ -571,6 +566,13 @@ static int uvd_v3_1_sw_init(void *handle)
        if (r)
                return r;
 
+       /* Retrieve the firmware validation key */
+       ptr = adev->uvd.inst[0].cpu_addr;
+       ptr += 192 + 16;
+       memcpy(&ucode_len, ptr, 4);
+       ptr += ucode_len;
+       memcpy(&adev->uvd.keyselect, ptr, 4);
+
        r = amdgpu_uvd_entity_init(adev);
 
        return r;
index e074f7e..b5f8f3d 100644 (file)
@@ -1011,6 +1011,11 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
        tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
        WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
 
+       /* Stall DPG before WPTR/RPTR reset */
+       WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+               UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+               ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
        /* set the write pointer delay */
        WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
 
@@ -1033,6 +1038,10 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
        WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
                lower_32_bits(ring->wptr));
 
+       /* Unstall DPG */
+       WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+               0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
        return 0;
 }
 
@@ -1556,8 +1565,14 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
                                        UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
                                        UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
 
+                               /* Stall DPG before WPTR/RPTR reset */
+                               WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+                                       UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
+                                       ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
                                /* Restore */
                                ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+                               ring->wptr = 0;
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
@@ -1565,14 +1580,16 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
 
                                ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+                               ring->wptr = 0;
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
                                WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
 
-                               WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
-                                       RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+                               /* Unstall DPG */
+                               WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
+                                       0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
 
                                SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
                                        UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -1630,10 +1647,6 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
 
-       if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
-               WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
-                       lower_32_bits(ring->wptr) | 0x80000000);
-
        if (ring->use_doorbell) {
                adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
                WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
index 222f1df..8cc51ce 100644 (file)
@@ -1736,6 +1736,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
        }
 
        mutex_unlock(&p->mutex);
+       dma_buf_put(dmabuf);
 
        args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
 
@@ -1745,6 +1746,7 @@ err_free:
        amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
 err_unlock:
        mutex_unlock(&p->mutex);
+       dma_buf_put(dmabuf);
        return r;
 }
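
The fix balances the file reference taken earlier in the ioctl by dma_buf_get(); both the success path and the error unwind must drop it. The required pairing, as a minimal sketch (the do_import() helper is hypothetical):

        struct dma_buf *dmabuf;
        int r;

        dmabuf = dma_buf_get(args->dmabuf_fd);  /* +1 file reference */
        if (IS_ERR(dmabuf))
                return PTR_ERR(dmabuf);

        r = do_import(dev, dmabuf);             /* importer takes its own refs */

        dma_buf_put(dmabuf);                    /* -1 on every exit path */
        return r;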
 
index e93e18c..0f7749e 100644 (file)
@@ -1041,7 +1041,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
        amdgpu_dm_init_color_mod();
 
 #ifdef CONFIG_DRM_AMD_DC_HDCP
-       if (adev->asic_type >= CHIP_RAVEN) {
+       if (adev->dm.dc->caps.max_links > 0 && adev->asic_type >= CHIP_RAVEN) {
                adev->dm.hdcp_workqueue = hdcp_create_workqueue(adev, &init_params.cp_psp, adev->dm.dc);
 
                if (!adev->dm.hdcp_workqueue)
@@ -1058,9 +1058,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
                goto error;
        }
 
-       /* Update the actual used number of crtc */
-       adev->mode_info.num_crtc = adev->dm.display_indexes_num;
-
        /* create fake encoders for MST */
        dm_dp_create_fake_mst_encoders(adev);
 
@@ -3251,6 +3248,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
        enum dc_connection_type new_connection_type = dc_connection_none;
        const struct dc_plane_cap *plane;
 
+       dm->display_indexes_num = dm->dc->caps.max_streams;
+       /* Update the actual used number of crtc */
+       adev->mode_info.num_crtc = adev->dm.display_indexes_num;
+
        link_cnt = dm->dc->caps.max_links;
        if (amdgpu_dm_mode_config_init(dm->adev)) {
                DRM_ERROR("DM: Failed to initialize mode config\n");
@@ -3312,8 +3313,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
                        goto fail;
                }
 
-       dm->display_indexes_num = dm->dc->caps.max_streams;
-
        /* loops over all connectors on the board */
        for (i = 0; i < link_cnt; i++) {
                struct dc_link *link = NULL;
@@ -7506,7 +7505,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
        bool mode_set_reset_required = false;
 
        drm_atomic_helper_update_legacy_modeset_state(dev, state);
-       drm_atomic_helper_calc_timestamping_constants(state);
 
        dm_state = dm_atomic_get_new_state(state);
        if (dm_state && dm_state->context) {
@@ -7533,6 +7531,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
                }
        }
 
+       drm_atomic_helper_calc_timestamping_constants(state);
+
        /* update changed items */
        for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
                struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
index 2f8fee0..6b431db 100644 (file)
@@ -163,8 +163,17 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base,
                        new_clocks->dppclk_khz = 100000;
        }
 
-       if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
-               if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
+       /*
+        * Temporarily ignore the 0 cases for disp and dpp clks.
+        * We may have a new feature that requires 0 clks in the future.
+        */
+       if (new_clocks->dppclk_khz == 0 || new_clocks->dispclk_khz == 0) {
+               new_clocks->dppclk_khz = clk_mgr_base->clks.dppclk_khz;
+               new_clocks->dispclk_khz = clk_mgr_base->clks.dispclk_khz;
+       }
+
+       if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) {
+               if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz)
                        dpp_clock_lowered = true;
                clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
                update_dppclk = true;
@@ -570,7 +579,7 @@ static struct clk_bw_params rn_bw_params = {
 
 };
 
-static struct wm_table ddr4_wm_table = {
+static struct wm_table ddr4_wm_table_gs = {
        .entries = {
                {
                        .wm_inst = WM_A,
@@ -607,7 +616,7 @@ static struct wm_table ddr4_wm_table = {
        }
 };
 
-static struct wm_table lpddr4_wm_table = {
+static struct wm_table lpddr4_wm_table_gs = {
        .entries = {
                {
                        .wm_inst = WM_A,
@@ -681,6 +690,80 @@ static struct wm_table lpddr4_wm_table_with_disabled_ppt = {
        }
 };
 
+static struct wm_table ddr4_wm_table_rn = {
+       .entries = {
+               {
+                       .wm_inst = WM_A,
+                       .wm_type = WM_TYPE_PSTATE_CHG,
+                       .pstate_latency_us = 11.72,
+                       .sr_exit_time_us = 9.09,
+                       .sr_enter_plus_exit_time_us = 10.14,
+                       .valid = true,
+               },
+               {
+                       .wm_inst = WM_B,
+                       .wm_type = WM_TYPE_PSTATE_CHG,
+                       .pstate_latency_us = 11.72,
+                       .sr_exit_time_us = 10.12,
+                       .sr_enter_plus_exit_time_us = 11.48,
+                       .valid = true,
+               },
+               {
+                       .wm_inst = WM_C,
+                       .wm_type = WM_TYPE_PSTATE_CHG,
+                       .pstate_latency_us = 11.72,
+                       .sr_exit_time_us = 10.12,
+                       .sr_enter_plus_exit_time_us = 11.48,
+                       .valid = true,
+               },
+               {
+                       .wm_inst = WM_D,
+                       .wm_type = WM_TYPE_PSTATE_CHG,
+                       .pstate_latency_us = 11.72,
+                       .sr_exit_time_us = 10.12,
+                       .sr_enter_plus_exit_time_us = 11.48,
+                       .valid = true,
+               },
+       }
+};
+
+static struct wm_table lpddr4_wm_table_rn = {
+       .entries = {
+               {
+                       .wm_inst = WM_A,
+                       .wm_type = WM_TYPE_PSTATE_CHG,
+                       .pstate_latency_us = 11.65333,
+                       .sr_exit_time_us = 7.32,
+                       .sr_enter_plus_exit_time_us = 8.38,
+                       .valid = true,
+               },
+               {
+                       .wm_inst = WM_B,
+                       .wm_type = WM_TYPE_PSTATE_CHG,
+                       .pstate_latency_us = 11.65333,
+                       .sr_exit_time_us = 9.82,
+                       .sr_enter_plus_exit_time_us = 11.196,
+                       .valid = true,
+               },
+               {
+                       .wm_inst = WM_C,
+                       .wm_type = WM_TYPE_PSTATE_CHG,
+                       .pstate_latency_us = 11.65333,
+                       .sr_exit_time_us = 9.89,
+                       .sr_enter_plus_exit_time_us = 11.24,
+                       .valid = true,
+               },
+               {
+                       .wm_inst = WM_D,
+                       .wm_type = WM_TYPE_PSTATE_CHG,
+                       .pstate_latency_us = 11.65333,
+                       .sr_exit_time_us = 9.748,
+                       .sr_enter_plus_exit_time_us = 11.102,
+                       .valid = true,
+               },
+       }
+};
+
 static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
 {
        int i;
@@ -762,6 +845,11 @@ void rn_clk_mgr_construct(
        struct dc_debug_options *debug = &ctx->dc->debug;
        struct dpm_clocks clock_table = { 0 };
        enum pp_smu_status status = 0;
+       int is_green_sardine = 0;
+
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+       is_green_sardine = ASICREV_IS_GREEN_SARDINE(ctx->asic_id.hw_internal_rev);
+#endif
 
        clk_mgr->base.ctx = ctx;
        clk_mgr->base.funcs = &dcn21_funcs;
@@ -802,10 +890,16 @@ void rn_clk_mgr_construct(
                        if (clk_mgr->periodic_retraining_disabled) {
                                rn_bw_params.wm_table = lpddr4_wm_table_with_disabled_ppt;
                        } else {
-                               rn_bw_params.wm_table = lpddr4_wm_table;
+                               if (is_green_sardine)
+                                       rn_bw_params.wm_table = lpddr4_wm_table_gs;
+                               else
+                                       rn_bw_params.wm_table = lpddr4_wm_table_rn;
                        }
                } else {
-                       rn_bw_params.wm_table = ddr4_wm_table;
+                       if (is_green_sardine)
+                               rn_bw_params.wm_table = ddr4_wm_table_gs;
+                       else
+                               rn_bw_params.wm_table = ddr4_wm_table_rn;
                }
                /* Saved clocks configured at boot for debug purposes */
                rn_dump_clk_registers(&clk_mgr->base.boot_snapshot, &clk_mgr->base, &log_info);
index fec87a2..5b0cedf 100644 (file)
@@ -3394,10 +3394,13 @@ uint32_t dc_bandwidth_in_kbps_from_timing(
 {
        uint32_t bits_per_channel = 0;
        uint32_t kbps;
+       struct fixed31_32 link_bw_kbps;
 
        if (timing->flags.DSC) {
-               kbps = (timing->pix_clk_100hz * timing->dsc_cfg.bits_per_pixel);
-               kbps = kbps / 160 + ((kbps % 160) ? 1 : 0);
+               link_bw_kbps = dc_fixpt_from_int(timing->pix_clk_100hz);
+               link_bw_kbps = dc_fixpt_div_int(link_bw_kbps, 160);
+               link_bw_kbps = dc_fixpt_mul_int(link_bw_kbps, timing->dsc_cfg.bits_per_pixel);
+               kbps = dc_fixpt_ceil(link_bw_kbps);
                return kbps;
        }
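
The constant 160 folds both unit conversions into one: pix_clk_100hz is in 100 Hz units and, assuming the usual DP DSC encoding, dsc_cfg.bits_per_pixel is in 1/16-bit units, so kbps = pix_clk_100hz * 100 * (bpp/16) / 1000 = pix_clk_100hz * bpp / 160. Doing the division first in fixed point avoids the 32-bit overflow the old multiply-first form hit on large modes; illustrative numbers:

        /* 8K60 at 12 bpp (illustrative): pix_clk_100hz = 23,760,000, bpp = 192.
         * Old: 23,760,000 * 192 = 4,561,920,000  > U32_MAX, overflows.
         * New: 23,760,000 / 160 = 148,500; * 192 = 28,512,000 kbps.
         */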
 
index 2a1fea5..3f1e7a1 100644 (file)
@@ -299,8 +299,8 @@ irq_source_info_dcn20[DAL_IRQ_SOURCES_NUMBER] = {
        pflip_int_entry(1),
        pflip_int_entry(2),
        pflip_int_entry(3),
-       [DC_IRQ_SOURCE_PFLIP5] = dummy_irq_entry(),
-       [DC_IRQ_SOURCE_PFLIP6] = dummy_irq_entry(),
+       pflip_int_entry(4),
+       pflip_int_entry(5),
        [DC_IRQ_SOURCE_PFLIP_UNDERLAY0] = dummy_irq_entry(),
        gpio_pad_int_entry(0),
        gpio_pad_int_entry(1),
index b965205..9e837a5 100644 (file)
 #define FEATURE_CORE_CSTATES_MASK     (1 << FEATURE_CORE_CSTATES_BIT)
 
 /* Workload bits */
-#define WORKLOAD_DEFAULT_BIT              0
-#define WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT 1
-#define WORKLOAD_PPLIB_POWER_SAVING_BIT   2
-#define WORKLOAD_PPLIB_VIDEO_BIT          3
-#define WORKLOAD_PPLIB_VR_BIT             4
-#define WORKLOAD_PPLIB_COMPUTE_BIT        5
-#define WORKLOAD_PPLIB_CUSTOM_BIT         6
-#define WORKLOAD_PPLIB_COUNT              7
+#define WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT 0
+#define WORKLOAD_PPLIB_VIDEO_BIT          2
+#define WORKLOAD_PPLIB_VR_BIT             3
+#define WORKLOAD_PPLIB_COMPUTE_BIT        4
+#define WORKLOAD_PPLIB_CUSTOM_BIT         5
+#define WORKLOAD_PPLIB_COUNT              6
 
 typedef struct {
        /* MP1_EXT_SCRATCH0 */
index 719597c..6606511 100644 (file)
@@ -24,6 +24,8 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/pci.h>
+
 #include <drm/amdgpu_drm.h>
 #include "processpptables.h"
 #include <atom-types.h>
@@ -984,6 +986,8 @@ static int init_thermal_controller(
                        struct pp_hwmgr *hwmgr,
                        const ATOM_PPLIB_POWERPLAYTABLE *powerplay_table)
 {
+       struct amdgpu_device *adev = hwmgr->adev;
+
        hwmgr->thermal_controller.ucType =
                        powerplay_table->sThermalController.ucType;
        hwmgr->thermal_controller.ucI2cLine =
@@ -1008,7 +1012,104 @@ static int init_thermal_controller(
                   ATOM_PP_THERMALCONTROLLER_NONE != hwmgr->thermal_controller.ucType,
                   PHM_PlatformCaps_ThermalController);
 
-       hwmgr->thermal_controller.use_hw_fan_control = 1;
+       if (powerplay_table->usTableSize >= sizeof(ATOM_PPLIB_POWERPLAYTABLE3)) {
+               const ATOM_PPLIB_POWERPLAYTABLE3 *powerplay_table3 =
+                       (const ATOM_PPLIB_POWERPLAYTABLE3 *)powerplay_table;
+
+               if (0 == le16_to_cpu(powerplay_table3->usFanTableOffset)) {
+                       hwmgr->thermal_controller.use_hw_fan_control = 1;
+                       return 0;
+               } else {
+                       const ATOM_PPLIB_FANTABLE *fan_table =
+                               (const ATOM_PPLIB_FANTABLE *)(((unsigned long)powerplay_table) +
+                                                             le16_to_cpu(powerplay_table3->usFanTableOffset));
+
+                       if (1 <= fan_table->ucFanTableFormat) {
+                               hwmgr->thermal_controller.advanceFanControlParameters.ucTHyst =
+                                       fan_table->ucTHyst;
+                               hwmgr->thermal_controller.advanceFanControlParameters.usTMin =
+                                       le16_to_cpu(fan_table->usTMin);
+                               hwmgr->thermal_controller.advanceFanControlParameters.usTMed =
+                                       le16_to_cpu(fan_table->usTMed);
+                               hwmgr->thermal_controller.advanceFanControlParameters.usTHigh =
+                                       le16_to_cpu(fan_table->usTHigh);
+                               hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin =
+                                       le16_to_cpu(fan_table->usPWMMin);
+                               hwmgr->thermal_controller.advanceFanControlParameters.usPWMMed =
+                                       le16_to_cpu(fan_table->usPWMMed);
+                               hwmgr->thermal_controller.advanceFanControlParameters.usPWMHigh =
+                                       le16_to_cpu(fan_table->usPWMHigh);
+                               hwmgr->thermal_controller.advanceFanControlParameters.usTMax = 10900;
+                               hwmgr->thermal_controller.advanceFanControlParameters.ulCycleDelay = 100000;
+
+                               phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+                                           PHM_PlatformCaps_MicrocodeFanControl);
+                       }
+
+                       if (2 <= fan_table->ucFanTableFormat) {
+                               const ATOM_PPLIB_FANTABLE2 *fan_table2 =
+                                       (const ATOM_PPLIB_FANTABLE2 *)(((unsigned long)powerplay_table) +
+                                                                      le16_to_cpu(powerplay_table3->usFanTableOffset));
+                               hwmgr->thermal_controller.advanceFanControlParameters.usTMax =
+                                       le16_to_cpu(fan_table2->usTMax);
+                       }
+
+                       if (3 <= fan_table->ucFanTableFormat) {
+                               const ATOM_PPLIB_FANTABLE3 *fan_table3 =
+                                       (const ATOM_PPLIB_FANTABLE3 *) (((unsigned long)powerplay_table) +
+                                                                       le16_to_cpu(powerplay_table3->usFanTableOffset));
+
+                               hwmgr->thermal_controller.advanceFanControlParameters.ucFanControlMode =
+                                       fan_table3->ucFanControlMode;
+
+                               if ((3 == fan_table->ucFanTableFormat) &&
+                                   (0x67B1 == adev->pdev->device))
+                                       hwmgr->thermal_controller.advanceFanControlParameters.usDefaultMaxFanPWM =
+                                               47;
+                               else
+                                       hwmgr->thermal_controller.advanceFanControlParameters.usDefaultMaxFanPWM =
+                                               le16_to_cpu(fan_table3->usFanPWMMax);
+
+                               hwmgr->thermal_controller.advanceFanControlParameters.usDefaultFanOutputSensitivity =
+                                       4836;
+                               hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity =
+                                       le16_to_cpu(fan_table3->usFanOutputSensitivity);
+                       }
+
+                       if (6 <= fan_table->ucFanTableFormat) {
+                               const ATOM_PPLIB_FANTABLE4 *fan_table4 =
+                                       (const ATOM_PPLIB_FANTABLE4 *)(((unsigned long)powerplay_table) +
+                                                                      le16_to_cpu(powerplay_table3->usFanTableOffset));
+
+                               phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+                                           PHM_PlatformCaps_FanSpeedInTableIsRPM);
+
+                               hwmgr->thermal_controller.advanceFanControlParameters.usDefaultMaxFanRPM =
+                                       le16_to_cpu(fan_table4->usFanRPMMax);
+                       }
+
+                       if (7 <= fan_table->ucFanTableFormat) {
+                               const ATOM_PPLIB_FANTABLE5 *fan_table5 =
+                                       (const ATOM_PPLIB_FANTABLE5 *)(((unsigned long)powerplay_table) +
+                                                                      le16_to_cpu(powerplay_table3->usFanTableOffset));
+
+                               if (0x67A2 == adev->pdev->device ||
+                                   0x67A9 == adev->pdev->device ||
+                                   0x67B9 == adev->pdev->device) {
+                                       phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+                                                   PHM_PlatformCaps_GeminiRegulatorFanControlSupport);
+                                       hwmgr->thermal_controller.advanceFanControlParameters.usFanCurrentLow =
+                                               le16_to_cpu(fan_table5->usFanCurrentLow);
+                                       hwmgr->thermal_controller.advanceFanControlParameters.usFanCurrentHigh =
+                                               le16_to_cpu(fan_table5->usFanCurrentHigh);
+                                       hwmgr->thermal_controller.advanceFanControlParameters.usFanRPMLow =
+                                               le16_to_cpu(fan_table5->usFanRPMLow);
+                                       hwmgr->thermal_controller.advanceFanControlParameters.usFanRPMHigh =
+                                               le16_to_cpu(fan_table5->usFanRPMHigh);
+                               }
+                       }
+               }
+       }
 
        return 0;
 }
index cf60f39..e6f40ee 100644 (file)
@@ -1297,15 +1297,9 @@ static int conv_power_profile_to_pplib_workload(int power_profile)
        int pplib_workload = 0;
 
        switch (power_profile) {
-       case PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT:
-               pplib_workload = WORKLOAD_DEFAULT_BIT;
-               break;
        case PP_SMC_POWER_PROFILE_FULLSCREEN3D:
                pplib_workload = WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT;
                break;
-       case PP_SMC_POWER_PROFILE_POWERSAVING:
-               pplib_workload = WORKLOAD_PPLIB_POWER_SAVING_BIT;
-               break;
        case PP_SMC_POWER_PROFILE_VIDEO:
                pplib_workload = WORKLOAD_PPLIB_VIDEO_BIT;
                break;
@@ -1315,6 +1309,9 @@ static int conv_power_profile_to_pplib_workload(int power_profile)
        case PP_SMC_POWER_PROFILE_COMPUTE:
                pplib_workload = WORKLOAD_PPLIB_COMPUTE_BIT;
                break;
+       case PP_SMC_POWER_PROFILE_CUSTOM:
+               pplib_workload = WORKLOAD_PPLIB_CUSTOM_BIT;
+               break;
        }
 
        return pplib_workload;
index 895d89b..cf7c4f0 100644 (file)
@@ -217,7 +217,7 @@ static struct cmn2asic_mapping sienna_cichlid_workload_map[PP_SMC_POWER_PROFILE_
        WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING,          WORKLOAD_PPLIB_POWER_SAVING_BIT),
        WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO,                WORKLOAD_PPLIB_VIDEO_BIT),
        WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR,                   WORKLOAD_PPLIB_VR_BIT),
-       WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE,              WORKLOAD_PPLIB_CUSTOM_BIT),
+       WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE,              WORKLOAD_PPLIB_COMPUTE_BIT),
        WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM,               WORKLOAD_PPLIB_CUSTOM_BIT),
 };
 
index 2380759..6db96fa 100644 (file)
@@ -1164,7 +1164,12 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu,
        if (ret)
                return ret;
 
-       crystal_clock_freq = amdgpu_asic_get_xclk(adev);
+       /*
+        * The crystal clock frequency is divided by 4 because the fan
+        * control module runs from a 25 MHz reference clock.
+        */
+
+       crystal_clock_freq = amdgpu_asic_get_xclk(adev) / 4;
        tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed);
        WREG32_SOC15(THM, 0, mmCG_TACH_CTRL,
                     REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL),
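
Working the numbers through, under the assumption that amdgpu_asic_get_xclk() reports the crystal clock in 10 kHz units:

        /* xclk = 10000 (100 MHz) -> crystal_clock_freq = 2500 (25 MHz ref).
         * For speed = 3000 RPM:
         *   tach_period = 60 * 2500 * 10000 / (8 * 3000) = 62,500 counts.
         * Without the /4 the programmed period is four times too long and
         * the fan runs well below the requested speed. */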
index 834a156..0a1e1cf 100644 (file)
@@ -742,7 +742,6 @@ static void ast_crtc_dpms(struct drm_crtc *crtc, int mode)
        case DRM_MODE_DPMS_SUSPEND:
                if (ast->tx_chip_type == AST_TX_DP501)
                        ast_set_dp501_video_output(crtc->dev, 1);
-               ast_crtc_load_lut(ast, crtc);
                break;
        case DRM_MODE_DPMS_OFF:
                if (ast->tx_chip_type == AST_TX_DP501)
@@ -778,6 +777,21 @@ static int ast_crtc_helper_atomic_check(struct drm_crtc *crtc,
 }
 
 static void
+ast_crtc_helper_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state)
+{
+       struct ast_private *ast = to_ast_private(crtc->dev);
+       struct ast_crtc_state *ast_crtc_state = to_ast_crtc_state(crtc->state);
+       struct ast_crtc_state *old_ast_crtc_state = to_ast_crtc_state(old_crtc_state);
+
+       /*
+        * The gamma LUT has to be reloaded after changing the primary
+        * plane's color format.
+        */
+       if (old_ast_crtc_state->format != ast_crtc_state->format)
+               ast_crtc_load_lut(ast, crtc);
+}
+
+static void
 ast_crtc_helper_atomic_enable(struct drm_crtc *crtc,
                              struct drm_crtc_state *old_crtc_state)
 {
@@ -830,6 +844,7 @@ ast_crtc_helper_atomic_disable(struct drm_crtc *crtc,
 
 static const struct drm_crtc_helper_funcs ast_crtc_helper_funcs = {
        .atomic_check = ast_crtc_helper_atomic_check,
+       .atomic_flush = ast_crtc_helper_atomic_flush,
        .atomic_enable = ast_crtc_helper_atomic_enable,
        .atomic_disable = ast_crtc_helper_atomic_disable,
 };
index 748df1c..0c79a9b 100644 (file)
@@ -2327,12 +2327,6 @@ static enum drm_connector_status dw_hdmi_detect(struct dw_hdmi *hdmi)
 {
        enum drm_connector_status result;
 
-       mutex_lock(&hdmi->mutex);
-       hdmi->force = DRM_FORCE_UNSPECIFIED;
-       dw_hdmi_update_power(hdmi);
-       dw_hdmi_update_phy_mask(hdmi);
-       mutex_unlock(&hdmi->mutex);
-
        result = hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data);
 
        mutex_lock(&hdmi->mutex);
index 50cad0e..375c79e 100644 (file)
@@ -140,7 +140,7 @@ static void drm_gem_vram_placement(struct drm_gem_vram_object *gbo,
        unsigned int c = 0;
 
        if (pl_flag & DRM_GEM_VRAM_PL_FLAG_TOPDOWN)
-               pl_flag = TTM_PL_FLAG_TOPDOWN;
+               invariant_flags = TTM_PL_FLAG_TOPDOWN;
 
        gbo->placement.placement = gbo->placements;
        gbo->placement.busy_placement = gbo->placements;
index 6417f37..951d5f7 100644 (file)
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_EXYNOS
        tristate "DRM Support for Samsung SoC Exynos Series"
-       depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM || COMPILE_TEST)
+       depends on OF && DRM && COMMON_CLK
+       depends on ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM || COMPILE_TEST
        depends on MMU
        select DRM_KMS_HELPER
        select VIDEOMODE_HELPERS
index 31337d2..aabf09f 100644 (file)
@@ -12878,10 +12878,11 @@ compute_sink_pipe_bpp(const struct drm_connector_state *conn_state,
        case 10 ... 11:
                bpp = 10 * 3;
                break;
-       case 12:
+       case 12 ... 16:
                bpp = 12 * 3;
                break;
        default:
+               MISSING_CASE(conn_state->max_bpc);
                return -EINVAL;
        }
 
@@ -18020,16 +18021,6 @@ int intel_modeset_init_nogem(struct drm_i915_private *i915)
        if (!HAS_GMCH(i915))
                sanitize_watermarks(i915);
 
-       /*
-        * Force all active planes to recompute their states. So that on
-        * mode_setcrtc after probe, all the intel_plane_state variables
-        * are already calculated and there is no assert_plane warnings
-        * during bootup.
-        */
-       ret = intel_initial_commit(dev);
-       if (ret)
-               drm_dbg_kms(&i915->drm, "Initial commit in probe failed.\n");
-
        return 0;
 }
 
@@ -18038,11 +18029,21 @@ int intel_modeset_init(struct drm_i915_private *i915)
 {
        int ret;
 
-       intel_overlay_setup(i915);
-
        if (!HAS_DISPLAY(i915))
                return 0;
 
+       /*
+        * Force all active planes to recompute their states. So that on
+        * mode_setcrtc after probe, all the intel_plane_state variables
+        * are already calculated and there is no assert_plane warnings
+        * during bootup.
+        */
+       ret = intel_initial_commit(&i915->drm);
+       if (ret)
+               drm_dbg_kms(&i915->drm, "Initial modeset failed, %d\n", ret);
+
+       intel_overlay_setup(i915);
+
        ret = intel_fbdev_init(&i915->drm);
        if (ret)
                return ret;
index bf1e9cf..9bc59fd 100644 (file)
@@ -573,7 +573,7 @@ static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
                return 0;
        }
        /* Also take into account max slice width */
-       min_slice_count = min_t(u8, min_slice_count,
+       min_slice_count = max_t(u8, min_slice_count,
                                DIV_ROUND_UP(mode_hdisplay,
                                             max_slice_width));
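
min_slice_count is a lower bound, so the slice-width constraint may only raise it; min_t() could shrink it below what the sink's maximum slice width allows. A worked example with illustrative numbers:

        /* mode_hdisplay = 7680, max_slice_width = 5120:
         *   DIV_ROUND_UP(7680, 5120) = 2 slices needed for width.
         * If throughput alone needed only 1 slice:
         *   min_t -> 1 (slices wider than the sink supports, invalid)
         *   max_t -> 2 (correct) */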
 
index 1904e6e..b07dc11 100644 (file)
@@ -3097,7 +3097,7 @@ static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
                        break;
 }
 
-static void eb_request_add(struct i915_execbuffer *eb)
+static int eb_request_add(struct i915_execbuffer *eb, int err)
 {
        struct i915_request *rq = eb->request;
        struct intel_timeline * const tl = i915_request_timeline(rq);
@@ -3118,6 +3118,7 @@ static void eb_request_add(struct i915_execbuffer *eb)
                /* Serialise with context_close via the add_to_timeline */
                i915_request_set_error_once(rq, -ENOENT);
                __i915_request_skip(rq);
+               err = -ENOENT; /* override any transient errors */
        }
 
        __i915_request_queue(rq, &attr);
@@ -3127,6 +3128,8 @@ static void eb_request_add(struct i915_execbuffer *eb)
                retire_requests(tl, prev);
 
        mutex_unlock(&tl->mutex);
+
+       return err;
 }
 
 static const i915_user_extension_fn execbuf_extensions[] = {
@@ -3332,7 +3335,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        err = eb_submit(&eb, batch);
 err_request:
        i915_request_get(eb.request);
-       eb_request_add(&eb);
+       err = eb_request_add(&eb, err);
 
        if (eb.fences)
                signal_fence_array(&eb);
index d8b206e..a24cc1f 100644 (file)
 #include "i915_trace.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
+#include "intel_engine_pm.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
 
-static void irq_enable(struct intel_engine_cs *engine)
+static bool irq_enable(struct intel_engine_cs *engine)
 {
        if (!engine->irq_enable)
-               return;
+               return false;
 
        /* Caller disables interrupts */
        spin_lock(&engine->gt->irq_lock);
        engine->irq_enable(engine);
        spin_unlock(&engine->gt->irq_lock);
+
+       return true;
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
@@ -57,12 +60,11 @@ static void irq_disable(struct intel_engine_cs *engine)
 
 static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
-       lockdep_assert_held(&b->irq_lock);
-
-       if (!b->irq_engine || b->irq_armed)
-               return;
-
-       if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
+       /*
+        * Since we are waiting on a request, the GPU should be busy
+        * and should have its own rpm reference.
+        */
+       if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
                return;
 
        /*
@@ -73,25 +75,24 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
         */
        WRITE_ONCE(b->irq_armed, true);
 
-       /*
-        * Since we are waiting on a request, the GPU should be busy
-        * and should have its own rpm reference. This is tracked
-        * by i915->gt.awake, we can forgo holding our own wakref
-        * for the interrupt as before i915->gt.awake is released (when
-        * the driver is idle) we disarm the breadcrumbs.
-        */
-
-       if (!b->irq_enabled++)
-               irq_enable(b->irq_engine);
+       /* Requests may have completed before we could enable the interrupt. */
+       if (!b->irq_enabled++ && irq_enable(b->irq_engine))
+               irq_work_queue(&b->irq_work);
 }
 
-static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
-       lockdep_assert_held(&b->irq_lock);
-
-       if (!b->irq_engine || !b->irq_armed)
+       if (!b->irq_engine)
                return;
 
+       spin_lock(&b->irq_lock);
+       if (!b->irq_armed)
+               __intel_breadcrumbs_arm_irq(b);
+       spin_unlock(&b->irq_lock);
+}
+
+static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
        GEM_BUG_ON(!b->irq_enabled);
        if (!--b->irq_enabled)
                irq_disable(b->irq_engine);
@@ -100,20 +101,37 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
        intel_gt_pm_put_async(b->irq_engine->gt);
 }
 
+static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
+       spin_lock(&b->irq_lock);
+       if (b->irq_armed)
+               __intel_breadcrumbs_disarm_irq(b);
+       spin_unlock(&b->irq_lock);
+}
+
 static void add_signaling_context(struct intel_breadcrumbs *b,
                                  struct intel_context *ce)
 {
-       intel_context_get(ce);
-       list_add_tail(&ce->signal_link, &b->signalers);
-       if (list_is_first(&ce->signal_link, &b->signalers))
-               __intel_breadcrumbs_arm_irq(b);
+       lockdep_assert_held(&ce->signal_lock);
+
+       spin_lock(&b->signalers_lock);
+       list_add_rcu(&ce->signal_link, &b->signalers);
+       spin_unlock(&b->signalers_lock);
 }
 
-static void remove_signaling_context(struct intel_breadcrumbs *b,
+static bool remove_signaling_context(struct intel_breadcrumbs *b,
                                     struct intel_context *ce)
 {
-       list_del(&ce->signal_link);
-       intel_context_put(ce);
+       lockdep_assert_held(&ce->signal_lock);
+
+       if (!list_empty(&ce->signals))
+               return false;
+
+       spin_lock(&b->signalers_lock);
+       list_del_rcu(&ce->signal_link);
+       spin_unlock(&b->signalers_lock);
+
+       return true;
 }
 
 static inline bool __request_completed(const struct i915_request *rq)
@@ -174,73 +192,103 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
                intel_engine_add_retire(b->irq_engine, tl);
 }
 
-static bool __signal_request(struct i915_request *rq, struct list_head *signals)
+static bool __signal_request(struct i915_request *rq)
 {
-       clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+       GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
 
        if (!__dma_fence_signal(&rq->fence)) {
                i915_request_put(rq);
                return false;
        }
 
-       list_add_tail(&rq->signal_link, signals);
        return true;
 }
 
+static struct llist_node *
+slist_add(struct llist_node *node, struct llist_node *head)
+{
+       node->next = head;
+       return node;
+}
+
 static void signal_irq_work(struct irq_work *work)
 {
        struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
        const ktime_t timestamp = ktime_get();
-       struct intel_context *ce, *cn;
-       struct list_head *pos, *next;
-       LIST_HEAD(signal);
-
-       spin_lock(&b->irq_lock);
+       struct llist_node *signal, *sn;
+       struct intel_context *ce;
 
-       if (list_empty(&b->signalers))
-               __intel_breadcrumbs_disarm_irq(b);
+       signal = NULL;
+       if (unlikely(!llist_empty(&b->signaled_requests)))
+               signal = llist_del_all(&b->signaled_requests);
 
-       list_splice_init(&b->signaled_requests, &signal);
+       /*
+        * Keep the irq armed until the interrupt that fires after the last
+        * listener is gone.
+        *
+        * Enabling/disabling the interrupt is rather costly, roughly a couple
+        * of hundred microseconds. If we are proactive and enable/disable
+        * the interrupt around every request that wants a breadcrumb, we
+        * quickly drown in the extra orders of magnitude of latency imposed
+        * on request submission.
+        *
+        * So we try to be lazy, and keep the interrupts enabled until no
+        * more listeners appear within a breadcrumb interrupt interval (that
+        * is until a request completes that no one cares about). The
+        * observation is that listeners come in batches, and will often
+        * listen to a bunch of requests in succession. Though note on icl+,
+        * interrupts are always enabled due to concerns with rc6 being
+        * dysfunctional with per-engine interrupt masking.
+        *
+        * We also try to avoid raising too many interrupts, as they may
+        * be generated by userspace batches and it is unfortunately rather
+        * too easy to drown the CPU under a flood of GPU interrupts. Thus
+        * whenever no one appears to be listening, we turn off the interrupts.
+        * Fewer interrupts should conserve power -- at the very least, fewer
+        * interrupts draw less ire from other users of the system and tools
+        * like powertop.
+        */
+       if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
+               intel_breadcrumbs_disarm_irq(b);
 
-       list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
-               GEM_BUG_ON(list_empty(&ce->signals));
+       rcu_read_lock();
+       list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
+               struct i915_request *rq;
 
-               list_for_each_safe(pos, next, &ce->signals) {
-                       struct i915_request *rq =
-                               list_entry(pos, typeof(*rq), signal_link);
+               list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
+                       bool release;
 
-                       GEM_BUG_ON(!check_signal_order(ce, rq));
                        if (!__request_completed(rq))
                                break;
 
+                       if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
+                                               &rq->fence.flags))
+                               break;
+
                        /*
                         * Queue for execution after dropping the signaling
                         * spinlock as the callback chain may end up adding
                         * more signalers to the same context or engine.
                         */
-                       __signal_request(rq, &signal);
-               }
+                       spin_lock(&ce->signal_lock);
+                       list_del_rcu(&rq->signal_link);
+                       release = remove_signaling_context(b, ce);
+                       spin_unlock(&ce->signal_lock);
 
-               /*
-                * We process the list deletion in bulk, only using a list_add
-                * (not list_move) above but keeping the status of
-                * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit.
-                */
-               if (!list_is_first(pos, &ce->signals)) {
-                       /* Advance the list to the first incomplete request */
-                       __list_del_many(&ce->signals, pos);
-                       if (&ce->signals == pos) { /* now empty */
+                       if (__signal_request(rq))
+                               /* We own signal_node now, xfer to local list */
+                               signal = slist_add(&rq->signal_node, signal);
+
+                       if (release) {
                                add_retire(b, ce->timeline);
-                               remove_signaling_context(b, ce);
+                               intel_context_put(ce);
                        }
                }
        }
+       rcu_read_unlock();
 
-       spin_unlock(&b->irq_lock);
-
-       list_for_each_safe(pos, next, &signal) {
+       llist_for_each_safe(signal, sn, signal) {
                struct i915_request *rq =
-                       list_entry(pos, typeof(*rq), signal_link);
+                       llist_entry(signal, typeof(*rq), signal_node);
                struct list_head cb_list;
 
                spin_lock(&rq->lock);
@@ -251,6 +299,9 @@ static void signal_irq_work(struct irq_work *work)
 
                i915_request_put(rq);
        }
+
+       if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
+               intel_breadcrumbs_arm_irq(b);
 }
 
 struct intel_breadcrumbs *
@@ -262,14 +313,15 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
        if (!b)
                return NULL;
 
-       spin_lock_init(&b->irq_lock);
+       b->irq_engine = irq_engine;
+
+       spin_lock_init(&b->signalers_lock);
        INIT_LIST_HEAD(&b->signalers);
-       INIT_LIST_HEAD(&b->signaled_requests);
+       init_llist_head(&b->signaled_requests);
 
+       spin_lock_init(&b->irq_lock);
        init_irq_work(&b->irq_work, signal_irq_work);
 
-       b->irq_engine = irq_engine;
-
        return b;
 }
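
Converting signaled_requests from a spinlock-protected list_head to an llist lets producers queue completed requests without taking b->irq_lock, while the irq worker claims the whole backlog in one atomic step. The core pattern, sketched with the llist API the patch uses:

        struct llist_node *node, *sn;

        /* Producer (any context, lock-free); llist_add() returns true
         * when the list was empty, so only the first entry kicks the worker. */
        if (llist_add(&rq->signal_node, &b->signaled_requests))
                irq_work_queue(&b->irq_work);

        /* Consumer (irq worker): detach the entire backlog atomically. */
        node = llist_del_all(&b->signaled_requests);
        llist_for_each_safe(node, sn, node) {
                struct i915_request *rq2 =
                        llist_entry(node, typeof(*rq2), signal_node);
                /* ... signal rq2->fence ... */
        }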
 
@@ -292,27 +344,28 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
 
 void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
 {
-       unsigned long flags;
-
-       if (!READ_ONCE(b->irq_armed))
-               return;
-
-       spin_lock_irqsave(&b->irq_lock, flags);
-       __intel_breadcrumbs_disarm_irq(b);
-       spin_unlock_irqrestore(&b->irq_lock, flags);
-
-       if (!list_empty(&b->signalers))
-               irq_work_queue(&b->irq_work);
+       /* Kick the work once more to drain the signalers */
+       irq_work_sync(&b->irq_work);
+       while (unlikely(READ_ONCE(b->irq_armed))) {
+               local_irq_disable();
+               signal_irq_work(&b->irq_work);
+               local_irq_enable();
+               cond_resched();
+       }
+       GEM_BUG_ON(!list_empty(&b->signalers));
 }
 
 void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
 {
+       irq_work_sync(&b->irq_work);
+       GEM_BUG_ON(!list_empty(&b->signalers));
+       GEM_BUG_ON(b->irq_armed);
        kfree(b);
 }
 
-static void insert_breadcrumb(struct i915_request *rq,
-                             struct intel_breadcrumbs *b)
+static void insert_breadcrumb(struct i915_request *rq)
 {
+       struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
        struct intel_context *ce = rq->context;
        struct list_head *pos;
 
@@ -327,12 +380,14 @@ static void insert_breadcrumb(struct i915_request *rq,
         * its signal completion.
         */
        if (__request_completed(rq)) {
-               if (__signal_request(rq, &b->signaled_requests))
+               if (__signal_request(rq) &&
+                   llist_add(&rq->signal_node, &b->signaled_requests))
                        irq_work_queue(&b->irq_work);
                return;
        }
 
        if (list_empty(&ce->signals)) {
+               intel_context_get(ce);
                add_signaling_context(b, ce);
                pos = &ce->signals;
        } else {
@@ -358,18 +413,22 @@ static void insert_breadcrumb(struct i915_request *rq,
                                break;
                }
        }
-       list_add(&rq->signal_link, pos);
+       list_add_rcu(&rq->signal_link, pos);
        GEM_BUG_ON(!check_signal_order(ce, rq));
+       GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
        set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 
-       /* Check after attaching to irq, interrupt may have already fired. */
-       if (__request_completed(rq))
-               irq_work_queue(&b->irq_work);
+       /*
+        * Defer enabling the interrupt to after HW submission and recheck
+        * the request, as it may have completed and raised the interrupt
+        * while we were attaching it to the lists.
+        */
+       irq_work_queue(&b->irq_work);
 }
 
 bool i915_request_enable_breadcrumb(struct i915_request *rq)
 {
-       struct intel_breadcrumbs *b;
+       struct intel_context *ce = rq->context;
 
        /* Serialises with i915_request_retire() using rq->lock */
        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
@@ -384,67 +443,30 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
        if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
                return true;
 
-       /*
-        * rq->engine is locked by rq->engine->active.lock. That however
-        * is not known until after rq->engine has been dereferenced and
-        * the lock acquired. Hence we acquire the lock and then validate
-        * that rq->engine still matches the lock we hold for it.
-        *
-        * Here, we are using the breadcrumb lock as a proxy for the
-        * rq->engine->active.lock, and we know that since the breadcrumb
-        * will be serialised within i915_request_submit/i915_request_unsubmit,
-        * the engine cannot change while active as long as we hold the
-        * breadcrumb lock on that engine.
-        *
-        * From the dma_fence_enable_signaling() path, we are outside of the
-        * request submit/unsubmit path, and so we must be more careful to
-        * acquire the right lock.
-        */
-       b = READ_ONCE(rq->engine)->breadcrumbs;
-       spin_lock(&b->irq_lock);
-       while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
-               spin_unlock(&b->irq_lock);
-               b = READ_ONCE(rq->engine)->breadcrumbs;
-               spin_lock(&b->irq_lock);
-       }
-
-       /*
-        * Now that we are finally serialised with request submit/unsubmit,
-        * [with b->irq_lock] and with i915_request_retire() [via checking
-        * SIGNALED with rq->lock] confirm the request is indeed active. If
-        * it is no longer active, the breadcrumb will be attached upon
-        * i915_request_submit().
-        */
+       spin_lock(&ce->signal_lock);
        if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
-               insert_breadcrumb(rq, b);
-
-       spin_unlock(&b->irq_lock);
+               insert_breadcrumb(rq);
+       spin_unlock(&ce->signal_lock);
 
        return true;
 }
 
 void i915_request_cancel_breadcrumb(struct i915_request *rq)
 {
-       struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
+       struct intel_context *ce = rq->context;
+       bool release;
 
-       /*
-        * We must wait for b->irq_lock so that we know the interrupt handler
-        * has released its reference to the intel_context and has completed
-        * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
-        * required).
-        */
-       spin_lock(&b->irq_lock);
-       if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
-               struct intel_context *ce = rq->context;
+       if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+               return;
 
-               list_del(&rq->signal_link);
-               if (list_empty(&ce->signals))
-                       remove_signaling_context(b, ce);
+       spin_lock(&ce->signal_lock);
+       list_del_rcu(&rq->signal_link);
+       release = remove_signaling_context(rq->engine->breadcrumbs, ce);
+       spin_unlock(&ce->signal_lock);
+       if (release)
+               intel_context_put(ce);
 
-               clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-               i915_request_put(rq);
-       }
-       spin_unlock(&b->irq_lock);
+       i915_request_put(rq);
 }
 
 static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
@@ -454,18 +476,17 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
 
        drm_printf(p, "Signals:\n");
 
-       spin_lock_irq(&b->irq_lock);
-       list_for_each_entry(ce, &b->signalers, signal_link) {
-               list_for_each_entry(rq, &ce->signals, signal_link) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
+               list_for_each_entry_rcu(rq, &ce->signals, signal_link)
                        drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
                                   rq->fence.context, rq->fence.seqno,
                                   i915_request_completed(rq) ? "!" :
                                   i915_request_started(rq) ? "*" :
                                   "",
                                   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
-               }
        }
-       spin_unlock_irq(&b->irq_lock);
+       rcu_read_unlock();
 }
 
 void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
index 8e53b99..a74bb30 100644
  * the overhead of waking that client is much preferred.
  */
 struct intel_breadcrumbs {
-       spinlock_t irq_lock; /* protects the lists used in hardirq context */
-
        /* Not all breadcrumbs are attached to physical HW */
        struct intel_engine_cs *irq_engine;
 
+       spinlock_t signalers_lock; /* protects the list of signalers */
        struct list_head signalers;
-       struct list_head signaled_requests;
+       struct llist_head signaled_requests;
 
+       spinlock_t irq_lock; /* protects the interrupt from hardirq context */
        struct irq_work irq_work; /* for use from inside irq_lock */
-
        unsigned int irq_enabled;
-
        bool irq_armed;
 };
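
For readers following the llist conversion above: producers push completed requests with llist_add(), which reports whether the list was previously empty, so only the first producer has to kick the irq_work, and the consumer detaches the whole chain in one atomic step. A minimal sketch of that handoff, assuming nothing beyond the llist/irq_work APIs (struct pending and the producer/consumer names are illustrative, not driver symbols):

#include <linux/irq_work.h>
#include <linux/llist.h>
#include <linux/slab.h>

struct pending {
	struct llist_node node;
};

static LLIST_HEAD(pending_list);
static struct irq_work pending_work; /* init_irq_work(&pending_work, consumer) at setup */

static void producer(struct pending *p)
{
	/* llist_add() returns true iff the list was empty beforehand */
	if (llist_add(&p->node, &pending_list))
		irq_work_queue(&pending_work);
}

static void consumer(struct irq_work *work)
{
	struct pending *p, *pn;

	/* Detach the whole chain at once; later pushes start a new list */
	llist_for_each_entry_safe(p, pn, llist_del_all(&pending_list), node)
		kfree(p); /* process, then release */
}
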
 
index 92a3f25..349e7fa 100644
@@ -25,11 +25,18 @@ static struct intel_context *intel_context_alloc(void)
        return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
 }
 
-void intel_context_free(struct intel_context *ce)
+static void rcu_context_free(struct rcu_head *rcu)
 {
+       struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);
+
        kmem_cache_free(global.slab_ce, ce);
 }
 
+void intel_context_free(struct intel_context *ce)
+{
+       call_rcu(&ce->rcu, rcu_context_free);
+}
+
 struct intel_context *
 intel_context_create(struct intel_engine_cs *engine)
 {
@@ -356,8 +363,7 @@ static int __intel_context_active(struct i915_active *active)
 }
 
 void
-intel_context_init(struct intel_context *ce,
-                  struct intel_engine_cs *engine)
+intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 {
        GEM_BUG_ON(!engine->cops);
        GEM_BUG_ON(!engine->gt->vm);
@@ -373,7 +379,8 @@ intel_context_init(struct intel_context *ce,
 
        ce->vm = i915_vm_get(engine->gt->vm);
 
-       INIT_LIST_HEAD(&ce->signal_link);
+       /* NB ce->signal_link/lock is used under RCU */
+       spin_lock_init(&ce->signal_lock);
        INIT_LIST_HEAD(&ce->signals);
 
        mutex_init(&ce->pin_mutex);
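
For context on the call_rcu() conversion above: signal_irq_work() now walks ce->signals under rcu_read_lock() instead of b->irq_lock, so a context may only be returned to the allocator after a grace period. A generic sketch of the deferred-free pattern, using kfree() in place of the driver's slab cache:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct obj {
	struct rcu_head rcu;
	/* ... payload ... */
};

static void obj_free_rcu(struct rcu_head *rcu)
{
	/* Runs after a grace period: no RCU reader still holds a pointer */
	kfree(container_of(rcu, struct obj, rcu));
}

static void obj_free(struct obj *o)
{
	/* Readers that found o under rcu_read_lock() may still use it */
	call_rcu(&o->rcu, obj_free_rcu);
}
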
index 552cb57..52fa9c1 100644
@@ -25,6 +25,7 @@ DECLARE_EWMA(runtime, 3, 8);
 struct i915_gem_context;
 struct i915_gem_ww_ctx;
 struct i915_vma;
+struct intel_breadcrumbs;
 struct intel_context;
 struct intel_ring;
 
@@ -44,7 +45,16 @@ struct intel_context_ops {
 };
 
 struct intel_context {
-       struct kref ref;
+       /*
+        * Note: Some fields may be accessed under RCU.
+        *
+        * Unless otherwise noted, a field can safely be assumed to be protected
+        * by strong reference counting.
+        */
+       union {
+               struct kref ref; /* no kref_get_unless_zero()! */
+               struct rcu_head rcu;
+       };
 
        struct intel_engine_cs *engine;
        struct intel_engine_cs *inflight;
@@ -54,8 +64,15 @@ struct intel_context {
        struct i915_address_space *vm;
        struct i915_gem_context __rcu *gem_context;
 
-       struct list_head signal_link;
-       struct list_head signals;
+       /*
+        * @signal_lock protects the list of requests that need signaling,
+        * @signals. While there are any requests that need signaling,
+        * we add the context to the breadcrumbs worker, and remove it
+        * upon completion/cancellation of the last request.
+        */
+       struct list_head signal_link; /* Accessed under RCU */
+       struct list_head signals; /* Guarded by signal_lock */
+       spinlock_t signal_lock; /* protects signals, the list of requests */
 
        struct i915_vma *state;
        struct intel_ring *ring;
index f82c6dd..724b2cb 100644
 struct virtual_engine {
        struct intel_engine_cs base;
        struct intel_context context;
+       struct rcu_work rcu;
 
        /*
         * We allow only a single request through the virtual engine at a time
@@ -2787,6 +2788,9 @@ static void __execlists_hold(struct i915_request *rq)
 static bool execlists_hold(struct intel_engine_cs *engine,
                           struct i915_request *rq)
 {
+       if (i915_request_on_hold(rq))
+               return false;
+
        spin_lock_irq(&engine->active.lock);
 
        if (i915_request_completed(rq)) { /* too late! */
@@ -3168,8 +3172,10 @@ static void execlists_submission_tasklet(unsigned long data)
                spin_unlock_irqrestore(&engine->active.lock, flags);
 
                /* Recheck after serialising with direct-submission */
-               if (unlikely(timeout && preempt_timeout(engine)))
+               if (unlikely(timeout && preempt_timeout(engine))) {
+                       cancel_timer(&engine->execlists.preempt);
                        execlists_reset(engine, "preemption time out");
+               }
        }
 }
 
@@ -5425,44 +5431,90 @@ static struct list_head *virtual_queue(struct virtual_engine *ve)
        return &ve->base.execlists.default_priolist.requests[0];
 }
 
-static void virtual_context_destroy(struct kref *kref)
+static void rcu_virtual_context_destroy(struct work_struct *wrk)
 {
        struct virtual_engine *ve =
-               container_of(kref, typeof(*ve), context.ref);
+               container_of(wrk, typeof(*ve), rcu.work);
        unsigned int n;
 
-       GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-       GEM_BUG_ON(ve->request);
        GEM_BUG_ON(ve->context.inflight);
 
+       /* Preempt-to-busy may leave a stale request behind. */
+       if (unlikely(ve->request)) {
+               struct i915_request *old;
+
+               spin_lock_irq(&ve->base.active.lock);
+
+               old = fetch_and_zero(&ve->request);
+               if (old) {
+                       GEM_BUG_ON(!i915_request_completed(old));
+                       __i915_request_submit(old);
+                       i915_request_put(old);
+               }
+
+               spin_unlock_irq(&ve->base.active.lock);
+       }
+
+       /*
+        * Flush the tasklet in case it is still running on another core.
+        *
+        * This needs to be done before we remove ourselves from the siblings'
+        * rbtrees because, if it is running in parallel, it may reinsert
+        * the rb_node into a sibling.
+        */
+       tasklet_kill(&ve->base.execlists.tasklet);
+
+       /* Decouple ourselves from the siblings, no more access allowed. */
        for (n = 0; n < ve->num_siblings; n++) {
                struct intel_engine_cs *sibling = ve->siblings[n];
                struct rb_node *node = &ve->nodes[sibling->id].rb;
-               unsigned long flags;
 
                if (RB_EMPTY_NODE(node))
                        continue;
 
-               spin_lock_irqsave(&sibling->active.lock, flags);
+               spin_lock_irq(&sibling->active.lock);
 
                /* Detachment is lazily performed in the execlists tasklet */
                if (!RB_EMPTY_NODE(node))
                        rb_erase_cached(node, &sibling->execlists.virtual);
 
-               spin_unlock_irqrestore(&sibling->active.lock, flags);
+               spin_unlock_irq(&sibling->active.lock);
        }
        GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+       GEM_BUG_ON(!list_empty(virtual_queue(ve)));
 
        if (ve->context.state)
                __execlists_context_fini(&ve->context);
        intel_context_fini(&ve->context);
 
+       intel_breadcrumbs_free(ve->base.breadcrumbs);
        intel_engine_free_request_pool(&ve->base);
 
        kfree(ve->bonds);
        kfree(ve);
 }
 
+static void virtual_context_destroy(struct kref *kref)
+{
+       struct virtual_engine *ve =
+               container_of(kref, typeof(*ve), context.ref);
+
+       GEM_BUG_ON(!list_empty(&ve->context.signals));
+
+       /*
+        * When destroying the virtual engine, we have to be aware that
+        * it may still be in use from a hardirq/softirq context causing
+        * the resubmission of a completed request (background completion
+        * due to preempt-to-busy). Before we can free the engine, we need
+        * to flush the submission code and tasklets that are still potentially
+        * accessing the engine. Flushing the tasklets requires process context,
+        * and since we can guard the resubmit onto the engine with an RCU read
+        * lock, we can delegate the free of the engine to an RCU worker.
+        */
+       INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
+       queue_rcu_work(system_wq, &ve->rcu);
+}
+
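
The kref release above can run from hardirq/softirq context, yet the teardown needs both process context (tasklet_kill() sleeps) and an RCU grace period (the resubmit path is guarded by an RCU read lock). struct rcu_work provides exactly that combination. A minimal sketch under those assumptions (struct engine and the function names are illustrative):

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct engine {
	struct kref ref;
	struct rcu_work rcu;
};

static void engine_destroy_worker(struct work_struct *wrk)
{
	struct engine *e = container_of(to_rcu_work(wrk), struct engine, rcu);

	/* Process context, after a grace period: safe to sleep and to
	 * kill tasklets before freeing. */
	kfree(e);
}

static void engine_release(struct kref *kref)
{
	struct engine *e = container_of(kref, struct engine, ref);

	INIT_RCU_WORK(&e->rcu, engine_destroy_worker);
	queue_rcu_work(system_wq, &e->rcu);
}
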
 static void virtual_engine_initial_hint(struct virtual_engine *ve)
 {
        int swp;
index b8f56e6..413dadf 100644
@@ -59,8 +59,7 @@ struct drm_i915_mocs_table {
 #define _L3_CACHEABILITY(value)        ((value) << 4)
 
 /* Helper defines */
-#define GEN9_NUM_MOCS_ENTRIES  62  /* 62 out of 64 - 63 & 64 are reserved. */
-#define GEN11_NUM_MOCS_ENTRIES 64  /* 63-64 are reserved, but configured. */
+#define GEN9_NUM_MOCS_ENTRIES  64  /* 63-64 are reserved, but configured. */
 
 /* (e)LLC caching options */
 /*
@@ -131,7 +130,19 @@ static const struct drm_i915_mocs_entry skl_mocs_table[] = {
        GEN9_MOCS_ENTRIES,
        MOCS_ENTRY(I915_MOCS_CACHED,
                   LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
-                  L3_3_WB)
+                  L3_3_WB),
+
+       /*
+        * mocs:63
+        * - used by the L3 for all of its evictions.
+        *   Thus it is expected to allow LLC cacheability so that coherent
+        *   flows are maintained.
+        * - used to force L3 uncacheable cycles.
+        *   Thus it is expected to make the surface L3 uncacheable.
+        */
+       MOCS_ENTRY(63,
+                  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+                  L3_1_UC)
 };
 
 /* NOTE: the LE_TGT_CACHE is not used on Broxton */
@@ -243,8 +254,9 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
         * only, __init_mocs_table() takes care to program unused indices with
         * this entry.
         */
-       MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
-                  L3_3_WB),
+       MOCS_ENTRY(I915_MOCS_PTE,
+                  LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
+                  L3_1_UC),
        GEN11_MOCS_ENTRIES,
 
        /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
@@ -315,11 +327,11 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
        if (INTEL_GEN(i915) >= 12) {
                table->size  = ARRAY_SIZE(tgl_mocs_table);
                table->table = tgl_mocs_table;
-               table->n_entries = GEN11_NUM_MOCS_ENTRIES;
+               table->n_entries = GEN9_NUM_MOCS_ENTRIES;
        } else if (IS_GEN(i915, 11)) {
                table->size  = ARRAY_SIZE(icl_mocs_table);
                table->table = icl_mocs_table;
-               table->n_entries = GEN11_NUM_MOCS_ENTRIES;
+               table->n_entries = GEN9_NUM_MOCS_ENTRIES;
        } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) {
                table->size  = ARRAY_SIZE(skl_mocs_table);
                table->n_entries = GEN9_NUM_MOCS_ENTRIES;
index ab675d3..d7b8e44 100644
@@ -56,9 +56,12 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
 
 static void gen11_rc6_enable(struct intel_rc6 *rc6)
 {
-       struct intel_uncore *uncore = rc6_to_uncore(rc6);
+       struct intel_gt *gt = rc6_to_gt(rc6);
+       struct intel_uncore *uncore = gt->uncore;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
+       u32 pg_enable;
+       int i;
 
        /* 2b: Program RC6 thresholds.*/
        set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
@@ -102,10 +105,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
                GEN6_RC_CTL_RC6_ENABLE |
                GEN6_RC_CTL_EI_MODE(1);
 
-       set(uncore, GEN9_PG_ENABLE,
-           GEN9_RENDER_PG_ENABLE |
-           GEN9_MEDIA_PG_ENABLE |
-           GEN11_MEDIA_SAMPLER_PG_ENABLE);
+       pg_enable =
+               GEN9_RENDER_PG_ENABLE |
+               GEN9_MEDIA_PG_ENABLE |
+               GEN11_MEDIA_SAMPLER_PG_ENABLE;
+
+       if (INTEL_GEN(gt->i915) >= 12) {
+               for (i = 0; i < I915_MAX_VCS; i++)
+                       if (HAS_ENGINE(gt, _VCS(i)))
+                               pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
+                                             VDN_MFX_POWERGATE_ENABLE(i));
+       }
+
+       set(uncore, GEN9_PG_ENABLE, pg_enable);
 }
 
 static void gen9_rc6_enable(struct intel_rc6 *rc6)
index e6a00ee..c1c9cc0 100644
@@ -883,6 +883,10 @@ void intel_rps_park(struct intel_rps *rps)
                adj = -2;
        rps->last_adj = adj;
        rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
+       if (rps->cur_freq < rps->efficient_freq) {
+               rps->cur_freq = rps->efficient_freq;
+               rps->last_adj = 0;
+       }
 
        GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
 }
index 6c580d0..4a3bde7 100644
@@ -131,8 +131,10 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
                        return;
                }
 
-               if (wal->list)
+               if (wal->list) {
                        memcpy(list, wal->list, sizeof(*wa) * wal->count);
+                       kfree(wal->list);
+               }
 
                wal->list = list;
        }
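
The hunk above plugs a leak in the grow path: the enlarged array replaced wal->list, but the old buffer was never released. The essence as a generic helper (a sketch; in simple cases krealloc() packages the same copy-and-free sequence):

#include <linux/slab.h>
#include <linux/string.h>

static void *grow(void *old, size_t old_sz, size_t new_sz)
{
	void *n = kmalloc(new_sz, GFP_KERNEL);

	if (!n)
		return old;	/* keep the old buffer on failure */
	if (old) {
		memcpy(n, old, old_sz);
		kfree(old);	/* this free was the missing piece */
	}
	return n;
}
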
index f011ea4..5982b62 100644
@@ -73,7 +73,7 @@ void *shmem_pin_map(struct file *file)
        mapping_set_unevictable(file->f_mapping);
        return vaddr;
 err_page:
-       while (--i >= 0)
+       while (i--)
                put_page(pages[i]);
        kvfree(pages);
        return NULL;
@@ -103,10 +103,13 @@ static int __shmem_rw(struct file *file, loff_t off,
                        return PTR_ERR(page);
 
                vaddr = kmap(page);
-               if (write)
+               if (write) {
                        memcpy(vaddr + offset_in_page(off), ptr, this);
-               else
+                       set_page_dirty(page);
+               } else {
                        memcpy(ptr, vaddr + offset_in_page(off), this);
+               }
+               mark_page_accessed(page);
                kunmap(page);
                put_page(page);
 
index 7ba16dd..d7898e8 100644
@@ -164,7 +164,7 @@ static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = {
 
 /* let the virtual display support DP1.2 */
 static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = {
-       0x12, 0x014, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+       0x12, 0x014, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 };
 
 static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
index 9831361..a81cf0f 100644
@@ -255,7 +255,7 @@ struct intel_gvt_mmio {
 #define F_CMD_ACCESS   (1 << 3)
 /* This reg has been accessed by a VM */
 #define F_ACCESSED     (1 << 4)
-/* This reg has been accessed through GPU commands */
+/* This reg could be accessed with an unaligned address */
 #define F_UNALIGN      (1 << 6)
 /* This reg is in GVT's mmio save-restor list and in hardware
  * logical context image
index ad8a9df..778eb8c 100644
@@ -829,8 +829,10 @@ static int intel_vgpu_open(struct mdev_device *mdev)
        /* Take a module reference as mdev core doesn't take
         * a reference for vendor driver.
         */
-       if (!try_module_get(THIS_MODULE))
+       if (!try_module_get(THIS_MODULE)) {
+               ret = -ENODEV;
                goto undo_group;
+       }
 
        ret = kvmgt_guest_init(mdev);
        if (ret)
index f6d7e33..399582a 100644
@@ -439,7 +439,8 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 
        if (IS_BROADWELL(dev_priv))
                ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B);
-       else
+       /* FixMe: Re-enable APL/BXT once vfio_edid is enabled */
+       else if (!IS_BROXTON(dev_priv))
                ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D);
        if (ret)
                goto out_clean_sched_policy;
index e949769..3640d0e 100644
@@ -909,8 +909,13 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
                                       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
                if (ret)
                        return ret;
-               intel_uncore_write(uncore, oastatus_reg,
-                                  oastatus & ~GEN8_OASTATUS_REPORT_LOST);
+
+               intel_uncore_rmw(uncore, oastatus_reg,
+                                GEN8_OASTATUS_COUNTER_OVERFLOW |
+                                GEN8_OASTATUS_REPORT_LOST,
+                                IS_GEN_RANGE(uncore->i915, 8, 10) ?
+                                (GEN8_OASTATUS_HEAD_POINTER_WRAP |
+                                 GEN8_OASTATUS_TAIL_POINTER_WRAP) : 0);
        }
 
        return gen8_append_oa_reports(stream, buf, count, offset);
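
This relies on the usual read/modify/write semantics of intel_uncore_rmw(uncore, reg, clear, set); roughly the open-coded sequence below (a sketch of what the hunk expands to, not the helper's exact implementation — oastatus_clear() is an illustrative name):

static void oastatus_clear(struct intel_uncore *uncore, i915_reg_t oastatus_reg)
{
	u32 val = intel_uncore_read(uncore, oastatus_reg);

	/* Drop the clear bits, then or in the set bits, and write back */
	val &= ~(GEN8_OASTATUS_COUNTER_OVERFLOW | GEN8_OASTATUS_REPORT_LOST);
	if (IS_GEN_RANGE(uncore->i915, 8, 10))
		val |= GEN8_OASTATUS_HEAD_POINTER_WRAP |
		       GEN8_OASTATUS_TAIL_POINTER_WRAP;
	intel_uncore_write(uncore, oastatus_reg, val);
}
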
index d805d4d..5cd83ea 100644
@@ -676,6 +676,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define  GEN7_OASTATUS2_MEM_SELECT_GGTT     (1 << 0) /* 0: PPGTT, 1: GGTT */
 
 #define GEN8_OASTATUS _MMIO(0x2b08)
+#define  GEN8_OASTATUS_TAIL_POINTER_WRAP    (1 << 17)
+#define  GEN8_OASTATUS_HEAD_POINTER_WRAP    (1 << 16)
 #define  GEN8_OASTATUS_OVERRUN_STATUS      (1 << 3)
 #define  GEN8_OASTATUS_COUNTER_OVERFLOW     (1 << 2)
 #define  GEN8_OASTATUS_OABUFFER_OVERFLOW    (1 << 1)
@@ -8971,10 +8973,6 @@ enum {
 #define   GEN9_PWRGT_MEDIA_STATUS_MASK         (1 << 0)
 #define   GEN9_PWRGT_RENDER_STATUS_MASK                (1 << 1)
 
-#define POWERGATE_ENABLE                       _MMIO(0xa210)
-#define    VDN_HCP_POWERGATE_ENABLE(n)         BIT(((n) * 2) + 3)
-#define    VDN_MFX_POWERGATE_ENABLE(n)         BIT(((n) * 2) + 4)
-
 #define  GTFIFODBG                             _MMIO(0x120000)
 #define    GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV   (0x1f << 20)
 #define    GT_FIFO_FREE_ENTRIES_CHV            (0x7f << 13)
@@ -9114,9 +9112,11 @@ enum {
 #define GEN9_MEDIA_PG_IDLE_HYSTERESIS          _MMIO(0xA0C4)
 #define GEN9_RENDER_PG_IDLE_HYSTERESIS         _MMIO(0xA0C8)
 #define GEN9_PG_ENABLE                         _MMIO(0xA210)
-#define GEN9_RENDER_PG_ENABLE                  REG_BIT(0)
-#define GEN9_MEDIA_PG_ENABLE                   REG_BIT(1)
-#define GEN11_MEDIA_SAMPLER_PG_ENABLE          REG_BIT(2)
+#define   GEN9_RENDER_PG_ENABLE                        REG_BIT(0)
+#define   GEN9_MEDIA_PG_ENABLE                 REG_BIT(1)
+#define   GEN11_MEDIA_SAMPLER_PG_ENABLE                REG_BIT(2)
+#define   VDN_HCP_POWERGATE_ENABLE(n)          REG_BIT(3 + 2 * (n))
+#define   VDN_MFX_POWERGATE_ENABLE(n)          REG_BIT(4 + 2 * (n))
 #define GEN8_PUSHBUS_CONTROL                   _MMIO(0xA248)
 #define GEN8_PUSHBUS_ENABLE                    _MMIO(0xA250)
 #define GEN8_PUSHBUS_SHIFT                     _MMIO(0xA25C)
index 16b7210..620b6fa 100644
@@ -176,7 +176,9 @@ struct i915_request {
        struct intel_context *context;
        struct intel_ring *ring;
        struct intel_timeline __rcu *timeline;
+
        struct list_head signal_link;
+       struct llist_node signal_node;
 
        /*
         * The rcu epoch of when this request was allocated. Used to judiciously
index 34e0d22..cfb8067 100644
@@ -7118,23 +7118,10 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
 
 static void tgl_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-       u32 vd_pg_enable = 0;
-       unsigned int i;
-
        /* Wa_1409120013:tgl */
        I915_WRITE(ILK_DPFC_CHICKEN,
                   ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL);
 
-       /* This is not a WA. Enable VD HCP & MFX_ENC powergate */
-       for (i = 0; i < I915_MAX_VCS; i++) {
-               if (HAS_ENGINE(&dev_priv->gt, _VCS(i)))
-                       vd_pg_enable |= VDN_HCP_POWERGATE_ENABLE(i) |
-                                       VDN_MFX_POWERGATE_ENABLE(i);
-       }
-
-       I915_WRITE(POWERGATE_ENABLE,
-                  I915_READ(POWERGATE_ENABLE) | vd_pg_enable);
-
        /* Wa_1409825376:tgl (pre-prod)*/
        if (IS_TGL_DISP_REVID(dev_priv, TGL_REVID_A0, TGL_REVID_B1))
                I915_WRITE(GEN9_CLKGATE_DIS_3, I915_READ(GEN9_CLKGATE_DIS_3) |
index 23a6132..412e216 100644
@@ -211,8 +211,8 @@ static int igt_gem_ww_ctx(void *arg)
                return PTR_ERR(obj);
 
        obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE);
-       if (IS_ERR(obj)) {
-               err = PTR_ERR(obj);
+       if (IS_ERR(obj2)) {
+               err = PTR_ERR(obj2);
                goto put1;
        }
 
index 64bbb82..e424a6d 100644
@@ -2293,8 +2293,10 @@ static int perf_request_latency(void *arg)
                struct intel_context *ce;
 
                ce = intel_context_create(engine);
-               if (IS_ERR(ce))
+               if (IS_ERR(ce)) {
+                       err = PTR_ERR(ce);
                        goto out;
+               }
 
                err = intel_context_pin(ce);
                if (err) {
@@ -2467,8 +2469,10 @@ static int perf_series_engines(void *arg)
                struct intel_context *ce;
 
                ce = intel_context_create(engine);
-               if (IS_ERR(ce))
+               if (IS_ERR(ce)) {
+                       err = PTR_ERR(ce);
                        goto out;
+               }
 
                err = intel_context_pin(ce);
                if (err) {
index cf11c48..52f11a6 100644
@@ -522,15 +522,6 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi,
        return 0;
 }
 
-static void mtk_dpi_encoder_destroy(struct drm_encoder *encoder)
-{
-       drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs mtk_dpi_encoder_funcs = {
-       .destroy = mtk_dpi_encoder_destroy,
-};
-
 static int mtk_dpi_bridge_attach(struct drm_bridge *bridge,
                                 enum drm_bridge_attach_flags flags)
 {
index 4a188a9..65fd99c 100644
@@ -444,7 +444,10 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi)
        u32 horizontal_sync_active_byte;
        u32 horizontal_backporch_byte;
        u32 horizontal_frontporch_byte;
+       u32 horizontal_front_back_byte;
+       u32 data_phy_cycles_byte;
        u32 dsi_tmp_buf_bpp, data_phy_cycles;
+       u32 delta;
        struct mtk_phy_timing *timing = &dsi->phy_timing;
 
        struct videomode *vm = &dsi->vm;
@@ -466,50 +469,30 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi)
        horizontal_sync_active_byte = (vm->hsync_len * dsi_tmp_buf_bpp - 10);
 
        if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE)
-               horizontal_backporch_byte = vm->hback_porch * dsi_tmp_buf_bpp;
+               horizontal_backporch_byte = vm->hback_porch * dsi_tmp_buf_bpp - 10;
        else
                horizontal_backporch_byte = (vm->hback_porch + vm->hsync_len) *
-                                           dsi_tmp_buf_bpp;
+                                           dsi_tmp_buf_bpp - 10;
 
        data_phy_cycles = timing->lpx + timing->da_hs_prepare +
-                         timing->da_hs_zero + timing->da_hs_exit;
-
-       if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) {
-               if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp >
-                   data_phy_cycles * dsi->lanes + 18) {
-                       horizontal_frontporch_byte =
-                               vm->hfront_porch * dsi_tmp_buf_bpp -
-                               (data_phy_cycles * dsi->lanes + 18) *
-                               vm->hfront_porch /
-                               (vm->hfront_porch + vm->hback_porch);
-
-                       horizontal_backporch_byte =
-                               horizontal_backporch_byte -
-                               (data_phy_cycles * dsi->lanes + 18) *
-                               vm->hback_porch /
-                               (vm->hfront_porch + vm->hback_porch);
-               } else {
-                       DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n");
-                       horizontal_frontporch_byte = vm->hfront_porch *
-                                                    dsi_tmp_buf_bpp;
-               }
+                         timing->da_hs_zero + timing->da_hs_exit + 3;
+
+       delta = dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST ? 18 : 12;
+
+       horizontal_frontporch_byte = vm->hfront_porch * dsi_tmp_buf_bpp;
+       horizontal_front_back_byte = horizontal_frontporch_byte + horizontal_backporch_byte;
+       data_phy_cycles_byte = data_phy_cycles * dsi->lanes + delta;
+
+       if (horizontal_front_back_byte > data_phy_cycles_byte) {
+               horizontal_frontporch_byte -= data_phy_cycles_byte *
+                                             horizontal_frontporch_byte /
+                                             horizontal_front_back_byte;
+
+               horizontal_backporch_byte -= data_phy_cycles_byte *
+                                            horizontal_backporch_byte /
+                                            horizontal_front_back_byte;
        } else {
-               if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp >
-                   data_phy_cycles * dsi->lanes + 12) {
-                       horizontal_frontporch_byte =
-                               vm->hfront_porch * dsi_tmp_buf_bpp -
-                               (data_phy_cycles * dsi->lanes + 12) *
-                               vm->hfront_porch /
-                               (vm->hfront_porch + vm->hback_porch);
-                       horizontal_backporch_byte = horizontal_backporch_byte -
-                               (data_phy_cycles * dsi->lanes + 12) *
-                               vm->hback_porch /
-                               (vm->hfront_porch + vm->hback_porch);
-               } else {
-                       DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n");
-                       horizontal_frontporch_byte = vm->hfront_porch *
-                                                    dsi_tmp_buf_bpp;
-               }
+               DRM_WARN("HFP + HBP less than d-phy, FPS will under 60Hz\n");
        }
 
        writel(horizontal_sync_active_byte, dsi->regs + DSI_HSA_WC);
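
A worked example of the proportional split above, with made-up numbers: if the two porches total 800 bytes (hfp = 480, hbp = 320) and the D-PHY overhead is 100 bytes, each porch gives up its pro-rata share:

#include <stdio.h>

int main(void)
{
	unsigned int hfp = 480, hbp = 320, phy_bytes = 100;
	unsigned int total = hfp + hbp;

	hfp -= phy_bytes * hfp / total;	/* 480 - 60 = 420 */
	hbp -= phy_bytes * hbp / total;	/* 320 - 40 = 280 */
	printf("hfp=%u hbp=%u\n", hfp, hbp);	/* 100 bytes reclaimed in total */
	return 0;
}
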
index b721b8b..9e1224d 100644
@@ -22,6 +22,7 @@
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_plane.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_vblank.h>
@@ -484,17 +485,27 @@ static void mxsfb_plane_overlay_atomic_update(struct drm_plane *plane,
        writel(ctrl, mxsfb->base + LCDC_AS_CTRL);
 }
 
+static bool mxsfb_format_mod_supported(struct drm_plane *plane,
+                                      uint32_t format,
+                                      uint64_t modifier)
+{
+       return modifier == DRM_FORMAT_MOD_LINEAR;
+}
+
 static const struct drm_plane_helper_funcs mxsfb_plane_primary_helper_funcs = {
+       .prepare_fb = drm_gem_fb_prepare_fb,
        .atomic_check = mxsfb_plane_atomic_check,
        .atomic_update = mxsfb_plane_primary_atomic_update,
 };
 
 static const struct drm_plane_helper_funcs mxsfb_plane_overlay_helper_funcs = {
+       .prepare_fb = drm_gem_fb_prepare_fb,
        .atomic_check = mxsfb_plane_atomic_check,
        .atomic_update = mxsfb_plane_overlay_atomic_update,
 };
 
 static const struct drm_plane_funcs mxsfb_plane_funcs = {
+       .format_mod_supported   = mxsfb_format_mod_supported,
        .update_plane           = drm_atomic_helper_update_plane,
        .disable_plane          = drm_atomic_helper_disable_plane,
        .destroy                = drm_plane_cleanup,
index 56b335a..7daa12e 100644
@@ -1214,8 +1214,8 @@ retry:
                        }
 
                        reg->bus.offset = handle;
-                       ret = 0;
                }
+               ret = 0;
                break;
        default:
                ret = -EINVAL;
index 549bc67..c205138 100644
@@ -558,8 +558,10 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
                        NV_PRINTK(err, cli, "validating bo list\n");
                validate_fini(op, chan, NULL, NULL);
                return ret;
+       } else if (ret > 0) {
+               *apply_relocs = true;
        }
-       *apply_relocs = ret;
+
        return 0;
 }
 
@@ -662,7 +664,6 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
                nouveau_bo_wr32(nvbo, r->reloc_bo_offset >> 2, data);
        }
 
-       u_free(reloc);
        return ret;
 }
 
@@ -872,9 +873,10 @@ out:
                                break;
                        }
                }
-               u_free(reloc);
        }
 out_prevalid:
+       if (!IS_ERR(reloc))
+               u_free(reloc);
        u_free(bo);
        u_free(push);
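
The cleanup rule applied above: reloc may be unset, a valid allocation, or an ERR_PTR() from the copy helper, so the shared exit path must not hand an error-encoded pointer to the free routine. Generically (kvfree() stands in for u_free(), which is nouveau-internal):

#include <linux/err.h>
#include <linux/mm.h>

static void release(void *buf)
{
	/* buf may be NULL, a real allocation, or ERR_PTR(-E...) */
	if (!IS_ERR_OR_NULL(buf))
		kvfree(buf);
}
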
 
index 033fd30..282e4c8 100644
@@ -195,8 +195,7 @@ static void sdi_bridge_mode_set(struct drm_bridge *bridge,
        sdi->pixelclock = adjusted_mode->clock * 1000;
 }
 
-static void sdi_bridge_enable(struct drm_bridge *bridge,
-                             struct drm_bridge_state *bridge_state)
+static void sdi_bridge_enable(struct drm_bridge *bridge)
 {
        struct sdi_device *sdi = drm_bridge_to_sdi(bridge);
        struct dispc_clock_info dispc_cinfo;
@@ -259,8 +258,7 @@ err_get_dispc:
        regulator_disable(sdi->vdds_sdi_reg);
 }
 
-static void sdi_bridge_disable(struct drm_bridge *bridge,
-                              struct drm_bridge_state *bridge_state)
+static void sdi_bridge_disable(struct drm_bridge *bridge)
 {
        struct sdi_device *sdi = drm_bridge_to_sdi(bridge);
 
@@ -278,8 +276,8 @@ static const struct drm_bridge_funcs sdi_bridge_funcs = {
        .mode_valid = sdi_bridge_mode_valid,
        .mode_fixup = sdi_bridge_mode_fixup,
        .mode_set = sdi_bridge_mode_set,
-       .atomic_enable = sdi_bridge_enable,
-       .atomic_disable = sdi_bridge_disable,
+       .enable = sdi_bridge_enable,
+       .disable = sdi_bridge_disable,
 };
 
 static void sdi_bridge_init(struct sdi_device *sdi)
index e95fdfb..ba0b3ea 100644
@@ -629,7 +629,7 @@ static int acx565akm_probe(struct spi_device *spi)
        lcd->spi = spi;
        mutex_init(&lcd->mutex);
 
-       lcd->reset_gpio = devm_gpiod_get(&spi->dev, "reset", GPIOD_OUT_LOW);
+       lcd->reset_gpio = devm_gpiod_get(&spi->dev, "reset", GPIOD_OUT_HIGH);
        if (IS_ERR(lcd->reset_gpio)) {
                dev_err(&spi->dev, "failed to get reset GPIO\n");
                return PTR_ERR(lcd->reset_gpio);
index f292c6a..41edd0a 100644
@@ -544,7 +544,7 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master,
        struct device_node  *port, *endpoint;
        int ret = 0, child_count = 0;
        const char *name;
-       u32 endpoint_id;
+       u32 endpoint_id = 0;
 
        lvds->drm_dev = drm_dev;
        port = of_graph_get_port_by_id(dev->of_node, 1);
index 77497b4..55960cb 100644
@@ -814,9 +814,15 @@ static int sun4i_backend_bind(struct device *dev, struct device *master,
                 *
                 * XXX(hch): this has no business in a driver and needs to move
                 * to the device tree.
+                *
+                * A second call to dma_direct_set_offset() returns -EINVAL.
+                * Unfortunately, this happens when we have two backends in
+                * the system, and would result in the driver reporting an
+                * error even though it has been set up properly before.
+                * Ignore EINVAL, but it should really be removed eventually.
                 */
                ret = dma_direct_set_offset(drm->dev, PHYS_OFFSET, 0, SZ_4G);
-               if (ret)
+               if (ret && ret != -EINVAL)
                        return ret;
        }
 
index d4c0804..92add2c 100644
@@ -208,6 +208,7 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master,
        phy_node = of_parse_phandle(dev->of_node, "phys", 0);
        if (!phy_node) {
                dev_err(dev, "Can't found PHY phandle\n");
+               ret = -EINVAL;
                goto err_disable_clk_tmds;
        }
 
index ba9d1c3..e4baf07 100644
@@ -90,7 +90,7 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
        if (!fpriv)
                return -ENOMEM;
 
-       idr_init(&fpriv->contexts);
+       idr_init_base(&fpriv->contexts, 1);
        mutex_init(&fpriv->lock);
        filp->driver_priv = fpriv;
 
index 5a4fd0d..47d26b5 100644
@@ -129,7 +129,6 @@ int tegra_output_probe(struct tegra_output *output)
 
                if (!output->ddc) {
                        err = -EPROBE_DEFER;
-                       of_node_put(ddc);
                        return err;
                }
        }
index e88a17c..cc2aa23 100644
@@ -397,7 +397,6 @@ struct tegra_sor;
 struct tegra_sor_ops {
        const char *name;
        int (*probe)(struct tegra_sor *sor);
-       int (*remove)(struct tegra_sor *sor);
        void (*audio_enable)(struct tegra_sor *sor);
        void (*audio_disable)(struct tegra_sor *sor);
 };
@@ -2942,6 +2941,24 @@ static const struct drm_encoder_helper_funcs tegra_sor_dp_helpers = {
        .atomic_check = tegra_sor_encoder_atomic_check,
 };
 
+static void tegra_sor_disable_regulator(void *data)
+{
+       struct regulator *reg = data;
+
+       regulator_disable(reg);
+}
+
+static int tegra_sor_enable_regulator(struct tegra_sor *sor, struct regulator *reg)
+{
+       int err;
+
+       err = regulator_enable(reg);
+       if (err)
+               return err;
+
+       return devm_add_action_or_reset(sor->dev, tegra_sor_disable_regulator, reg);
+}
+
 static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
 {
        int err;
@@ -2953,7 +2970,7 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
                return PTR_ERR(sor->avdd_io_supply);
        }
 
-       err = regulator_enable(sor->avdd_io_supply);
+       err = tegra_sor_enable_regulator(sor, sor->avdd_io_supply);
        if (err < 0) {
                dev_err(sor->dev, "failed to enable AVDD I/O supply: %d\n",
                        err);
@@ -2967,7 +2984,7 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
                return PTR_ERR(sor->vdd_pll_supply);
        }
 
-       err = regulator_enable(sor->vdd_pll_supply);
+       err = tegra_sor_enable_regulator(sor, sor->vdd_pll_supply);
        if (err < 0) {
                dev_err(sor->dev, "failed to enable VDD PLL supply: %d\n",
                        err);
@@ -2981,7 +2998,7 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
                return PTR_ERR(sor->hdmi_supply);
        }
 
-       err = regulator_enable(sor->hdmi_supply);
+       err = tegra_sor_enable_regulator(sor, sor->hdmi_supply);
        if (err < 0) {
                dev_err(sor->dev, "failed to enable HDMI supply: %d\n", err);
                return err;
@@ -2992,19 +3009,9 @@ static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
        return 0;
 }
 
-static int tegra_sor_hdmi_remove(struct tegra_sor *sor)
-{
-       regulator_disable(sor->hdmi_supply);
-       regulator_disable(sor->vdd_pll_supply);
-       regulator_disable(sor->avdd_io_supply);
-
-       return 0;
-}
-
 static const struct tegra_sor_ops tegra_sor_hdmi_ops = {
        .name = "HDMI",
        .probe = tegra_sor_hdmi_probe,
-       .remove = tegra_sor_hdmi_remove,
        .audio_enable = tegra_sor_hdmi_audio_enable,
        .audio_disable = tegra_sor_hdmi_audio_disable,
 };
@@ -3017,7 +3024,7 @@ static int tegra_sor_dp_probe(struct tegra_sor *sor)
        if (IS_ERR(sor->avdd_io_supply))
                return PTR_ERR(sor->avdd_io_supply);
 
-       err = regulator_enable(sor->avdd_io_supply);
+       err = tegra_sor_enable_regulator(sor, sor->avdd_io_supply);
        if (err < 0)
                return err;
 
@@ -3025,25 +3032,16 @@ static int tegra_sor_dp_probe(struct tegra_sor *sor)
        if (IS_ERR(sor->vdd_pll_supply))
                return PTR_ERR(sor->vdd_pll_supply);
 
-       err = regulator_enable(sor->vdd_pll_supply);
+       err = tegra_sor_enable_regulator(sor, sor->vdd_pll_supply);
        if (err < 0)
                return err;
 
        return 0;
 }
 
-static int tegra_sor_dp_remove(struct tegra_sor *sor)
-{
-       regulator_disable(sor->vdd_pll_supply);
-       regulator_disable(sor->avdd_io_supply);
-
-       return 0;
-}
-
 static const struct tegra_sor_ops tegra_sor_dp_ops = {
        .name = "DP",
        .probe = tegra_sor_dp_probe,
-       .remove = tegra_sor_dp_remove,
 };
 
 static int tegra_sor_init(struct host1x_client *client)
@@ -3145,6 +3143,7 @@ static int tegra_sor_init(struct host1x_client *client)
                if (err < 0) {
                        dev_err(sor->dev, "failed to deassert SOR reset: %d\n",
                                err);
+                       clk_disable_unprepare(sor->clk);
                        return err;
                }
 
@@ -3152,12 +3151,17 @@ static int tegra_sor_init(struct host1x_client *client)
        }
 
        err = clk_prepare_enable(sor->clk_safe);
-       if (err < 0)
+       if (err < 0) {
+               clk_disable_unprepare(sor->clk);
                return err;
+       }
 
        err = clk_prepare_enable(sor->clk_dp);
-       if (err < 0)
+       if (err < 0) {
+               clk_disable_unprepare(sor->clk_safe);
+               clk_disable_unprepare(sor->clk);
                return err;
+       }
 
        return 0;
 }
@@ -3764,17 +3768,16 @@ static int tegra_sor_probe(struct platform_device *pdev)
                return err;
 
        err = tegra_output_probe(&sor->output);
-       if (err < 0) {
-               dev_err(&pdev->dev, "failed to probe output: %d\n", err);
-               return err;
-       }
+       if (err < 0)
+               return dev_err_probe(&pdev->dev, err,
+                                    "failed to probe output\n");
 
        if (sor->ops && sor->ops->probe) {
                err = sor->ops->probe(sor);
                if (err < 0) {
                        dev_err(&pdev->dev, "failed to probe %s: %d\n",
                                sor->ops->name, err);
-                       goto output;
+                       goto remove;
                }
        }
 
@@ -3955,9 +3958,6 @@ unregister:
 rpm_disable:
        pm_runtime_disable(&pdev->dev);
 remove:
-       if (sor->ops && sor->ops->remove)
-               sor->ops->remove(sor);
-output:
        tegra_output_remove(&sor->output);
        return err;
 }
@@ -3976,12 +3976,6 @@ static int tegra_sor_remove(struct platform_device *pdev)
 
        pm_runtime_disable(&pdev->dev);
 
-       if (sor->ops && sor->ops->remove) {
-               err = sor->ops->remove(sor);
-               if (err < 0)
-                       dev_err(&pdev->dev, "failed to remove SOR: %d\n", err);
-       }
-
        tegra_output_remove(&sor->output);
 
        return 0;
index 19b75be..c5f2944 100644
@@ -219,6 +219,7 @@ struct vc4_dev {
 
        struct drm_modeset_lock ctm_state_lock;
        struct drm_private_obj ctm_manager;
+       struct drm_private_obj hvs_channels;
        struct drm_private_obj load_tracker;
 
        /* List of vc4_debugfs_info_entry for adding to debugfs once
@@ -531,6 +532,9 @@ struct vc4_crtc_state {
                unsigned int top;
                unsigned int bottom;
        } margins;
+
+       /* Transitional state below, only valid during atomic commits */
+       bool update_muxing;
 };
 
 #define VC4_HVS_CHANNEL_DISABLED ((unsigned int)-1)
index 95779d5..afc178b 100644
@@ -760,12 +760,54 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder)
 {
 }
 
+#define WIFI_2_4GHz_CH1_MIN_FREQ       2400000000ULL
+#define WIFI_2_4GHz_CH1_MAX_FREQ       2422000000ULL
+
+static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder,
+                                        struct drm_crtc_state *crtc_state,
+                                        struct drm_connector_state *conn_state)
+{
+       struct drm_display_mode *mode = &crtc_state->adjusted_mode;
+       struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder);
+       unsigned long long pixel_rate = mode->clock * 1000;
+       unsigned long long tmds_rate;
+
+       if (vc4_hdmi->variant->unsupported_odd_h_timings &&
+           ((mode->hdisplay % 2) || (mode->hsync_start % 2) ||
+            (mode->hsync_end % 2) || (mode->htotal % 2)))
+               return -EINVAL;
+
+       /*
+        * The 1440p@60 pixel rate is in the same range as the first
+        * WiFi channel (between 2.4GHz and 2.422GHz with 22MHz
+        * bandwidth). Slightly lower the frequency to bring it out of
+        * the WiFi range.
+        */
+       tmds_rate = pixel_rate * 10;
+       if (vc4_hdmi->disable_wifi_frequencies &&
+           (tmds_rate >= WIFI_2_4GHz_CH1_MIN_FREQ &&
+            tmds_rate <= WIFI_2_4GHz_CH1_MAX_FREQ)) {
+               mode->clock = 238560;
+               pixel_rate = mode->clock * 1000;
+       }
+
+       if (pixel_rate > vc4_hdmi->variant->max_pixel_clock)
+               return -EINVAL;
+
+       return 0;
+}
+
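
To put numbers on the check above, assuming the common 2560x1440@60 mode with a 241500 kHz pixel clock: the TMDS rate is 241500 * 1000 * 10 = 2.415 GHz, inside the 2.400-2.422 GHz window, so the clock is nudged down to 238560 kHz, giving 2.3856 GHz, just below the band. The arithmetic as a sketch:

/* TMDS bit rate for an 8bpc mode: 10 bits per channel per pixel clock */
static unsigned long long tmds_rate_hz(unsigned int clock_khz)
{
	return (unsigned long long)clock_khz * 1000 * 10;
}

/* tmds_rate_hz(241500) == 2415000000 -> evade
 * tmds_rate_hz(238560) == 2385600000 -> clear of the 2.4 GHz band */
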
 static enum drm_mode_status
 vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder,
                            const struct drm_display_mode *mode)
 {
        struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder);
 
+       if (vc4_hdmi->variant->unsupported_odd_h_timings &&
+           ((mode->hdisplay % 2) || (mode->hsync_start % 2) ||
+            (mode->hsync_end % 2) || (mode->htotal % 2)))
+               return MODE_H_ILLEGAL;
+
        if ((mode->clock * 1000) > vc4_hdmi->variant->max_pixel_clock)
                return MODE_CLOCK_HIGH;
 
@@ -773,6 +815,7 @@ vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder,
 }
 
 static const struct drm_encoder_helper_funcs vc4_hdmi_encoder_helper_funcs = {
+       .atomic_check = vc4_hdmi_encoder_atomic_check,
        .mode_valid = vc4_hdmi_encoder_mode_valid,
        .disable = vc4_hdmi_encoder_disable,
        .enable = vc4_hdmi_encoder_enable,
@@ -1694,6 +1737,9 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
                vc4_hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW;
        }
 
+       vc4_hdmi->disable_wifi_frequencies =
+               of_property_read_bool(dev->of_node, "wifi-2.4ghz-coexistence");
+
        pm_runtime_enable(dev);
 
        drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
@@ -1817,6 +1863,7 @@ static const struct vc4_hdmi_variant bcm2711_hdmi0_variant = {
                PHY_LANE_2,
                PHY_LANE_CK,
        },
+       .unsupported_odd_h_timings      = true,
 
        .init_resources         = vc5_hdmi_init_resources,
        .csc_setup              = vc5_hdmi_csc_setup,
@@ -1842,6 +1889,7 @@ static const struct vc4_hdmi_variant bcm2711_hdmi1_variant = {
                PHY_LANE_CK,
                PHY_LANE_2,
        },
+       .unsupported_odd_h_timings      = true,
 
        .init_resources         = vc5_hdmi_init_resources,
        .csc_setup              = vc5_hdmi_csc_setup,
index 63c6f8b..0526a9c 100644
@@ -62,6 +62,9 @@ struct vc4_hdmi_variant {
         */
        enum vc4_hdmi_phy_channel phy_lane_mapping[4];
 
+       /* The BCM2711 cannot deal with odd horizontal pixel timings */
+       bool unsupported_odd_h_timings;
+
        /* Callback to get the resources (memory region, interrupts,
         * clocks, etc) for that variant.
         */
@@ -139,6 +142,14 @@ struct vc4_hdmi {
        int hpd_gpio;
        bool hpd_active_low;
 
+       /*
+        * On some systems (like the RPi4), some modes are in the same
+        * frequency range as the WiFi channels (1440p@60Hz, for
+        * example). Set when we should take evasive action because the
+        * system has a WiFi adapter.
+        */
+       bool disable_wifi_frequencies;
+
        struct cec_adapter *cec_adap;
        struct cec_msg cec_rx_msg;
        bool cec_tx_ok;
index 2b951ca..ba310c0 100644
@@ -24,6 +24,8 @@
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
+#define HVS_NUM_CHANNELS 3
+
 struct vc4_ctm_state {
        struct drm_private_state base;
        struct drm_color_ctm *ctm;
@@ -35,6 +37,17 @@ static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv)
        return container_of(priv, struct vc4_ctm_state, base);
 }
 
+struct vc4_hvs_state {
+       struct drm_private_state base;
+       unsigned int unassigned_channels;
+};
+
+static struct vc4_hvs_state *
+to_vc4_hvs_state(struct drm_private_state *priv)
+{
+       return container_of(priv, struct vc4_hvs_state, base);
+}
+
 struct vc4_load_tracker_state {
        struct drm_private_state base;
        u64 hvs_load;
@@ -113,7 +126,7 @@ static int vc4_ctm_obj_init(struct vc4_dev *vc4)
        drm_atomic_private_obj_init(&vc4->base, &vc4->ctm_manager, &ctm_state->base,
                                    &vc4_ctm_state_funcs);
 
-       return drmm_add_action(&vc4->base, vc4_ctm_obj_fini, NULL);
+       return drmm_add_action_or_reset(&vc4->base, vc4_ctm_obj_fini, NULL);
 }
 
 /* Converts a DRM S31.32 value to the HW S0.9 format. */
@@ -169,6 +182,19 @@ vc4_ctm_commit(struct vc4_dev *vc4, struct drm_atomic_state *state)
                  VC4_SET_FIELD(ctm_state->fifo, SCALER_OLEDOFFS_DISPFIFO));
 }
 
+static struct vc4_hvs_state *
+vc4_hvs_get_global_state(struct drm_atomic_state *state)
+{
+       struct vc4_dev *vc4 = to_vc4_dev(state->dev);
+       struct drm_private_state *priv_state;
+
+       priv_state = drm_atomic_get_private_obj_state(state, &vc4->hvs_channels);
+       if (IS_ERR(priv_state))
+               return ERR_CAST(priv_state);
+
+       return to_vc4_hvs_state(priv_state);
+}
+
 static void vc4_hvs_pv_muxing_commit(struct vc4_dev *vc4,
                                     struct drm_atomic_state *state)
 {
@@ -213,10 +239,7 @@ static void vc5_hvs_pv_muxing_commit(struct vc4_dev *vc4,
 {
        struct drm_crtc_state *crtc_state;
        struct drm_crtc *crtc;
-       unsigned char dsp2_mux = 0;
-       unsigned char dsp3_mux = 3;
-       unsigned char dsp4_mux = 3;
-       unsigned char dsp5_mux = 3;
+       unsigned char mux;
        unsigned int i;
        u32 reg;
 
@@ -224,50 +247,59 @@ static void vc5_hvs_pv_muxing_commit(struct vc4_dev *vc4,
                struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
                struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
 
-               if (!crtc_state->active)
+               if (!vc4_state->update_muxing)
                        continue;
 
                switch (vc4_crtc->data->hvs_output) {
                case 2:
-                       dsp2_mux = (vc4_state->assigned_channel == 2) ? 0 : 1;
+                       mux = (vc4_state->assigned_channel == 2) ? 0 : 1;
+                       reg = HVS_READ(SCALER_DISPECTRL);
+                       HVS_WRITE(SCALER_DISPECTRL,
+                                 (reg & ~SCALER_DISPECTRL_DSP2_MUX_MASK) |
+                                 VC4_SET_FIELD(mux, SCALER_DISPECTRL_DSP2_MUX));
                        break;
 
                case 3:
-                       dsp3_mux = vc4_state->assigned_channel;
+                       if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED)
+                               mux = 3;
+                       else
+                               mux = vc4_state->assigned_channel;
+
+                       reg = HVS_READ(SCALER_DISPCTRL);
+                       HVS_WRITE(SCALER_DISPCTRL,
+                                 (reg & ~SCALER_DISPCTRL_DSP3_MUX_MASK) |
+                                 VC4_SET_FIELD(mux, SCALER_DISPCTRL_DSP3_MUX));
                        break;
 
                case 4:
-                       dsp4_mux = vc4_state->assigned_channel;
+                       if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED)
+                               mux = 3;
+                       else
+                               mux = vc4_state->assigned_channel;
+
+                       reg = HVS_READ(SCALER_DISPEOLN);
+                       HVS_WRITE(SCALER_DISPEOLN,
+                                 (reg & ~SCALER_DISPEOLN_DSP4_MUX_MASK) |
+                                 VC4_SET_FIELD(mux, SCALER_DISPEOLN_DSP4_MUX));
+
                        break;
 
                case 5:
-                       dsp5_mux = vc4_state->assigned_channel;
+                       if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED)
+                               mux = 3;
+                       else
+                               mux = vc4_state->assigned_channel;
+
+                       reg = HVS_READ(SCALER_DISPDITHER);
+                       HVS_WRITE(SCALER_DISPDITHER,
+                                 (reg & ~SCALER_DISPDITHER_DSP5_MUX_MASK) |
+                                 VC4_SET_FIELD(mux, SCALER_DISPDITHER_DSP5_MUX));
                        break;
 
                default:
                        break;
                }
        }
-
-       reg = HVS_READ(SCALER_DISPECTRL);
-       HVS_WRITE(SCALER_DISPECTRL,
-                 (reg & ~SCALER_DISPECTRL_DSP2_MUX_MASK) |
-                 VC4_SET_FIELD(dsp2_mux, SCALER_DISPECTRL_DSP2_MUX));
-
-       reg = HVS_READ(SCALER_DISPCTRL);
-       HVS_WRITE(SCALER_DISPCTRL,
-                 (reg & ~SCALER_DISPCTRL_DSP3_MUX_MASK) |
-                 VC4_SET_FIELD(dsp3_mux, SCALER_DISPCTRL_DSP3_MUX));
-
-       reg = HVS_READ(SCALER_DISPEOLN);
-       HVS_WRITE(SCALER_DISPEOLN,
-                 (reg & ~SCALER_DISPEOLN_DSP4_MUX_MASK) |
-                 VC4_SET_FIELD(dsp4_mux, SCALER_DISPEOLN_DSP4_MUX));
-
-       reg = HVS_READ(SCALER_DISPDITHER);
-       HVS_WRITE(SCALER_DISPDITHER,
-                 (reg & ~SCALER_DISPDITHER_DSP5_MUX_MASK) |
-                 VC4_SET_FIELD(dsp5_mux, SCALER_DISPDITHER_DSP5_MUX));
 }
 
 static void
@@ -657,53 +689,123 @@ static int vc4_load_tracker_obj_init(struct vc4_dev *vc4)
                                    &load_state->base,
                                    &vc4_load_tracker_state_funcs);
 
-       return drmm_add_action(&vc4->base, vc4_load_tracker_obj_fini, NULL);
+       return drmm_add_action_or_reset(&vc4->base, vc4_load_tracker_obj_fini, NULL);
 }
 
-#define NUM_OUTPUTS  6
-#define NUM_CHANNELS 3
+static struct drm_private_state *
+vc4_hvs_channels_duplicate_state(struct drm_private_obj *obj)
+{
+       struct vc4_hvs_state *old_state = to_vc4_hvs_state(obj->state);
+       struct vc4_hvs_state *state;
+
+       state = kzalloc(sizeof(*state), GFP_KERNEL);
+       if (!state)
+               return NULL;
 
-static int
-vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
+       __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
+
+       state->unassigned_channels = old_state->unassigned_channels;
+
+       return &state->base;
+}
+
+static void vc4_hvs_channels_destroy_state(struct drm_private_obj *obj,
+                                          struct drm_private_state *state)
 {
-       unsigned long unassigned_channels = GENMASK(NUM_CHANNELS - 1, 0);
-       struct drm_crtc_state *old_crtc_state, *new_crtc_state;
-       struct drm_crtc *crtc;
-       int i, ret;
+       struct vc4_hvs_state *hvs_state = to_vc4_hvs_state(state);
 
-       /*
-        * Since the HVS FIFOs are shared across all the pixelvalves and
-        * the TXP (and thus all the CRTCs), we need to pull the current
-        * state of all the enabled CRTCs so that an update to a single
-        * CRTC still keeps the previous FIFOs enabled and assigned to
-        * the same CRTCs, instead of evaluating only the CRTC being
-        * modified.
-        */
-       list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-               struct drm_crtc_state *crtc_state;
+       kfree(hvs_state);
+}
 
-               if (!crtc->state->enable)
-                       continue;
+static const struct drm_private_state_funcs vc4_hvs_state_funcs = {
+       .atomic_duplicate_state = vc4_hvs_channels_duplicate_state,
+       .atomic_destroy_state = vc4_hvs_channels_destroy_state,
+};
 
-               crtc_state = drm_atomic_get_crtc_state(state, crtc);
-               if (IS_ERR(crtc_state))
-                       return PTR_ERR(crtc_state);
-       }
+static void vc4_hvs_channels_obj_fini(struct drm_device *dev, void *unused)
+{
+       struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+       drm_atomic_private_obj_fini(&vc4->hvs_channels);
+}
+
+static int vc4_hvs_channels_obj_init(struct vc4_dev *vc4)
+{
+       struct vc4_hvs_state *state;
+
+       state = kzalloc(sizeof(*state), GFP_KERNEL);
+       if (!state)
+               return -ENOMEM;
+
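+       /* All HVS channels (FIFOs) start out unassigned. */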
+       state->unassigned_channels = GENMASK(HVS_NUM_CHANNELS - 1, 0);
+       drm_atomic_private_obj_init(&vc4->base, &vc4->hvs_channels,
+                                   &state->base,
+                                   &vc4_hvs_state_funcs);
+
+       return drmm_add_action_or_reset(&vc4->base, vc4_hvs_channels_obj_fini, NULL);
+}
+
+/*
+ * The BCM2711 HVS has up to 7 outputs connected to the pixelvalves and
+ * the TXP (and therefore all the CRTCs found on that platform).
+ *
+ * The naive (and our initial) implementation would just iterate over
+ * all the active CRTCs, try to find a suitable FIFO, and then remove it
+ * from the pool of available FIFOs. However, there are a few corner
+ * cases that need to be considered:
+ *
+ * - When running in a dual-display setup (so with two CRTCs involved),
+ *   we can update the state of a single CRTC (for example by changing
+ *   its mode using xrandr under X11) without affecting the other. In
+ *   this case, the other CRTC wouldn't be in the state at all, so we
+ *   need to consider all the running CRTCs in the DRM device to assign
+ *   a FIFO, not just the one in the state.
+ *
+ * - To fix the above, we can't use drm_atomic_get_crtc_state on all
+ *   enabled CRTCs to pull their CRTC state into the global state, since
+ *   a page flip would then also wait for the vblank of every pulled-in
+ *   CRTC to complete. Since we don't have a guarantee that they are
+ *   actually active, that vblank might never happen, and shouldn't even
+ *   be waited on if we only want to page flip a single CRTC. That can
+ *   be tested by doing a modetest -v first on HDMI1 and then on HDMI0.
+ *
+ * - Since we need the pixelvalve to be disabled and enabled back when
+ *   the FIFO is changed, we should keep the FIFO assigned for as long
+ *   as the CRTC is enabled, only considering it free again once that
+ *   CRTC has been disabled. This can be tested by booting X11 on a
+ *   single display, and changing the resolution down and then back up.
+ */
+static int vc4_pv_muxing_atomic_check(struct drm_device *dev,
+                                     struct drm_atomic_state *state)
+{
+       struct vc4_hvs_state *hvs_new_state;
+       struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+       struct drm_crtc *crtc;
+       unsigned int i;
+
+       hvs_new_state = vc4_hvs_get_global_state(state);
+       if (!hvs_new_state)
+               return -EINVAL;
 
        for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+               struct vc4_crtc_state *old_vc4_crtc_state =
+                       to_vc4_crtc_state(old_crtc_state);
                struct vc4_crtc_state *new_vc4_crtc_state =
                        to_vc4_crtc_state(new_crtc_state);
                struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
                unsigned int matching_channels;
 
-               if (old_crtc_state->enable && !new_crtc_state->enable)
-                       new_vc4_crtc_state->assigned_channel = VC4_HVS_CHANNEL_DISABLED;
-
-               if (!new_crtc_state->enable)
+               /* Nothing to do here, let's skip it */
+               if (old_crtc_state->enable == new_crtc_state->enable)
                        continue;
 
-               if (new_vc4_crtc_state->assigned_channel != VC4_HVS_CHANNEL_DISABLED) {
-                       unassigned_channels &= ~BIT(new_vc4_crtc_state->assigned_channel);
+               /* Muxing will need to be modified, mark it as such */
+               new_vc4_crtc_state->update_muxing = true;
+
+               /* If we're disabling our CRTC, we put back our channel */
+               if (!new_crtc_state->enable) {
+                       hvs_new_state->unassigned_channels |= BIT(old_vc4_crtc_state->assigned_channel);
+                       new_vc4_crtc_state->assigned_channel = VC4_HVS_CHANNEL_DISABLED;
                        continue;
                }
 
@@ -731,17 +833,29 @@ vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
                 * the future, we will need to have something smarter,
                 * but it works so far.
                 */
-               matching_channels = unassigned_channels & vc4_crtc->data->hvs_available_channels;
+               matching_channels = hvs_new_state->unassigned_channels & vc4_crtc->data->hvs_available_channels;
                if (matching_channels) {
                        unsigned int channel = ffs(matching_channels) - 1;
 
                        new_vc4_crtc_state->assigned_channel = channel;
-                       unassigned_channels &= ~BIT(channel);
+                       hvs_new_state->unassigned_channels &= ~BIT(channel);
                } else {
                        return -EINVAL;
                }
        }
 
+       return 0;
+}
+
+static int
+vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
+{
+       int ret;
+
+       ret = vc4_pv_muxing_atomic_check(dev, state);
+       if (ret)
+               return ret;
+
        ret = vc4_ctm_atomic_check(dev, state);
        if (ret < 0)
                return ret;
@@ -808,6 +922,10 @@ int vc4_kms_load(struct drm_device *dev)
        if (ret)
                return ret;
 
+       ret = vc4_hvs_channels_obj_init(vc4);
+       if (ret)
+               return ret;
+
        drm_mode_config_reset(dev);
 
        drm_kms_helper_poll_init(dev);
index a50ba4a..b88f889 100644 (file)
 #define CP_2WHEEL_MOUSE_HACK           0x02
 #define CP_2WHEEL_MOUSE_HACK_ON                0x04
 
+#define VA_INVAL_LOGICAL_BOUNDARY      0x08
+
 /*
  * Some USB barcode readers from cypress have usage min and usage max in
  * the wrong order
  */
-static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+static __u8 *cp_rdesc_fixup(struct hid_device *hdev, __u8 *rdesc,
                unsigned int *rsize)
 {
-       unsigned long quirks = (unsigned long)hid_get_drvdata(hdev);
        unsigned int i;
 
-       if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX))
-               return rdesc;
-
        if (*rsize < 4)
                return rdesc;
 
@@ -48,6 +46,40 @@ static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
        return rdesc;
 }
 
+static __u8 *va_logical_boundary_fixup(struct hid_device *hdev, __u8 *rdesc,
+               unsigned int *rsize)
+{
+       /*
+        * Varmilo VA104M (with VID Cypress and device ID 07B1) incorrectly
+        * reports Logical Minimum of its Consumer Control device as 572
+        * (0x02 0x3c). Fix this by setting its Logical Minimum to zero.
+        */
+       if (*rsize == 25 &&
+                       rdesc[0] == 0x05 && rdesc[1] == 0x0c &&
+                       rdesc[2] == 0x09 && rdesc[3] == 0x01 &&
+                       rdesc[6] == 0x19 && rdesc[7] == 0x00 &&
+                       rdesc[11] == 0x16 && rdesc[12] == 0x3c && rdesc[13] == 0x02) {
+               hid_info(hdev,
+                        "fixing up varmilo VA104M consumer control report descriptor\n");
+               rdesc[12] = 0x00;
+               rdesc[13] = 0x00;
+       }
+       return rdesc;
+}
+
+static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+               unsigned int *rsize)
+{
+       unsigned long quirks = (unsigned long)hid_get_drvdata(hdev);
+
+       if (quirks & CP_RDESC_SWAPPED_MIN_MAX)
+               rdesc = cp_rdesc_fixup(hdev, rdesc, rsize);
+       if (quirks & VA_INVAL_LOGICAL_BOUNDARY)
+               rdesc = va_logical_boundary_fixup(hdev, rdesc, rsize);
+
+       return rdesc;
+}
+
 static int cp_input_mapped(struct hid_device *hdev, struct hid_input *hi,
                struct hid_field *field, struct hid_usage *usage,
                unsigned long **bit, int *max)
@@ -128,6 +160,8 @@ static const struct hid_device_id cp_devices[] = {
                .driver_data = CP_RDESC_SWAPPED_MIN_MAX },
        { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE),
                .driver_data = CP_2WHEEL_MOUSE_HACK },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_VARMILO_VA104M_07B1),
+               .driver_data = VA_INVAL_LOGICAL_BOUNDARY },
        { }
 };
 MODULE_DEVICE_TABLE(hid, cp_devices);
index d69842f..f170fea 100644 (file)
 #define USB_DEVICE_ID_CYPRESS_BARCODE_4        0xed81
 #define USB_DEVICE_ID_CYPRESS_TRUETOUCH        0xc001
 
+#define USB_DEVICE_ID_CYPRESS_VARMILO_VA104M_07B1   0x07b1
+
 #define USB_VENDOR_ID_DATA_MODUL       0x7374
 #define USB_VENDOR_ID_DATA_MODUL_EASYMAXTOUCH  0x1201
 
 #define USB_VENDOR_ID_FRUCTEL  0x25B6
 #define USB_DEVICE_ID_GAMETEL_MT_MODE  0x0002
 
+#define USB_VENDOR_ID_GAMEVICE 0x27F8
+#define USB_DEVICE_ID_GAMEVICE_GV186   0x0BBE
+#define USB_DEVICE_ID_GAMEVICE_KISHI   0x0BBF
+
 #define USB_VENDOR_ID_GAMERON          0x0810
 #define USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR 0x0001
 #define USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR 0x0002
 #define USB_DEVICE_ID_PENPOWER         0x00f4
 
 #define USB_VENDOR_ID_GREENASIA                0x0e8f
+#define USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR 0x3010
 #define USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD        0x3013
 
 #define USB_VENDOR_ID_GRETAGMACBETH    0x0971
 #define USB_VENDOR_ID_LOGITECH         0x046d
 #define USB_DEVICE_ID_LOGITECH_AUDIOHUB 0x0a0e
 #define USB_DEVICE_ID_LOGITECH_T651    0xb00c
+#define USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD 0xb309
 #define USB_DEVICE_ID_LOGITECH_C007    0xc007
 #define USB_DEVICE_ID_LOGITECH_C077    0xc077
 #define USB_DEVICE_ID_LOGITECH_RECEIVER        0xc101
 
 #define USB_VENDOR_ID_UGTIZER                  0x2179
 #define USB_DEVICE_ID_UGTIZER_TABLET_GP0610    0x0053
+#define USB_DEVICE_ID_UGTIZER_TABLET_GT5040    0x0077
 
 #define USB_VENDOR_ID_VIEWSONIC                        0x0543
 #define USB_DEVICE_ID_VIEWSONIC_PD1011         0xe621
index 9770db6..4dca113 100644 (file)
@@ -319,6 +319,9 @@ static const struct hid_device_id hid_battery_quirks[] = {
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ASUSTEK,
                USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD),
          HID_BATTERY_QUIRK_IGNORE },
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH,
+               USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD),
+         HID_BATTERY_QUIRK_IGNORE },
        {}
 };
 
index 044a93f..742c052 100644 (file)
 
 #include "hid-ids.h"
 
+#define QUIRK_TOUCHPAD_ON_OFF_REPORT           BIT(0)
+
+static __u8 *ite_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize)
+{
+       unsigned long quirks = (unsigned long)hid_get_drvdata(hdev);
+
+       if (quirks & QUIRK_TOUCHPAD_ON_OFF_REPORT) {
+               if (*rsize == 188 && rdesc[162] == 0x81 && rdesc[163] == 0x02) {
+                       hid_info(hdev, "Fixing up ITE keyboard report descriptor\n");
+                       rdesc[163] = HID_MAIN_ITEM_RELATIVE;
+               }
+       }
+
+       return rdesc;
+}
+
+static int ite_input_mapping(struct hid_device *hdev,
+               struct hid_input *hi, struct hid_field *field,
+               struct hid_usage *usage, unsigned long **bit,
+               int *max)
+{
+       unsigned long quirks = (unsigned long)hid_get_drvdata(hdev);
+
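+       /* Usage page 0x0088 carries the vendor-specific touchpad on/off
+        * toggle usages handled below.
+        */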
+       if ((quirks & QUIRK_TOUCHPAD_ON_OFF_REPORT) &&
+           (usage->hid & HID_USAGE_PAGE) == 0x00880000) {
+               if (usage->hid == 0x00880078) {
+                       /* Touchpad on, userspace expects F22 for this */
+                       hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_F22);
+                       return 1;
+               }
+               if (usage->hid == 0x00880079) {
+                       /* Touchpad off, userspace expects F23 for this */
+                       hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_F23);
+                       return 1;
+               }
+               return -1;
+       }
+
+       return 0;
+}
+
 static int ite_event(struct hid_device *hdev, struct hid_field *field,
                     struct hid_usage *usage, __s32 value)
 {
@@ -37,13 +79,27 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
        return 0;
 }
 
+static int ite_probe(struct hid_device *hdev, const struct hid_device_id *id)
+{
+       int ret;
+
+       hid_set_drvdata(hdev, (void *)id->driver_data);
+
+       ret = hid_open_report(hdev);
+       if (ret)
+               return ret;
+
+       return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+}
+
 static const struct hid_device_id ite_devices[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
        { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
        /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
        { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
                     USB_VENDOR_ID_SYNAPTICS,
-                    USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
+                    USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012),
+         .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT },
        /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */
        { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
                     USB_VENDOR_ID_SYNAPTICS,
@@ -55,6 +111,9 @@ MODULE_DEVICE_TABLE(hid, ite_devices);
 static struct hid_driver ite_driver = {
        .name = "itetech",
        .id_table = ite_devices,
+       .probe = ite_probe,
+       .report_fixup = ite_report_fixup,
+       .input_mapping = ite_input_mapping,
        .event = ite_event,
 };
 module_hid_driver(ite_driver);
index 72fb6e5..1ffcfc9 100644 (file)
@@ -328,7 +328,7 @@ static const char mse_bluetooth_descriptor[] = {
        0x25, 0x01,             /*      LOGICAL_MAX (1)                 */
        0x75, 0x01,             /*      REPORT_SIZE (1)                 */
        0x95, 0x04,             /*      REPORT_COUNT (4)                */
-       0x81, 0x06,             /*      INPUT                           */
+       0x81, 0x02,             /*      INPUT (Data,Var,Abs)            */
        0xC0,                   /*    END_COLLECTION                    */
        0xC0,                   /*  END_COLLECTION                      */
 };
@@ -866,11 +866,24 @@ static void logi_dj_recv_queue_notification(struct dj_receiver_dev *djrcv_dev,
        schedule_work(&djrcv_dev->work);
 }
 
+/*
+ * Some quad/bluetooth keyboards have a builtin touchpad; in this case we see
+ * only 1 paired device with a device_type of REPORT_TYPE_KEYBOARD. For the
+ * touchpad to work we must also forward mouse input reports to the dj_hiddev
+ * created for the keyboard (instead of forwarding them to a second paired
+ * device with a device_type of REPORT_TYPE_MOUSE as we normally would).
+ */
+static const u16 kbd_builtin_touchpad_ids[] = {
+       0xb309, /* Dinovo Edge */
+       0xb30c, /* Dinovo Mini */
+};
+
 static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev,
                                            struct hidpp_event *hidpp_report,
                                            struct dj_workitem *workitem)
 {
        struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev);
+       int i, id;
 
        workitem->type = WORKITEM_TYPE_PAIRED;
        workitem->device_type = hidpp_report->params[HIDPP_PARAM_DEVICE_INFO] &
@@ -882,6 +895,13 @@ static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev,
                workitem->reports_supported |= STD_KEYBOARD | MULTIMEDIA |
                                               POWER_KEYS | MEDIA_CENTER |
                                               HIDPP;
+               id = (workitem->quad_id_msb << 8) | workitem->quad_id_lsb;
+               for (i = 0; i < ARRAY_SIZE(kbd_builtin_touchpad_ids); i++) {
+                       if (id == kbd_builtin_touchpad_ids[i]) {
+                               workitem->reports_supported |= STD_MOUSE;
+                               break;
+                       }
+               }
                break;
        case REPORT_TYPE_MOUSE:
                workitem->reports_supported |= STD_MOUSE | HIDPP;
index b8b53dc..0ca7231 100644 (file)
@@ -93,6 +93,8 @@ MODULE_PARM_DESC(disable_tap_to_click,
 #define HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS  BIT(3)
 #define HIDPP_CAPABILITY_BATTERY_VOLTAGE       BIT(4)
 
+#define lg_map_key_clear(c)  hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c))
+
 /*
  * There are two hidpp protocols in use, the first version hidpp10 is known
  * as register access protocol or RAP, the second version hidpp20 is known as
@@ -2951,6 +2953,26 @@ static int g920_get_config(struct hidpp_device *hidpp,
 }
 
 /* -------------------------------------------------------------------------- */
+/* Logitech Dinovo Mini keyboard with builtin touchpad                        */
+/* -------------------------------------------------------------------------- */
+#define DINOVO_MINI_PRODUCT_ID         0xb30c
+
+static int lg_dinovo_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+               struct hid_field *field, struct hid_usage *usage,
+               unsigned long **bit, int *max)
+{
+       if ((usage->hid & HID_USAGE_PAGE) != HID_UP_LOGIVENDOR)
+               return 0;
+
+       switch (usage->hid & HID_USAGE) {
+       case 0x00d: lg_map_key_clear(KEY_MEDIA);        break;
+       default:
+               return 0;
+       }
+       return 1;
+}
+
+/* -------------------------------------------------------------------------- */
 /* HID++1.0 devices which use HID++ reports for their wheels                  */
 /* -------------------------------------------------------------------------- */
 static int hidpp10_wheel_connect(struct hidpp_device *hidpp)
@@ -3185,6 +3207,9 @@ static int hidpp_input_mapping(struct hid_device *hdev, struct hid_input *hi,
                        field->application != HID_GD_MOUSE)
                return m560_input_mapping(hdev, hi, field, usage, bit, max);
 
+       if (hdev->product == DINOVO_MINI_PRODUCT_ID)
+               return lg_dinovo_input_mapping(hdev, hi, field, usage, bit, max);
+
        return 0;
 }
 
@@ -3947,6 +3972,7 @@ static const struct hid_device_id hidpp_devices[] = {
          LDJ_DEVICE(0x405e), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
        { /* Mouse Logitech MX Anywhere 2 */
          LDJ_DEVICE(0x404a), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+       { LDJ_DEVICE(0x4072), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
        { LDJ_DEVICE(0xb013), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
        { LDJ_DEVICE(0xb018), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
        { LDJ_DEVICE(0xb01f), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
@@ -3971,6 +3997,9 @@ static const struct hid_device_id hidpp_devices[] = {
        { /* Keyboard MX5000 (Bluetooth-receiver in HID proxy mode) */
          LDJ_DEVICE(0xb305),
          .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
+       { /* Dinovo Edge (Bluetooth-receiver in HID proxy mode) */
+         LDJ_DEVICE(0xb309),
+         .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
        { /* Keyboard MX5500 (Bluetooth-receiver in HID proxy mode) */
          LDJ_DEVICE(0xb30b),
          .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
@@ -4013,6 +4042,9 @@ static const struct hid_device_id hidpp_devices[] = {
        { /* MX5000 keyboard over Bluetooth */
          HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb305),
          .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
+       { /* Dinovo Edge keyboard over Bluetooth */
+         HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb309),
+         .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
        { /* MX5500 keyboard over Bluetooth */
          HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb30b),
          .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
index 0d27ccb..4211b98 100644 (file)
@@ -49,6 +49,36 @@ enum {
        MCP2221_ALT_F_NOT_GPIOD = 0xEF,
 };
 
+/* MCP GPIO direction encoding */
+enum {
+       MCP2221_DIR_OUT = 0x00,
+       MCP2221_DIR_IN = 0x01,
+};
+
+#define MCP_NGPIO      4
+
+/* MCP GPIO set command layout */
+struct mcp_set_gpio {
+       u8 cmd;
+       u8 dummy;
+       struct {
+               u8 change_value;
+               u8 value;
+               u8 change_direction;
+               u8 direction;
+       } gpio[MCP_NGPIO];
+} __packed;
+
+/* MCP GPIO get command layout */
+struct mcp_get_gpio {
+       u8 cmd;
+       u8 dummy;
+       struct {
+               u8 direction;
+               u8 value;
+       } gpio[MCP_NGPIO];
+} __packed;
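+
+/*
+ * The two report layouts above exist so GPIO byte offsets can be
+ * computed with offsetof() instead of hand-rolled index arithmetic.
+ */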
+
 /*
  * There is no way to distinguish responses. Therefore next command
  * is sent only after response to previous has been received. Mutex
@@ -542,7 +572,7 @@ static int mcp_gpio_get(struct gpio_chip *gc,
 
        mcp->txbuf[0] = MCP2221_GPIO_GET;
 
-       mcp->gp_idx = (offset + 1) * 2;
+       mcp->gp_idx = offsetof(struct mcp_get_gpio, gpio[offset].value);
 
        mutex_lock(&mcp->lock);
        ret = mcp_send_data_req_status(mcp, mcp->txbuf, 1);
@@ -559,7 +589,7 @@ static void mcp_gpio_set(struct gpio_chip *gc,
        memset(mcp->txbuf, 0, 18);
        mcp->txbuf[0] = MCP2221_GPIO_SET;
 
-       mcp->gp_idx = ((offset + 1) * 4) - 1;
+       mcp->gp_idx = offsetof(struct mcp_set_gpio, gpio[offset].value);
 
        mcp->txbuf[mcp->gp_idx - 1] = 1;
        mcp->txbuf[mcp->gp_idx] = !!value;
@@ -575,7 +605,7 @@ static int mcp_gpio_dir_set(struct mcp2221 *mcp,
        memset(mcp->txbuf, 0, 18);
        mcp->txbuf[0] = MCP2221_GPIO_SET;
 
-       mcp->gp_idx = (offset + 1) * 5;
+       mcp->gp_idx = offsetof(struct mcp_set_gpio, gpio[offset].direction);
 
        mcp->txbuf[mcp->gp_idx - 1] = 1;
        mcp->txbuf[mcp->gp_idx] = val;
@@ -590,7 +620,7 @@ static int mcp_gpio_direction_input(struct gpio_chip *gc,
        struct mcp2221 *mcp = gpiochip_get_data(gc);
 
        mutex_lock(&mcp->lock);
-       ret = mcp_gpio_dir_set(mcp, offset, 0);
+       ret = mcp_gpio_dir_set(mcp, offset, MCP2221_DIR_IN);
        mutex_unlock(&mcp->lock);
 
        return ret;
@@ -603,7 +633,7 @@ static int mcp_gpio_direction_output(struct gpio_chip *gc,
        struct mcp2221 *mcp = gpiochip_get_data(gc);
 
        mutex_lock(&mcp->lock);
-       ret = mcp_gpio_dir_set(mcp, offset, 1);
+       ret = mcp_gpio_dir_set(mcp, offset, MCP2221_DIR_OUT);
        mutex_unlock(&mcp->lock);
 
        /* Can't configure as output, bailout early */
@@ -623,7 +653,7 @@ static int mcp_gpio_get_direction(struct gpio_chip *gc,
 
        mcp->txbuf[0] = MCP2221_GPIO_GET;
 
-       mcp->gp_idx = (offset + 1) * 2;
+       mcp->gp_idx = offsetof(struct mcp_get_gpio, gpio[offset].direction);
 
        mutex_lock(&mcp->lock);
        ret = mcp_send_data_req_status(mcp, mcp->txbuf, 1);
@@ -632,7 +662,7 @@ static int mcp_gpio_get_direction(struct gpio_chip *gc,
        if (ret)
                return ret;
 
-       if (mcp->gpio_dir)
+       if (mcp->gpio_dir == MCP2221_DIR_IN)
                return GPIO_LINE_DIRECTION_IN;
 
        return GPIO_LINE_DIRECTION_OUT;
@@ -758,7 +788,7 @@ static int mcp2221_raw_event(struct hid_device *hdev,
                                mcp->status = -ENOENT;
                        } else {
                                mcp->status = !!data[mcp->gp_idx];
-                               mcp->gpio_dir = !!data[mcp->gp_idx + 1];
+                               mcp->gpio_dir = data[mcp->gp_idx + 1];
                        }
                        break;
                default:
@@ -860,7 +890,7 @@ static int mcp2221_probe(struct hid_device *hdev,
        mcp->gc->get_direction = mcp_gpio_get_direction;
        mcp->gc->set = mcp_gpio_set;
        mcp->gc->get = mcp_gpio_get;
-       mcp->gc->ngpio = 4;
+       mcp->gc->ngpio = MCP_NGPIO;
        mcp->gc->base = -1;
        mcp->gc->can_sleep = 1;
        mcp->gc->parent = &hdev->dev;
index 7a2be02..bf7ecab 100644 (file)
@@ -83,7 +83,12 @@ static const struct hid_device_id hid_quirks[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER), HID_QUIRK_NO_INIT_REPORTS },
        { HID_USB_DEVICE(USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28), HID_QUIRK_NOGET },
        { HID_USB_DEVICE(USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY), HID_QUIRK_NO_INIT_REPORTS },
+       { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR), HID_QUIRK_MULTI_INPUT },
        { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD), HID_QUIRK_MULTI_INPUT },
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_GV186),
+               HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE },
+       { HID_USB_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_KISHI),
+               HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE },
        { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT },
        { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT },
        { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FLYING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT },
index 94c7398..3dd7d32 100644 (file)
@@ -483,7 +483,8 @@ static int sensor_hub_raw_event(struct hid_device *hdev,
                return 1;
 
        ptr = raw_data;
-       ptr++; /* Skip report id */
+       if (report->id)
+               ptr++; /* Skip report id */
 
        spin_lock_irqsave(&pdata->lock, flags);
 
index 86b5680..8e9c9e6 100644 (file)
@@ -385,6 +385,8 @@ static const struct hid_device_id uclogic_devices[] = {
                                USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER,
                                USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER,
+                               USB_DEVICE_ID_UGTIZER_TABLET_GT5040) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UGEE,
                                USB_DEVICE_ID_UGEE_TABLET_G5) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UGEE,
index 7d20d1f..d26d8cd 100644 (file)
@@ -997,6 +997,8 @@ int uclogic_params_init(struct uclogic_params *params,
                break;
        case VID_PID(USB_VENDOR_ID_UGTIZER,
                     USB_DEVICE_ID_UGTIZER_TABLET_GP0610):
+       case VID_PID(USB_VENDOR_ID_UGTIZER,
+                    USB_DEVICE_ID_UGTIZER_TABLET_GT5040):
        case VID_PID(USB_VENDOR_ID_UGEE,
                     USB_DEVICE_ID_UGEE_XPPEN_TABLET_G540):
        case VID_PID(USB_VENDOR_ID_UGEE,
index 786e3e9..aeff1ff 100644 (file)
@@ -943,6 +943,11 @@ static void i2c_hid_acpi_enable_wakeup(struct device *dev)
        }
 }
 
+static void i2c_hid_acpi_shutdown(struct device *dev)
+{
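+       /* Some devices only come back up cleanly after a reboot if they
+        * were put into D3cold on shutdown.
+        */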
+       acpi_device_set_power(ACPI_COMPANION(dev), ACPI_STATE_D3_COLD);
+}
+
 static const struct acpi_device_id i2c_hid_acpi_match[] = {
        {"ACPI0C50", 0 },
        {"PNP0C50", 0 },
@@ -959,6 +964,8 @@ static inline int i2c_hid_acpi_pdata(struct i2c_client *client,
 static inline void i2c_hid_acpi_fix_up_power(struct device *dev) {}
 
 static inline void i2c_hid_acpi_enable_wakeup(struct device *dev) {}
+
+static inline void i2c_hid_acpi_shutdown(struct device *dev) {}
 #endif
 
 #ifdef CONFIG_OF
@@ -1175,6 +1182,8 @@ static void i2c_hid_shutdown(struct i2c_client *client)
 
        i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
        free_irq(client->irq, ihid);
+
+       i2c_hid_acpi_shutdown(&client->dev);
 }
 
 #ifdef CONFIG_PM_SLEEP
index 0cde10f..f202ac7 100644 (file)
@@ -244,9 +244,13 @@ int hv_synic_cleanup(unsigned int cpu)
 
        /*
         * Hyper-V does not provide a way to change the connect CPU once
-        * it is set; we must prevent the connect CPU from going offline.
+        * it is set; we must prevent the connect CPU from going offline
+        * while the VM is running normally. But in the panic or kexec()
+        * path where the vmbus is already disconnected, the CPU must be
+        * allowed to shut down.
         */
-       if (cpu == VMBUS_CONNECT_CPU)
+       if (cpu == VMBUS_CONNECT_CPU &&
+           vmbus_connection.conn_state == CONNECTED)
                return -EBUSY;
 
        /*
index a97a9d0..a49e0ed 100644 (file)
@@ -734,6 +734,7 @@ config I2C_LPC2K
 config I2C_MLXBF
         tristate "Mellanox BlueField I2C controller"
         depends on MELLANOX_PLATFORM && ARM64
+       select I2C_SLAVE
         help
           Enabling this option will add I2C SMBus support for Mellanox BlueField
           system.
index c98529c..e6f8d6e 100644 (file)
@@ -412,6 +412,19 @@ static void i2c_imx_dma_free(struct imx_i2c_struct *i2c_imx)
        dma->chan_using = NULL;
 }
 
+static void i2c_imx_clear_irq(struct imx_i2c_struct *i2c_imx, unsigned int bits)
+{
+       unsigned int temp;
+
+       /*
+        * i2sr_clr_opcode is the value to clear all interrupts. Here we want to
+        * clear only <bits>, so we write ~i2sr_clr_opcode with just <bits>
+        * toggled. This is required because i.MX needs W0C and Vybrid uses W1C.
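+        * E.g. on i.MX (clr_opcode 0x00, W0C) this writes 0 at <bits> and
+        * 1 (a no-op) everywhere else; on Vybrid (W1C) the clearable bits
+        * are written as 1 only when they are part of <bits>.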
+        */
+       temp = ~i2c_imx->hwdata->i2sr_clr_opcode ^ bits;
+       imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR);
+}
+
 static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy, bool atomic)
 {
        unsigned long orig_jiffies = jiffies;
@@ -424,8 +437,7 @@ static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy, bool a
 
                /* check for arbitration lost */
                if (temp & I2SR_IAL) {
-                       temp &= ~I2SR_IAL;
-                       imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR);
+                       i2c_imx_clear_irq(i2c_imx, I2SR_IAL);
                        return -EAGAIN;
                }
 
@@ -469,7 +481,7 @@ static int i2c_imx_trx_complete(struct imx_i2c_struct *i2c_imx, bool atomic)
                 */
                readb_poll_timeout_atomic(addr, regval, regval & I2SR_IIF, 5, 1000 + 100);
                i2c_imx->i2csr = regval;
-               imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR);
+               i2c_imx_clear_irq(i2c_imx, I2SR_IIF | I2SR_IAL);
        } else {
                wait_event_timeout(i2c_imx->queue, i2c_imx->i2csr & I2SR_IIF, HZ / 10);
        }
@@ -478,6 +490,16 @@ static int i2c_imx_trx_complete(struct imx_i2c_struct *i2c_imx, bool atomic)
                dev_dbg(&i2c_imx->adapter.dev, "<%s> Timeout\n", __func__);
                return -ETIMEDOUT;
        }
+
+       /* check for arbitration lost */
+       if (i2c_imx->i2csr & I2SR_IAL) {
+               dev_dbg(&i2c_imx->adapter.dev, "<%s> Arbitration lost\n", __func__);
+               i2c_imx_clear_irq(i2c_imx, I2SR_IAL);
+
+               i2c_imx->i2csr = 0;
+               return -EAGAIN;
+       }
+
        dev_dbg(&i2c_imx->adapter.dev, "<%s> TRX complete\n", __func__);
        i2c_imx->i2csr = 0;
        return 0;
@@ -593,6 +615,8 @@ static void i2c_imx_stop(struct imx_i2c_struct *i2c_imx, bool atomic)
                /* Stop I2C transaction */
                dev_dbg(&i2c_imx->adapter.dev, "<%s>\n", __func__);
                temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
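+               /* If MSTA is already clear, the controller has left
+                * master mode on its own (e.g. after losing arbitration)
+                * and will not generate a STOP, so don't wait for the
+                * bus to go idle.
+                */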
+               if (!(temp & I2CR_MSTA))
+                       i2c_imx->stopped = 1;
                temp &= ~(I2CR_MSTA | I2CR_MTX);
                if (i2c_imx->dma)
                        temp &= ~I2CR_DMAEN;
@@ -623,9 +647,7 @@ static irqreturn_t i2c_imx_isr(int irq, void *dev_id)
        if (temp & I2SR_IIF) {
                /* save status register */
                i2c_imx->i2csr = temp;
-               temp &= ~I2SR_IIF;
-               temp |= (i2c_imx->hwdata->i2sr_clr_opcode & I2SR_IIF);
-               imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR);
+               i2c_imx_clear_irq(i2c_imx, I2SR_IIF);
                wake_up(&i2c_imx->queue);
                return IRQ_HANDLED;
        }
@@ -758,9 +780,12 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
                 */
                dev_dbg(dev, "<%s> clear MSTA\n", __func__);
                temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
+               if (!(temp & I2CR_MSTA))
+                       i2c_imx->stopped = 1;
                temp &= ~(I2CR_MSTA | I2CR_MTX);
                imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
-               i2c_imx_bus_busy(i2c_imx, 0, false);
+               if (!i2c_imx->stopped)
+                       i2c_imx_bus_busy(i2c_imx, 0, false);
        } else {
                /*
                 * For i2c master receiver repeat restart operation like:
@@ -885,9 +910,12 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs,
                                dev_dbg(&i2c_imx->adapter.dev,
                                        "<%s> clear MSTA\n", __func__);
                                temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
+                               if (!(temp & I2CR_MSTA))
+                                       i2c_imx->stopped = 1;
                                temp &= ~(I2CR_MSTA | I2CR_MTX);
                                imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
-                               i2c_imx_bus_busy(i2c_imx, 0, atomic);
+                               if (!i2c_imx->stopped)
+                                       i2c_imx_bus_busy(i2c_imx, 0, atomic);
                        } else {
                                /*
                                 * For i2c master receiver repeat restart operation like:
index 33574d4..2fb0532 100644 (file)
@@ -1258,9 +1258,9 @@ static int mlxbf_i2c_get_gpio(struct platform_device *pdev,
                return -EFAULT;
 
        gpio_res->io = devm_ioremap(dev, params->start, size);
-       if (IS_ERR(gpio_res->io)) {
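+       /* devm_ioremap() returns NULL on failure, never an ERR_PTR. */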
+       if (!gpio_res->io) {
                devm_release_mem_region(dev, params->start, size);
-               return PTR_ERR(gpio_res->io);
+               return -ENOMEM;
        }
 
        return 0;
@@ -1323,9 +1323,9 @@ static int mlxbf_i2c_get_corepll(struct platform_device *pdev,
                return -EFAULT;
 
        corepll_res->io = devm_ioremap(dev, params->start, size);
-       if (IS_ERR(corepll_res->io)) {
+       if (!corepll_res->io) {
                devm_release_mem_region(dev, params->start, size);
-               return PTR_ERR(corepll_res->io);
+               return -ENOMEM;
        }
 
        return 0;
@@ -1717,9 +1717,9 @@ static int mlxbf_i2c_init_coalesce(struct platform_device *pdev,
                        return -EFAULT;
 
                coalesce_res->io = ioremap(params->start, size);
-               if (IS_ERR(coalesce_res->io)) {
+               if (!coalesce_res->io) {
                        release_mem_region(params->start, size);
-                       return PTR_ERR(coalesce_res->io);
+                       return -ENOMEM;
                }
 
                priv->coalesce = coalesce_res;
index f13735b..1c259b5 100644 (file)
@@ -194,9 +194,9 @@ static irqreturn_t cci_isr(int irq, void *dev)
        if (unlikely(val & CCI_IRQ_STATUS_0_I2C_M1_ERROR)) {
                if (val & CCI_IRQ_STATUS_0_I2C_M1_Q0_NACK_ERR ||
                        val & CCI_IRQ_STATUS_0_I2C_M1_Q1_NACK_ERR)
-                       cci->master[0].status = -ENXIO;
+                       cci->master[1].status = -ENXIO;
                else
-                       cci->master[0].status = -EIO;
+                       cci->master[1].status = -EIO;
 
                writel(CCI_HALT_REQ_I2C_M1_Q0Q1, cci->base + CCI_HALT_REQ);
                ret = IRQ_HANDLED;
index fbc04b6..5a47915 100644 (file)
@@ -801,7 +801,8 @@ static int qup_i2c_bam_schedule_desc(struct qup_i2c_dev *qup)
        if (ret || qup->bus_err || qup->qup_err) {
                reinit_completion(&qup->xfer);
 
-               if (qup_i2c_change_state(qup, QUP_RUN_STATE)) {
+               ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+               if (ret) {
                        dev_err(qup->dev, "change to run state timed out");
                        goto desc_err;
                }
index 01bace4..d793355 100644 (file)
@@ -126,26 +126,9 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
        struct cpuidle_state *state = &drv->states[index];
        unsigned long eax = flg2MWAIT(state->flags);
        unsigned long ecx = 1; /* break on interrupt flag */
-       bool tick;
-
-       if (!static_cpu_has(X86_FEATURE_ARAT)) {
-               /*
-                * Switch over to one-shot tick broadcast if the target C-state
-                * is deeper than C1.
-                */
-               if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) {
-                       tick = true;
-                       tick_broadcast_enter();
-               } else {
-                       tick = false;
-               }
-       }
 
        mwait_idle_with_hints(eax, ecx);
 
-       if (!static_cpu_has(X86_FEATURE_ARAT) && tick)
-               tick_broadcast_exit();
-
        return index;
 }
 
@@ -1157,6 +1140,20 @@ static bool __init intel_idle_max_cstate_reached(int cstate)
        return false;
 }
 
+static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
+{
+       unsigned long eax = flg2MWAIT(state->flags);
+
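+       /* An always-running APIC timer (ARAT) keeps ticking in deep
+        * C-states, so no broadcast timer is needed.
+        */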
+       if (boot_cpu_has(X86_FEATURE_ARAT))
+               return false;
+
+       /*
+        * Switch over to one-shot tick broadcast if the target C-state
+        * is deeper than C1.
+        */
+       return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
+}
+
 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
 #include <acpi/processor.h>
 
@@ -1269,6 +1266,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
                if (disabled_states_mask & BIT(cstate))
                        state->flags |= CPUIDLE_FLAG_OFF;
 
+               if (intel_idle_state_needs_timer_stop(state))
+                       state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+
                state->enter = intel_idle;
                state->enter_s2idle = intel_idle_s2idle;
        }
@@ -1507,6 +1507,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
                     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
                        drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
 
+               if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
+                       drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
+
                drv->state_count++;
        }
 
index beb38d9..560a337 100644 (file)
@@ -126,6 +126,12 @@ enum kx_chipset {
        KX_MAX_CHIPS /* this must be last */
 };
 
+enum kx_acpi_type {
+       ACPI_GENERIC,
+       ACPI_SMO8500,
+       ACPI_KIOX010A,
+};
+
 struct kxcjk1013_data {
        struct i2c_client *client;
        struct iio_trigger *dready_trig;
@@ -143,7 +149,7 @@ struct kxcjk1013_data {
        bool motion_trigger_on;
        int64_t timestamp;
        enum kx_chipset chipset;
-       bool is_smo8500_device;
+       enum kx_acpi_type acpi_type;
 };
 
 enum kxcjk1013_axis {
@@ -270,6 +276,32 @@ static const struct {
                              {19163, 1, 0},
                              {38326, 0, 1} };
 
+#ifdef CONFIG_ACPI
+enum kiox010a_fn_index {
+       KIOX010A_SET_LAPTOP_MODE = 1,
+       KIOX010A_SET_TABLET_MODE = 2,
+};
+
+static int kiox010a_dsm(struct device *dev, int fn_index)
+{
+       acpi_handle handle = ACPI_HANDLE(dev);
+       guid_t kiox010a_dsm_guid;
+       union acpi_object *obj;
+
+       if (!handle)
+               return -ENODEV;
+
+       guid_parse("1f339696-d475-4e26-8cad-2e9f8e6d7a91", &kiox010a_dsm_guid);
+
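+       /* Evaluate the _DSM (revision 1, no package arguments) that
+        * switches the device between laptop and tablet mode.
+        */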
+       obj = acpi_evaluate_dsm(handle, &kiox010a_dsm_guid, 1, fn_index, NULL);
+       if (!obj)
+               return -EIO;
+
+       ACPI_FREE(obj);
+       return 0;
+}
+#endif
+
 static int kxcjk1013_set_mode(struct kxcjk1013_data *data,
                              enum kxcjk1013_mode mode)
 {
@@ -347,6 +379,13 @@ static int kxcjk1013_chip_init(struct kxcjk1013_data *data)
 {
        int ret;
 
+#ifdef CONFIG_ACPI
+       if (data->acpi_type == ACPI_KIOX010A) {
+               /* Make sure the kbd and touchpad on 2-in-1s using 2 KXCJ91008-s work */
+               kiox010a_dsm(&data->client->dev, KIOX010A_SET_LAPTOP_MODE);
+       }
+#endif
+
        ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_WHO_AM_I);
        if (ret < 0) {
                dev_err(&data->client->dev, "Error reading who_am_i\n");
@@ -1247,7 +1286,7 @@ static irqreturn_t kxcjk1013_data_rdy_trig_poll(int irq, void *private)
 
 static const char *kxcjk1013_match_acpi_device(struct device *dev,
                                               enum kx_chipset *chipset,
-                                              bool *is_smo8500_device)
+                                              enum kx_acpi_type *acpi_type)
 {
        const struct acpi_device_id *id;
 
@@ -1256,7 +1295,9 @@ static const char *kxcjk1013_match_acpi_device(struct device *dev,
                return NULL;
 
        if (strcmp(id->id, "SMO8500") == 0)
-               *is_smo8500_device = true;
+               *acpi_type = ACPI_SMO8500;
+       else if (strcmp(id->id, "KIOX010A") == 0)
+               *acpi_type = ACPI_KIOX010A;
 
        *chipset = (enum kx_chipset)id->driver_data;
 
@@ -1299,7 +1340,7 @@ static int kxcjk1013_probe(struct i2c_client *client,
        } else if (ACPI_HANDLE(&client->dev)) {
                name = kxcjk1013_match_acpi_device(&client->dev,
                                                   &data->chipset,
-                                                  &data->is_smo8500_device);
+                                                  &data->acpi_type);
        } else
                return -ENODEV;
 
@@ -1316,7 +1357,7 @@ static int kxcjk1013_probe(struct i2c_client *client,
        indio_dev->modes = INDIO_DIRECT_MODE;
        indio_dev->info = &kxcjk1013_info;
 
-       if (client->irq > 0 && !data->is_smo8500_device) {
+       if (client->irq > 0 && data->acpi_type != ACPI_SMO8500) {
                ret = devm_request_threaded_irq(&client->dev, client->irq,
                                                kxcjk1013_data_rdy_trig_poll,
                                                kxcjk1013_event_handler,
index 92b2508..1aafbe2 100644 (file)
@@ -71,7 +71,7 @@
 #define JZ4725B_ADC_BATTERY_HIGH_VREF_BITS     10
 #define JZ4740_ADC_BATTERY_HIGH_VREF           (7500 * 0.986)
 #define JZ4740_ADC_BATTERY_HIGH_VREF_BITS      12
-#define JZ4770_ADC_BATTERY_VREF                        6600
+#define JZ4770_ADC_BATTERY_VREF                        1200
 #define JZ4770_ADC_BATTERY_VREF_BITS           12
 
 #define JZ_ADC_IRQ_AUX                 BIT(0)
@@ -177,13 +177,12 @@ static void ingenic_adc_set_config(struct ingenic_adc *adc,
        mutex_unlock(&adc->lock);
 }
 
-static void ingenic_adc_enable(struct ingenic_adc *adc,
-                              int engine,
-                              bool enabled)
+static void ingenic_adc_enable_unlocked(struct ingenic_adc *adc,
+                                       int engine,
+                                       bool enabled)
 {
        u8 val;
 
-       mutex_lock(&adc->lock);
        val = readb(adc->base + JZ_ADC_REG_ENABLE);
 
        if (enabled)
@@ -192,20 +191,41 @@ static void ingenic_adc_enable(struct ingenic_adc *adc,
                val &= ~BIT(engine);
 
        writeb(val, adc->base + JZ_ADC_REG_ENABLE);
+}
+
+static void ingenic_adc_enable(struct ingenic_adc *adc,
+                              int engine,
+                              bool enabled)
+{
+       mutex_lock(&adc->lock);
+       ingenic_adc_enable_unlocked(adc, engine, enabled);
        mutex_unlock(&adc->lock);
 }
 
 static int ingenic_adc_capture(struct ingenic_adc *adc,
                               int engine)
 {
+       u32 cfg;
        u8 val;
        int ret;
 
-       ingenic_adc_enable(adc, engine, true);
+       /*
+        * Disable CMD_SEL temporarily, because it causes wrong VBAT readings,
+        * probably due to the switch of VREF. We must keep the lock here to
+        * avoid races with the buffer enable/disable functions.
+        */
+       mutex_lock(&adc->lock);
+       cfg = readl(adc->base + JZ_ADC_REG_CFG);
+       writel(cfg & ~JZ_ADC_REG_CFG_CMD_SEL, adc->base + JZ_ADC_REG_CFG);
+
+       ingenic_adc_enable_unlocked(adc, engine, true);
        ret = readb_poll_timeout(adc->base + JZ_ADC_REG_ENABLE, val,
                                 !(val & BIT(engine)), 250, 1000);
        if (ret)
-               ingenic_adc_enable(adc, engine, false);
+               ingenic_adc_enable_unlocked(adc, engine, false);
+
+       writel(cfg, adc->base + JZ_ADC_REG_CFG);
+       mutex_unlock(&adc->lock);
 
        return ret;
 }
index ac415cb..79c1dd6 100644 (file)
@@ -9,9 +9,9 @@
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/iopoll.h>
 #include <linux/io.h>
 #include <linux/iio/iio.h>
@@ -276,6 +276,8 @@ static int mt6577_auxadc_probe(struct platform_device *pdev)
                goto err_disable_clk;
        }
 
+       adc_dev->dev_comp = device_get_match_data(&pdev->dev);
+
        mutex_init(&adc_dev->lock);
 
        mt6577_auxadc_mod_reg(adc_dev->reg_base + MT6577_AUXADC_MISC,
index cd870c0..a83199b 100644 (file)
  * struct stm32_adc_common_regs - stm32 common registers
  * @csr:       common status register offset
  * @ccr:       common control register offset
- * @eoc1_msk:  adc1 end of conversion flag in @csr
- * @eoc2_msk:  adc2 end of conversion flag in @csr
- * @eoc3_msk:  adc3 end of conversion flag in @csr
+ * @eoc_msk:   array of eoc (end of conversion flag) masks in @csr for adc1..n
+ * @ovr_msk:   array of ovr (overrun flag) masks in @csr for adc1..n
  * @ier:       interrupt enable register offset for each adc
  * @eocie_msk: end of conversion interrupt enable mask in @ier
  */
 struct stm32_adc_common_regs {
        u32 csr;
        u32 ccr;
-       u32 eoc1_msk;
-       u32 eoc2_msk;
-       u32 eoc3_msk;
+       u32 eoc_msk[STM32_ADC_MAX_ADCS];
+       u32 ovr_msk[STM32_ADC_MAX_ADCS];
        u32 ier;
        u32 eocie_msk;
 };
@@ -282,21 +280,20 @@ out:
 static const struct stm32_adc_common_regs stm32f4_adc_common_regs = {
        .csr = STM32F4_ADC_CSR,
        .ccr = STM32F4_ADC_CCR,
-       .eoc1_msk = STM32F4_EOC1 | STM32F4_OVR1,
-       .eoc2_msk = STM32F4_EOC2 | STM32F4_OVR2,
-       .eoc3_msk = STM32F4_EOC3 | STM32F4_OVR3,
+       .eoc_msk = { STM32F4_EOC1, STM32F4_EOC2, STM32F4_EOC3 },
+       .ovr_msk = { STM32F4_OVR1, STM32F4_OVR2, STM32F4_OVR3 },
        .ier = STM32F4_ADC_CR1,
-       .eocie_msk = STM32F4_EOCIE | STM32F4_OVRIE,
+       .eocie_msk = STM32F4_EOCIE,
 };
 
 /* STM32H7 common registers definitions */
 static const struct stm32_adc_common_regs stm32h7_adc_common_regs = {
        .csr = STM32H7_ADC_CSR,
        .ccr = STM32H7_ADC_CCR,
-       .eoc1_msk = STM32H7_EOC_MST | STM32H7_OVR_MST,
-       .eoc2_msk = STM32H7_EOC_SLV | STM32H7_OVR_SLV,
+       .eoc_msk = { STM32H7_EOC_MST, STM32H7_EOC_SLV },
+       .ovr_msk = { STM32H7_OVR_MST, STM32H7_OVR_SLV },
        .ier = STM32H7_ADC_IER,
-       .eocie_msk = STM32H7_EOCIE | STM32H7_OVRIE,
+       .eocie_msk = STM32H7_EOCIE,
 };
 
 static const unsigned int stm32_adc_offset[STM32_ADC_MAX_ADCS] = {
@@ -318,6 +315,7 @@ static void stm32_adc_irq_handler(struct irq_desc *desc)
 {
        struct stm32_adc_priv *priv = irq_desc_get_handler_data(desc);
        struct irq_chip *chip = irq_desc_get_chip(desc);
+       int i;
        u32 status;
 
        chained_irq_enter(chip, desc);
@@ -335,17 +333,12 @@ static void stm32_adc_irq_handler(struct irq_desc *desc)
         * before invoking the interrupt handler (e.g. call ISR only for
         * IRQ-enabled ADCs).
         */
-       if (status & priv->cfg->regs->eoc1_msk &&
-           stm32_adc_eoc_enabled(priv, 0))
-               generic_handle_irq(irq_find_mapping(priv->domain, 0));
-
-       if (status & priv->cfg->regs->eoc2_msk &&
-           stm32_adc_eoc_enabled(priv, 1))
-               generic_handle_irq(irq_find_mapping(priv->domain, 1));
-
-       if (status & priv->cfg->regs->eoc3_msk &&
-           stm32_adc_eoc_enabled(priv, 2))
-               generic_handle_irq(irq_find_mapping(priv->domain, 2));
+       for (i = 0; i < priv->cfg->num_irqs; i++) {
+               if ((status & priv->cfg->regs->eoc_msk[i] &&
+                    stm32_adc_eoc_enabled(priv, i)) ||
+                    (status & priv->cfg->regs->ovr_msk[i]))
+                       generic_handle_irq(irq_find_mapping(priv->domain, i));
+       }
 
        chained_irq_exit(chip, desc);
 };
index b3f31f1..16c02c3 100644 (file)
@@ -154,6 +154,7 @@ struct stm32_adc;
  * @start_conv:                routine to start conversions
  * @stop_conv:         routine to stop conversions
  * @unprepare:         optional unprepare routine (disable, power-down)
+ * @irq_clear:         routine to clear irqs
  * @smp_cycles:                programmable sampling time (ADC clock cycles)
  */
 struct stm32_adc_cfg {
@@ -166,6 +167,7 @@ struct stm32_adc_cfg {
        void (*start_conv)(struct iio_dev *, bool dma);
        void (*stop_conv)(struct iio_dev *);
        void (*unprepare)(struct iio_dev *);
+       void (*irq_clear)(struct iio_dev *indio_dev, u32 msk);
        const unsigned int *smp_cycles;
 };
 
@@ -621,6 +623,13 @@ static void stm32f4_adc_stop_conv(struct iio_dev *indio_dev)
                           STM32F4_ADON | STM32F4_DMA | STM32F4_DDS);
 }
 
+static void stm32f4_adc_irq_clear(struct iio_dev *indio_dev, u32 msk)
+{
+       struct stm32_adc *adc = iio_priv(indio_dev);
+
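+       /* On STM32F4 the status bits are cleared by writing 0 (rc_w0). */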
+       stm32_adc_clr_bits(adc, adc->cfg->regs->isr_eoc.reg, msk);
+}
+
 static void stm32h7_adc_start_conv(struct iio_dev *indio_dev, bool dma)
 {
        struct stm32_adc *adc = iio_priv(indio_dev);
@@ -659,6 +668,13 @@ static void stm32h7_adc_stop_conv(struct iio_dev *indio_dev)
        stm32_adc_clr_bits(adc, STM32H7_ADC_CFGR, STM32H7_DMNGT_MASK);
 }
 
+static void stm32h7_adc_irq_clear(struct iio_dev *indio_dev, u32 msk)
+{
+       struct stm32_adc *adc = iio_priv(indio_dev);
+
+       /* On STM32H7, IRQs are cleared by writing 1 into the ISR register */
+       stm32_adc_set_bits(adc, adc->cfg->regs->isr_eoc.reg, msk);
+}
+
 static int stm32h7_adc_exit_pwr_down(struct iio_dev *indio_dev)
 {
        struct stm32_adc *adc = iio_priv(indio_dev);
@@ -1235,17 +1251,40 @@ static int stm32_adc_read_raw(struct iio_dev *indio_dev,
        }
 }
 
+static void stm32_adc_irq_clear(struct iio_dev *indio_dev, u32 msk)
+{
+       struct stm32_adc *adc = iio_priv(indio_dev);
+
+       adc->cfg->irq_clear(indio_dev, msk);
+}
+
 static irqreturn_t stm32_adc_threaded_isr(int irq, void *data)
 {
        struct iio_dev *indio_dev = data;
        struct stm32_adc *adc = iio_priv(indio_dev);
        const struct stm32_adc_regspec *regs = adc->cfg->regs;
        u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg);
+       u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg);
 
-       if (status & regs->isr_ovr.mask)
+       /* Check the OVR status first, as its mask should already be disabled */
+       if (status & regs->isr_ovr.mask) {
+               /*
+                * Clear the OVR bit to avoid re-entering the IRQ handler.
+                * This requires stopping the ADC first. The OVR bit state
+                * in ISR is propagated to the CSR register by hardware.
+                */
+               adc->cfg->stop_conv(indio_dev);
+               stm32_adc_irq_clear(indio_dev, regs->isr_ovr.mask);
                dev_err(&indio_dev->dev, "Overrun, stopping: restart needed\n");
+               return IRQ_HANDLED;
+       }
 
-       return IRQ_HANDLED;
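+       /* None of the enabled sources is pending, so this IRQ is
+        * spurious: report it (rate-limited) and return IRQ_NONE.
+        */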
+       if (!(status & mask))
+               dev_err_ratelimited(&indio_dev->dev,
+                                   "Unexpected IRQ: IER=0x%08x, ISR=0x%08x\n",
+                                   mask, status);
+
+       return IRQ_NONE;
 }
 
 static irqreturn_t stm32_adc_isr(int irq, void *data)
@@ -1254,6 +1293,10 @@ static irqreturn_t stm32_adc_isr(int irq, void *data)
        struct stm32_adc *adc = iio_priv(indio_dev);
        const struct stm32_adc_regspec *regs = adc->cfg->regs;
        u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg);
+       u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg);
+
+       if (!(status & mask))
+               return IRQ_WAKE_THREAD;
 
        if (status & regs->isr_ovr.mask) {
                /*
@@ -2046,6 +2089,7 @@ static const struct stm32_adc_cfg stm32f4_adc_cfg = {
        .start_conv = stm32f4_adc_start_conv,
        .stop_conv = stm32f4_adc_stop_conv,
        .smp_cycles = stm32f4_adc_smp_cycles,
+       .irq_clear = stm32f4_adc_irq_clear,
 };
 
 static const struct stm32_adc_cfg stm32h7_adc_cfg = {
@@ -2057,6 +2101,7 @@ static const struct stm32_adc_cfg stm32h7_adc_cfg = {
        .prepare = stm32h7_adc_prepare,
        .unprepare = stm32h7_adc_unprepare,
        .smp_cycles = stm32h7_adc_smp_cycles,
+       .irq_clear = stm32h7_adc_irq_clear,
 };
 
 static const struct stm32_adc_cfg stm32mp1_adc_cfg = {
@@ -2069,6 +2114,7 @@ static const struct stm32_adc_cfg stm32mp1_adc_cfg = {
        .prepare = stm32h7_adc_prepare,
        .unprepare = stm32h7_adc_unprepare,
        .smp_cycles = stm32h7_adc_smp_cycles,
+       .irq_clear = stm32h7_adc_irq_clear,
 };
 
 static const struct of_device_id stm32_adc_of_match[] = {
index c62cacc..e3f5077 100644 (file)
@@ -256,7 +256,7 @@ int cros_ec_sensors_core_init(struct platform_device *pdev,
        struct cros_ec_sensorhub *sensor_hub = dev_get_drvdata(dev->parent);
        struct cros_ec_dev *ec = sensor_hub->ec;
        struct cros_ec_sensor_platform *sensor_platform = dev_get_platdata(dev);
-       u32 ver_mask;
+       u32 ver_mask, temp;
        int frequencies[ARRAY_SIZE(state->frequencies) / 2] = { 0 };
        int ret, i;
 
@@ -311,10 +311,16 @@ int cros_ec_sensors_core_init(struct platform_device *pdev,
                                                 &frequencies[2],
                                                 &state->fifo_max_event_count);
                } else {
-                       frequencies[1] = state->resp->info_3.min_frequency;
-                       frequencies[2] = state->resp->info_3.max_frequency;
-                       state->fifo_max_event_count =
-                           state->resp->info_3.fifo_max_event_count;
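+                       /* Some EC firmware reports a max_frequency of 0;
+                        * fall back to the per-sensor-type defaults in
+                        * that case.
+                        */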
+                       if (state->resp->info_3.max_frequency == 0) {
+                               get_default_min_max_freq(state->resp->info.type,
+                                                        &frequencies[1],
+                                                        &frequencies[2],
+                                                        &temp);
+                       } else {
+                               frequencies[1] = state->resp->info_3.min_frequency;
+                               frequencies[2] = state->resp->info_3.max_frequency;
+                       }
+                       state->fifo_max_event_count = state->resp->info_3.fifo_max_event_count;
                }
                for (i = 0; i < ARRAY_SIZE(frequencies); i++) {
                        state->frequencies[2 * i] = frequencies[i] / 1000;
index 8c8d887..99562ba 100644 (file)
@@ -156,11 +156,13 @@ static const struct st_lsm6dsx_ext_dev_settings st_lsm6dsx_ext_dev_table[] = {
 static void st_lsm6dsx_shub_wait_complete(struct st_lsm6dsx_hw *hw)
 {
        struct st_lsm6dsx_sensor *sensor;
-       u32 odr;
+       u32 odr, timeout;
 
        sensor = iio_priv(hw->iio_devs[ST_LSM6DSX_ID_ACC]);
        odr = (hw->enable_mask & BIT(ST_LSM6DSX_ID_ACC)) ? sensor->odr : 12500;
-       msleep((2000000U / odr) + 1);
+       /* set 10ms as minimum timeout for i2c slave configuration */
+       timeout = max_t(u32, 2000000U / odr + 1, 10);
+       msleep(timeout);
 }
 
 /*
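Worked through with the values in the hunk above: odr is in mHz, so 2000000U / odr + 1 is two ODR periods in milliseconds. At the 12.5 Hz fallback (odr = 12500) that is 161 ms and the clamp is a no-op; at a fast rate such as 833 Hz (odr = 833000) the raw result is only 3 ms, which max_t() raises to the 10 ms minimum the i2c slave configuration needs. As a standalone sketch (hypothetical helper name):

#include <linux/kernel.h>

/* Two ODR periods (odr in mHz) in ms, but never less than 10 ms. */
static u32 foo_shub_timeout_ms(u32 odr_mhz)
{
	return max_t(u32, 2000000U / odr_mhz + 1, 10);
}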
index cade6dc..33ad4dd 100644 (file)
@@ -544,6 +544,7 @@ config VCNL4000
 
 config VCNL4035
        tristate "VCNL4035 combined ALS and proximity sensor"
+       select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
        select REGMAP_I2C
        depends on I2C
index 32a5143..9325e18 100644 (file)
@@ -73,6 +73,9 @@ config INFINIBAND_ADDR_TRANS_CONFIGFS
           This allows the user to configure the default GID type that the CM
           uses for each device when initiating new connections.
 
+config INFINIBAND_VIRT_DMA
+       def_bool !HIGHMEM
+
 if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/qib/Kconfig"
index 8017c40..7989b7e 100644 (file)
@@ -1269,9 +1269,6 @@ ssize_t rdma_query_gid_table(struct ib_device *device,
        unsigned long flags;
 
        rdma_for_each_port(device, port_num) {
-               if (!rdma_ib_or_roce(device, port_num))
-                       continue;
-
                table = rdma_gid_table(device, port_num);
                read_lock_irqsave(&table->rwlock, flags);
                for (i = 0; i < table->sz; i++) {
index 5740d1b..5afd142 100644 (file)
@@ -859,8 +859,8 @@ static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
        atomic_set(&cm_id_priv->work_count, -1);
        refcount_set(&cm_id_priv->refcount, 1);
 
-       ret = xa_alloc_cyclic_irq(&cm.local_id_table, &id, NULL, xa_limit_32b,
-                                 &cm.local_id_next, GFP_KERNEL);
+       ret = xa_alloc_cyclic(&cm.local_id_table, &id, NULL, xa_limit_32b,
+                             &cm.local_id_next, GFP_KERNEL);
        if (ret < 0)
                goto error;
        cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
@@ -878,8 +878,8 @@ error:
  */
 static void cm_finalize_id(struct cm_id_private *cm_id_priv)
 {
-       xa_store_irq(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id),
-                    cm_id_priv, GFP_KERNEL);
+       xa_store(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id),
+                cm_id_priv, GFP_ATOMIC);
 }
 
 struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
@@ -1169,7 +1169,7 @@ retest:
        spin_unlock(&cm.lock);
        spin_unlock_irq(&cm_id_priv->lock);
 
-       xa_erase_irq(&cm.local_id_table, cm_local_id(cm_id->local_id));
+       xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id));
        cm_deref_id(cm_id_priv);
        wait_for_completion(&cm_id_priv->comp);
        while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
@@ -1522,6 +1522,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
                                                            id.local_id);
        if (IS_ERR(cm_id_priv->timewait_info)) {
                ret = PTR_ERR(cm_id_priv->timewait_info);
+               cm_id_priv->timewait_info = NULL;
                goto out;
        }
 
@@ -2114,6 +2115,7 @@ static int cm_req_handler(struct cm_work *work)
                                                            id.local_id);
        if (IS_ERR(cm_id_priv->timewait_info)) {
                ret = PTR_ERR(cm_id_priv->timewait_info);
+               cm_id_priv->timewait_info = NULL;
                goto destroy;
        }
        cm_id_priv->timewait_info->work.remote_id = cm_id_priv->id.remote_id;
@@ -4482,7 +4484,7 @@ static int __init ib_cm_init(void)
        cm.remote_id_table = RB_ROOT;
        cm.remote_qp_table = RB_ROOT;
        cm.remote_sidr_table = RB_ROOT;
-       xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
+       xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC);
        get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
        INIT_LIST_HEAD(&cm.timewait_list);
 
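The cm.c conversion above drops the IRQ-safe xarray variants because local_id_table is only used from process context; cm_finalize_id() switches to GFP_ATOMIC, presumably because it can run under a spinlock, and since xa_alloc_cyclic() already reserved the slot with a NULL entry the store should not need to allocate anyway. A hedged sketch of that reserve-then-publish pattern (hypothetical table, not the cm.c code itself):

#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(foo_table);
static u32 foo_next;

/* Reserve a cyclic ID now; >= 0 means success (1 = counter wrapped). */
static int foo_reserve_id(u32 *id)
{
	return xa_alloc_cyclic(&foo_table, id, NULL, xa_limit_32b,
			       &foo_next, GFP_KERNEL);
}

/* Publish the fully constructed object; the slot already exists. */
static void foo_publish_id(u32 id, void *obj)
{
	xa_store(&foo_table, id, obj, GFP_ATOMIC);
}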
index 191e084..4e940fc 100644 (file)
@@ -940,8 +940,8 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
                        1);
                EFA_SET(&params.modify_mask,
                        EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
-               params.cur_qp_state = qp_attr->cur_qp_state;
-               params.qp_state = qp_attr->qp_state;
+               params.cur_qp_state = cur_state;
+               params.qp_state = new_state;
        }
 
        if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
index 7eaf995..c87b94e 100644 (file)
@@ -15245,7 +15245,8 @@ int hfi1_init_dd(struct hfi1_devdata *dd)
                    & CCE_REVISION_SW_MASK);
 
        /* alloc netdev data */
-       if (hfi1_netdev_alloc(dd))
+       ret = hfi1_netdev_alloc(dd);
+       if (ret)
                goto bail_cleanup;
 
        ret = set_up_context_variables(dd);
index 8ca51e4..329ee4f 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2015-2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
@@ -206,8 +207,6 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
        spin_lock_init(&fd->tid_lock);
        spin_lock_init(&fd->invalid_lock);
        fd->rec_cpu_num = -1; /* no cpu affinity by default */
-       fd->mm = current->mm;
-       mmgrab(fd->mm);
        fd->dd = dd;
        fp->private_data = fd;
        return 0;
@@ -711,7 +710,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 
        deallocate_ctxt(uctxt);
 done:
-       mmdrop(fdata->mm);
 
        if (atomic_dec_and_test(&dd->user_refcount))
                complete(&dd->user_comp);
index b4c6bff..e09e824 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _HFI1_KERNEL_H
 #define _HFI1_KERNEL_H
 /*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2015-2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
@@ -1451,7 +1452,6 @@ struct hfi1_filedata {
        u32 invalid_tid_idx;
        /* protect invalid_tids array and invalid_tid_idx */
        spinlock_t invalid_lock;
-       struct mm_struct *mm;
 };
 
 extern struct xarray hfi1_dev_table;
index 24ca17b..f3fb28e 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2016 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
 #include <linux/rculist.h>
 #include <linux/mmu_notifier.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/sched/mm.h>
 
 #include "mmu_rb.h"
 #include "trace.h"
 
-struct mmu_rb_handler {
-       struct mmu_notifier mn;
-       struct rb_root_cached root;
-       void *ops_arg;
-       spinlock_t lock;        /* protect the RB tree */
-       struct mmu_rb_ops *ops;
-       struct mm_struct *mm;
-       struct list_head lru_list;
-       struct work_struct del_work;
-       struct list_head del_list;
-       struct workqueue_struct *wq;
-};
-
 static unsigned long mmu_node_start(struct mmu_rb_node *);
 static unsigned long mmu_node_last(struct mmu_rb_node *);
 static int mmu_notifier_range_start(struct mmu_notifier *,
@@ -92,37 +81,36 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node)
        return PAGE_ALIGN(node->addr + node->len) - 1;
 }
 
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+int hfi1_mmu_rb_register(void *ops_arg,
                         struct mmu_rb_ops *ops,
                         struct workqueue_struct *wq,
                         struct mmu_rb_handler **handler)
 {
-       struct mmu_rb_handler *handlr;
+       struct mmu_rb_handler *h;
        int ret;
 
-       handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
-       if (!handlr)
+       h = kmalloc(sizeof(*h), GFP_KERNEL);
+       if (!h)
                return -ENOMEM;
 
-       handlr->root = RB_ROOT_CACHED;
-       handlr->ops = ops;
-       handlr->ops_arg = ops_arg;
-       INIT_HLIST_NODE(&handlr->mn.hlist);
-       spin_lock_init(&handlr->lock);
-       handlr->mn.ops = &mn_opts;
-       handlr->mm = mm;
-       INIT_WORK(&handlr->del_work, handle_remove);
-       INIT_LIST_HEAD(&handlr->del_list);
-       INIT_LIST_HEAD(&handlr->lru_list);
-       handlr->wq = wq;
-
-       ret = mmu_notifier_register(&handlr->mn, handlr->mm);
+       h->root = RB_ROOT_CACHED;
+       h->ops = ops;
+       h->ops_arg = ops_arg;
+       INIT_HLIST_NODE(&h->mn.hlist);
+       spin_lock_init(&h->lock);
+       h->mn.ops = &mn_opts;
+       INIT_WORK(&h->del_work, handle_remove);
+       INIT_LIST_HEAD(&h->del_list);
+       INIT_LIST_HEAD(&h->lru_list);
+       h->wq = wq;
+
+       ret = mmu_notifier_register(&h->mn, current->mm);
        if (ret) {
-               kfree(handlr);
+               kfree(h);
                return ret;
        }
 
-       *handler = handlr;
+       *handler = h;
        return 0;
 }
 
@@ -134,7 +122,7 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
        struct list_head del_list;
 
        /* Unregister first so we don't get any more notifications. */
-       mmu_notifier_unregister(&handler->mn, handler->mm);
+       mmu_notifier_unregister(&handler->mn, handler->mn.mm);
 
        /*
         * Make sure the wq delete handler is finished running.  It will not
@@ -166,6 +154,10 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
        int ret = 0;
 
        trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);
+
+       if (current->mm != handler->mn.mm)
+               return -EPERM;
+
        spin_lock_irqsave(&handler->lock, flags);
        node = __mmu_rb_search(handler, mnode->addr, mnode->len);
        if (node) {
@@ -180,6 +172,7 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
                __mmu_int_rb_remove(mnode, &handler->root);
                list_del(&mnode->list); /* remove from LRU list */
        }
+       mnode->handler = handler;
 unlock:
        spin_unlock_irqrestore(&handler->lock, flags);
        return ret;
@@ -217,6 +210,9 @@ bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
        unsigned long flags;
        bool ret = false;
 
+       if (current->mm != handler->mn.mm)
+               return ret;
+
        spin_lock_irqsave(&handler->lock, flags);
        node = __mmu_rb_search(handler, addr, len);
        if (node) {
@@ -239,6 +235,9 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
        unsigned long flags;
        bool stop = false;
 
+       if (current->mm != handler->mn.mm)
+               return;
+
        INIT_LIST_HEAD(&del_list);
 
        spin_lock_irqsave(&handler->lock, flags);
@@ -272,6 +271,9 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
 {
        unsigned long flags;
 
+       if (current->mm != handler->mn.mm)
+               return;
+
        /* Validity of handler and node pointers has been checked by caller. */
        trace_hfi1_mmu_rb_remove(node->addr, node->len);
        spin_lock_irqsave(&handler->lock, flags);
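The hfi1 rework above stops caching a struct mm_struct pointer (and the mmgrab()/mmdrop() pair that went with it): the handler registers its mmu notifier against current->mm, later recovers the mm from handler->mn.mm, and each entry point bails out when called from a different process. A sketch of the pattern for a hypothetical handler (not the hfi1 API):

#include <linux/mmu_notifier.h>
#include <linux/sched.h>

struct foo_handler {
	struct mmu_notifier mn;
};

static int foo_register(struct foo_handler *h,
			const struct mmu_notifier_ops *ops)
{
	h->mn.ops = ops;
	/* The core records current->mm in h->mn.mm on success. */
	return mmu_notifier_register(&h->mn, current->mm);
}

/* Reject calls made from a process other than the registering one. */
static bool foo_same_mm(struct foo_handler *h)
{
	return current->mm == h->mn.mm;
}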
index f04cec1..423aacc 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
@@ -54,6 +55,7 @@ struct mmu_rb_node {
        unsigned long len;
        unsigned long __last;
        struct rb_node node;
+       struct mmu_rb_handler *handler;
        struct list_head list;
 };
 
@@ -71,7 +73,19 @@ struct mmu_rb_ops {
                     void *evict_arg, bool *stop);
 };
 
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+struct mmu_rb_handler {
+       struct mmu_notifier mn;
+       struct rb_root_cached root;
+       void *ops_arg;
+       spinlock_t lock;        /* protect the RB tree */
+       struct mmu_rb_ops *ops;
+       struct list_head lru_list;
+       struct work_struct del_work;
+       struct list_head del_list;
+       struct workqueue_struct *wq;
+};
+
+int hfi1_mmu_rb_register(void *ops_arg,
                         struct mmu_rb_ops *ops,
                         struct workqueue_struct *wq,
                         struct mmu_rb_handler **handler);
index f81ca20..b94fc7f 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2015-2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
@@ -173,15 +174,18 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd,
 {
        struct page **pages;
        struct hfi1_devdata *dd = fd->uctxt->dd;
+       struct mm_struct *mm;
 
        if (mapped) {
                pci_unmap_single(dd->pcidev, node->dma_addr,
                                 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
                pages = &node->pages[idx];
+               mm = mm_from_tid_node(node);
        } else {
                pages = &tidbuf->pages[idx];
+               mm = current->mm;
        }
-       hfi1_release_user_pages(fd->mm, pages, npages, mapped);
+       hfi1_release_user_pages(mm, pages, npages, mapped);
        fd->tid_n_pinned -= npages;
 }
 
@@ -216,12 +220,12 @@ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
         * pages, accept the amount pinned so far and program only that.
         * User space knows how to deal with partially programmed buffers.
         */
-       if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
+       if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
                kfree(pages);
                return -ENOMEM;
        }
 
-       pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+       pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
        if (pinned <= 0) {
                kfree(pages);
                return pinned;
@@ -756,7 +760,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
 
        if (fd->use_mn) {
                ret = mmu_interval_notifier_insert(
-                       &node->notifier, fd->mm,
+                       &node->notifier, current->mm,
                        tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
                        &tid_mn_ops);
                if (ret)
index 332abb4..d45c7b6 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _HFI1_USER_EXP_RCV_H
 #define _HFI1_USER_EXP_RCV_H
 /*
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
  * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
@@ -95,4 +96,9 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
 int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
                              struct hfi1_tid_info *tinfo);
 
+static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node)
+{
+       return node->notifier.mm;
+}
+
 #endif /* _HFI1_USER_EXP_RCV_H */
index a92346e..4a4956f 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
  * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
@@ -188,7 +189,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
        atomic_set(&pq->n_reqs, 0);
        init_waitqueue_head(&pq->wait);
        atomic_set(&pq->n_locked, 0);
-       pq->mm = fd->mm;
 
        iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
                    activate_packet_queue, NULL, NULL);
@@ -230,7 +230,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
 
        cq->nentries = hfi1_sdma_comp_ring_size;
 
-       ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
+       ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
                                   &pq->handler);
        if (ret) {
                dd_dev_err(dd, "Failed to register with MMU %d", ret);
@@ -980,13 +980,13 @@ static int pin_sdma_pages(struct user_sdma_request *req,
 
        npages -= node->npages;
 retry:
-       if (!hfi1_can_pin_pages(pq->dd, pq->mm,
+       if (!hfi1_can_pin_pages(pq->dd, current->mm,
                                atomic_read(&pq->n_locked), npages)) {
                cleared = sdma_cache_evict(pq, npages);
                if (cleared >= npages)
                        goto retry;
        }
-       pinned = hfi1_acquire_user_pages(pq->mm,
+       pinned = hfi1_acquire_user_pages(current->mm,
                                         ((unsigned long)iovec->iov.iov_base +
                                         (node->npages * PAGE_SIZE)), npages, 0,
                                         pages + node->npages);
@@ -995,7 +995,7 @@ retry:
                return pinned;
        }
        if (pinned != npages) {
-               unpin_vector_pages(pq->mm, pages, node->npages, pinned);
+               unpin_vector_pages(current->mm, pages, node->npages, pinned);
                return -EFAULT;
        }
        kfree(node->pages);
@@ -1008,7 +1008,8 @@ retry:
 static void unpin_sdma_pages(struct sdma_mmu_node *node)
 {
        if (node->npages) {
-               unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);
+               unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
+                                  node->npages);
                atomic_sub(node->npages, &node->pq->n_locked);
        }
 }
index 9972e0e..1e8c02f 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _HFI1_USER_SDMA_H
 #define _HFI1_USER_SDMA_H
 /*
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
  * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
@@ -133,7 +134,6 @@ struct hfi1_user_sdma_pkt_q {
        unsigned long unpinned;
        struct mmu_rb_handler *handler;
        atomic_t n_locked;
-       struct mm_struct *mm;
 };
 
 struct hfi1_user_sdma_comp_q {
@@ -250,4 +250,9 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
                                   struct iovec *iovec, unsigned long dim,
                                   unsigned long *count);
 
+static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node)
+{
+       return node->rb.handler->mn.mm;
+}
+
 #endif /* _HFI1_USER_SDMA_H */
index 6d30850..0468028 100644 (file)
@@ -2936,6 +2936,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
 
        roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
        roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
+       roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 1);
 
        roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
        roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
@@ -4989,11 +4990,11 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
                                              V2_QPC_BYTE_28_AT_M,
                                              V2_QPC_BYTE_28_AT_S);
        qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn,
-                                           V2_QPC_BYTE_212_RETRY_CNT_M,
-                                           V2_QPC_BYTE_212_RETRY_CNT_S);
+                                           V2_QPC_BYTE_212_RETRY_NUM_INIT_M,
+                                           V2_QPC_BYTE_212_RETRY_NUM_INIT_S);
        qp_attr->rnr_retry = roce_get_field(context.byte_244_rnr_rxack,
-                                           V2_QPC_BYTE_244_RNR_CNT_M,
-                                           V2_QPC_BYTE_244_RNR_CNT_S);
+                                           V2_QPC_BYTE_244_RNR_NUM_INIT_M,
+                                           V2_QPC_BYTE_244_RNR_NUM_INIT_S);
 
 done:
        qp_attr->cur_qp_state = qp_attr->qp_state;
index 29c9dd4..be7f2fe 100644 (file)
@@ -1661,7 +1661,7 @@ struct hns_roce_query_pf_caps_d {
        __le32 rsv_uars_rsv_qps;
 };
 #define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0
-#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(20, 0)
+#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(19, 0)
 
 #define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20
 #define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20)
index 2408b27..584932d 100644 (file)
 #define DRV_VERSION    __stringify(DRV_VERSION_MAJOR) "."              \
        __stringify(DRV_VERSION_MINOR) "." __stringify(DRV_VERSION_BUILD)
 
-static int push_mode;
-module_param(push_mode, int, 0644);
-MODULE_PARM_DESC(push_mode, "Low latency mode: 0=disabled (default), 1=enabled)");
-
 static int debug;
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "debug flags: 0=disabled (default), 0x7fffffff=all");
@@ -1580,7 +1576,6 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
        if (status)
                goto exit;
        iwdev->obj_next = iwdev->obj_mem;
-       iwdev->push_mode = push_mode;
 
        init_waitqueue_head(&iwdev->vchnl_waitq);
        init_waitqueue_head(&dev->vf_reqs);
index 581ecba..533f3ca 100644 (file)
@@ -167,39 +167,16 @@ static void i40iw_dealloc_ucontext(struct ib_ucontext *context)
  */
 static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
-       struct i40iw_ucontext *ucontext;
-       u64 db_addr_offset, push_offset, pfn;
-
-       ucontext = to_ucontext(context);
-       if (ucontext->iwdev->sc_dev.is_pf) {
-               db_addr_offset = I40IW_DB_ADDR_OFFSET;
-               push_offset = I40IW_PUSH_OFFSET;
-               if (vma->vm_pgoff)
-                       vma->vm_pgoff += I40IW_PF_FIRST_PUSH_PAGE_INDEX - 1;
-       } else {
-               db_addr_offset = I40IW_VF_DB_ADDR_OFFSET;
-               push_offset = I40IW_VF_PUSH_OFFSET;
-               if (vma->vm_pgoff)
-                       vma->vm_pgoff += I40IW_VF_FIRST_PUSH_PAGE_INDEX - 1;
-       }
+       struct i40iw_ucontext *ucontext = to_ucontext(context);
+       u64 dbaddr;
 
-       vma->vm_pgoff += db_addr_offset >> PAGE_SHIFT;
-
-       if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) {
-               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-       } else {
-               if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2)
-                       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-               else
-                       vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-       }
+       if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE)
+               return -EINVAL;
 
-       pfn = vma->vm_pgoff +
-             (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >>
-              PAGE_SHIFT);
+       dbaddr = I40IW_DB_ADDR_OFFSET + pci_resource_start(ucontext->iwdev->ldev->pcidev, 0);
 
-       return rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
-                                vma->vm_page_prot, NULL);
+       return rdma_user_mmap_io(context, vma, dbaddr >> PAGE_SHIFT, PAGE_SIZE,
+                                pgprot_noncached(vma->vm_page_prot), NULL);
 }
 
 /**
index c3cfea2..119b257 100644 (file)
@@ -803,8 +803,10 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
        }
 
        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
-       if (IS_ERR(mailbox))
+       if (IS_ERR(mailbox)) {
+               err = PTR_ERR(mailbox);
                goto err_out_arm;
+       }
 
        cq_context = mailbox->buf;
 
@@ -846,9 +848,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
        }
 
        spin_lock_irq(&dev->cq_table.lock);
-       if (mthca_array_set(&dev->cq_table.cq,
-                           cq->cqn & (dev->limits.num_cqs - 1),
-                           cq)) {
+       err = mthca_array_set(&dev->cq_table.cq,
+                             cq->cqn & (dev->limits.num_cqs - 1), cq);
+       if (err) {
                spin_unlock_irq(&dev->cq_table.lock);
                goto err_out_free_mr;
        }
index 019642f..511c95b 100644 (file)
@@ -1936,6 +1936,15 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
        }
 
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+               qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
+
+               /* calculate the db_rec_db2 data since it is constant, so
+                * there is no need to reflect it from user space
+                */
+               qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
+               qp->urq.db_rec_db2_data.data.value =
+                       cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
+
                rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
                                          &qp->urq.db_rec_db2_data,
                                          DB_REC_WIDTH_32B,
index fa2a3fa..6895bac 100644 (file)
@@ -266,7 +266,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
        }
        ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1);
        if (ret)
-               return ret;
+               goto err_srq_free;
        spin_lock_init(&dev->srq_tbl_lock);
        rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group);
 
index 9ef5f5c..c8e2680 100644 (file)
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config INFINIBAND_RDMAVT
        tristate "RDMA verbs transport library"
-       depends on X86_64 && ARCH_DMA_ADDR_T_64BIT
+       depends on INFINIBAND_VIRT_DMA
+       depends on X86_64
        depends on PCI
        select DMA_VIRT_OPS
        help
index a0c6c7d..8810bfa 100644 (file)
@@ -2,7 +2,7 @@
 config RDMA_RXE
        tristate "Software RDMA over Ethernet (RoCE) driver"
        depends on INET && PCI && INFINIBAND
-       depends on !64BIT || ARCH_DMA_ADDR_T_64BIT
+       depends on INFINIBAND_VIRT_DMA
        select NET_UDP_TUNNEL
        select CRYPTO_CRC32
        select DMA_VIRT_OPS
index b622fc6..3450ba5 100644 (file)
@@ -1,6 +1,7 @@
 config RDMA_SIW
        tristate "Software RDMA over TCP/IP (iWARP) driver"
        depends on INET && INFINIBAND && LIBCRC32C
+       depends on INFINIBAND_VIRT_DMA
        select DMA_VIRT_OPS
        help
        This driver implements the iWARP RDMA transport over
index c77cdb3..8c73377 100644 (file)
@@ -241,6 +241,7 @@ static const struct xpad_device {
        { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 },
        { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 },
        { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 },
+       { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 },
        { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 },
        { 0x12ab, 0x0303, "Mortal Kombat Klassic FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
@@ -418,6 +419,7 @@ static const struct usb_device_id xpad_table[] = {
        XPAD_XBOXONE_VENDOR(0x0f0d),            /* Hori Controllers */
        XPAD_XBOX360_VENDOR(0x1038),            /* SteelSeries Controllers */
        XPAD_XBOX360_VENDOR(0x11c9),            /* Nacon GC100XF */
+       XPAD_XBOX360_VENDOR(0x1209),            /* Ardwiino Controllers */
        XPAD_XBOX360_VENDOR(0x12ab),            /* X-Box 360 dance pads */
        XPAD_XBOX360_VENDOR(0x1430),            /* RedOctane X-Box 360 controllers */
        XPAD_XBOX360_VENDOR(0x146b),            /* BigBen Interactive Controllers */
index 15d17c7..1f0d61b 100644 (file)
@@ -183,6 +183,7 @@ static void cros_ec_keyb_process(struct cros_ec_keyb *ckdev,
                                        "changed: [r%d c%d]: byte %02x\n",
                                        row, col, new_state);
 
+                               input_event(idev, EV_MSC, MSC_SCAN, pos);
                                input_report_key(idev, keycodes[pos],
                                                 new_state);
                        }
index 27126e6..d450f11 100644 (file)
@@ -99,7 +99,8 @@ static irqreturn_t sunkbd_interrupt(struct serio *serio,
        switch (data) {
 
        case SUNKBD_RET_RESET:
-               schedule_work(&sunkbd->tq);
+               if (sunkbd->enabled)
+                       schedule_work(&sunkbd->tq);
                sunkbd->reset = -1;
                break;
 
@@ -200,16 +201,12 @@ static int sunkbd_initialize(struct sunkbd *sunkbd)
 }
 
 /*
- * sunkbd_reinit() sets leds and beeps to a state the computer remembers they
- * were in.
+ * sunkbd_set_leds_beeps() sets the LEDs and beeper to the state the computer
+ * remembers them being in.
  */
 
-static void sunkbd_reinit(struct work_struct *work)
+static void sunkbd_set_leds_beeps(struct sunkbd *sunkbd)
 {
-       struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq);
-
-       wait_event_interruptible_timeout(sunkbd->wait, sunkbd->reset >= 0, HZ);
-
        serio_write(sunkbd->serio, SUNKBD_CMD_SETLED);
        serio_write(sunkbd->serio,
                (!!test_bit(LED_CAPSL,   sunkbd->dev->led) << 3) |
@@ -222,11 +219,39 @@ static void sunkbd_reinit(struct work_struct *work)
                SUNKBD_CMD_BELLOFF - !!test_bit(SND_BELL, sunkbd->dev->snd));
 }
 
+
+/*
+ * sunkbd_reinit() waits for the keyboard reset to complete and restores the
+ * state of the LEDs and beeper.
+ */
+
+static void sunkbd_reinit(struct work_struct *work)
+{
+       struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq);
+
+       /*
+        * It is OK that we check sunkbd->enabled without pausing serio,
+        * as we only want to catch the true->false transition, which will
+        * happen once, and we will be woken up for it.
+        */
+       wait_event_interruptible_timeout(sunkbd->wait,
+                                        sunkbd->reset >= 0 || !sunkbd->enabled,
+                                        HZ);
+
+       if (sunkbd->reset >= 0 && sunkbd->enabled)
+               sunkbd_set_leds_beeps(sunkbd);
+}
+
 static void sunkbd_enable(struct sunkbd *sunkbd, bool enable)
 {
        serio_pause_rx(sunkbd->serio);
        sunkbd->enabled = enable;
        serio_continue_rx(sunkbd->serio);
+
+       if (!enable) {
+               wake_up_interruptible(&sunkbd->wait);
+               cancel_work_sync(&sunkbd->tq);
+       }
 }
 
 /*
index 5fe92d4..4cc4e8f 100644 (file)
@@ -696,7 +696,7 @@ struct adxl34x *adxl34x_probe(struct device *dev, int irq,
        struct input_dev *input_dev;
        const struct adxl34x_platform_data *pdata;
        int err, range, i;
-       unsigned char revid;
+       int revid;
 
        if (!irq) {
                dev_err(dev, "no IRQ?\n");
index e413801..f515fae 100644 (file)
@@ -568,12 +568,15 @@ static int cm109_input_open(struct input_dev *idev)
        dev->ctl_data->byte[HID_OR2] = dev->keybit;
        dev->ctl_data->byte[HID_OR3] = 0x00;
 
+       dev->ctl_urb_pending = 1;
        error = usb_submit_urb(dev->urb_ctl, GFP_KERNEL);
-       if (error)
+       if (error) {
+               dev->ctl_urb_pending = 0;
                dev_err(&dev->intf->dev, "%s: usb_submit_urb (urb_ctl) failed %d\n",
                        __func__, error);
-       else
+       } else {
                dev->open = 1;
+       }
 
        mutex_unlock(&dev->pm_mutex);
 
index cae1a3f..cb6ec59 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/input.h>
 #include <linux/init.h>
+#include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/acpi.h>
 #include <linux/dmi.h>
@@ -82,6 +83,17 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "One S1003"),
                },
        },
+       {
+               /*
+                * Lenovo Yoga Tab2 1051L, something messes with the home-button
+                * IRQ settings, leading to a non working home-button.
+                */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "60073"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "1051L"),
+               },
+       },
        {} /* Terminating entry */
 };
 
index c75b00c..36e3cd9 100644 (file)
@@ -78,7 +78,7 @@ struct elan_transport_ops {
        int (*iap_reset)(struct i2c_client *client);
 
        int (*prepare_fw_update)(struct i2c_client *client, u16 ic_type,
-                                u8 iap_version);
+                                u8 iap_version, u16 fw_page_size);
        int (*write_fw_block)(struct i2c_client *client, u16 fw_page_size,
                              const u8 *page, u16 checksum, int idx);
        int (*finish_fw_update)(struct i2c_client *client,
index c599e21..61ed3f5 100644 (file)
@@ -497,7 +497,8 @@ static int __elan_update_firmware(struct elan_tp_data *data,
        u16 sw_checksum = 0, fw_checksum = 0;
 
        error = data->ops->prepare_fw_update(client, data->ic_type,
-                                            data->iap_version);
+                                            data->iap_version,
+                                            data->fw_page_size);
        if (error)
                return error;
 
index 5a496d4..13dc097 100644 (file)
@@ -517,7 +517,7 @@ static int elan_i2c_set_flash_key(struct i2c_client *client)
        return 0;
 }
 
-static int elan_read_write_iap_type(struct i2c_client *client)
+static int elan_read_write_iap_type(struct i2c_client *client, u16 fw_page_size)
 {
        int error;
        u16 constant;
@@ -526,7 +526,7 @@ static int elan_read_write_iap_type(struct i2c_client *client)
 
        do {
                error = elan_i2c_write_cmd(client, ETP_I2C_IAP_TYPE_CMD,
-                                          ETP_I2C_IAP_TYPE_REG);
+                                          fw_page_size / 2);
                if (error) {
                        dev_err(&client->dev,
                                "cannot write iap type: %d\n", error);
@@ -543,7 +543,7 @@ static int elan_read_write_iap_type(struct i2c_client *client)
                constant = le16_to_cpup((__le16 *)val);
                dev_dbg(&client->dev, "iap type reg: 0x%04x\n", constant);
 
-               if (constant == ETP_I2C_IAP_TYPE_REG)
+               if (constant == fw_page_size / 2)
                        return 0;
 
        } while (--retry > 0);
@@ -553,7 +553,7 @@ static int elan_read_write_iap_type(struct i2c_client *client)
 }
 
 static int elan_i2c_prepare_fw_update(struct i2c_client *client, u16 ic_type,
-                                     u8 iap_version)
+                                     u8 iap_version, u16 fw_page_size)
 {
        struct device *dev = &client->dev;
        int error;
@@ -594,7 +594,7 @@ static int elan_i2c_prepare_fw_update(struct i2c_client *client, u16 ic_type,
        }
 
        if (ic_type >= 0x0D && iap_version >= 1) {
-               error = elan_read_write_iap_type(client);
+               error = elan_read_write_iap_type(client, fw_page_size);
                if (error)
                        return error;
        }
index 8ff8237..1820f1c 100644 (file)
@@ -340,7 +340,7 @@ static int elan_smbus_set_flash_key(struct i2c_client *client)
 }
 
 static int elan_smbus_prepare_fw_update(struct i2c_client *client, u16 ic_type,
-                                       u8 iap_version)
+                                       u8 iap_version, u16 fw_page_size)
 {
        struct device *dev = &client->dev;
        int len;
index a4c9b96..3a2dcf0 100644 (file)
@@ -219,6 +219,10 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = {
                        DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "C15B"),
                },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ByteSpeed LLC"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "ByteSpeed Laptop C15B"),
+               },
        },
        { }
 };
@@ -608,6 +612,48 @@ static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = {
                },
        },
        {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A114-31"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A314-31"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A315-31"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire ES1-132"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire ES1-332"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire ES1-432"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate Spin B118-RN"),
+               },
+       },
+       {
                /* Advent 4211 */
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "DIXONSXP"),
index d3eda48..abae23a 100644 (file)
@@ -122,6 +122,7 @@ module_param_named(unmask_kbd_data, i8042_unmask_kbd_data, bool, 0600);
 MODULE_PARM_DESC(unmask_kbd_data, "Unconditional enable (may reveal sensitive data) of normally sanitize-filtered kbd data traffic debug log [pre-condition: i8042.debug=1 enabled]");
 #endif
 
+static bool i8042_present;
 static bool i8042_bypass_aux_irq_test;
 static char i8042_kbd_firmware_id[128];
 static char i8042_aux_firmware_id[128];
@@ -343,6 +344,9 @@ int i8042_command(unsigned char *param, int command)
        unsigned long flags;
        int retval;
 
+       if (!i8042_present)
+               return -1;
+
        spin_lock_irqsave(&i8042_lock, flags);
        retval = __i8042_command(param, command);
        spin_unlock_irqrestore(&i8042_lock, flags);
@@ -1467,7 +1471,8 @@ static int __init i8042_setup_aux(void)
        if (error)
                goto err_free_ports;
 
-       if (aux_enable())
+       error = aux_enable();
+       if (error)
                goto err_free_irq;
 
        i8042_aux_irq_registered = true;
@@ -1612,12 +1617,15 @@ static int __init i8042_init(void)
 
        err = i8042_platform_init();
        if (err)
-               return err;
+               return (err == -ENODEV) ? 0 : err;
 
        err = i8042_controller_check();
        if (err)
                goto err_platform_exit;
 
+       /* Set this before creating the dev to allow i8042_command to work right away */
+       i8042_present = true;
+
        pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0);
        if (IS_ERR(pdev)) {
                err = PTR_ERR(pdev);
@@ -1636,6 +1644,9 @@ static int __init i8042_init(void)
 
 static void __exit i8042_exit(void)
 {
+       if (!i8042_present)
+               return;
+
        platform_device_unregister(i8042_platform_device);
        platform_driver_unregister(&i8042_driver);
        i8042_platform_exit();
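The i8042 changes above let the module load cleanly on machines with no controller: i8042_platform_init() returning -ENODEV is now treated as a successful no-op init, and the new i8042_present flag makes the exported i8042_command() fail fast instead of touching absent hardware. A hedged sketch of the same pattern for a hypothetical module:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>

int foo_platform_init(void);	/* hypothetical, defined elsewhere */

static bool foo_present;

/* Exported helper: fail fast when the controller never probed. */
int foo_command(unsigned char *param, int command)
{
	if (!foo_present)
		return -1;
	/* ...talk to the hardware... */
	return 0;
}

static int __init foo_init(void)
{
	int err = foo_platform_init();

	if (err)
		return err == -ENODEV ? 0 : err;	/* absent is not an error */

	foo_present = true;
	return 0;
}
module_init(foo_init);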
index f012fe7..cc18f54 100644 (file)
@@ -96,6 +96,7 @@ config TOUCHSCREEN_AD7879_SPI
 config TOUCHSCREEN_ADC
        tristate "Generic ADC based resistive touchscreen"
        depends on IIO
+       select IIO_BUFFER
        select IIO_BUFFER_CB
        help
          Say Y here if you want to use the generic ADC
index 98f17fa..b6f7536 100644 (file)
@@ -2183,11 +2183,11 @@ static int mxt_initialize(struct mxt_data *data)
                msleep(MXT_FW_RESET_TIME);
        }
 
-       error = mxt_acquire_irq(data);
+       error = mxt_check_retrigen(data);
        if (error)
                return error;
 
-       error = mxt_check_retrigen(data);
+       error = mxt_acquire_irq(data);
        if (error)
                return error;
 
index 02c75ea..6612f9e 100644 (file)
@@ -193,6 +193,18 @@ static const struct dmi_system_id rotated_screen[] = {
                },
        },
        {
+               .ident = "Teclast X98 Pro",
+               .matches = {
+                       /*
+                        * Only match the BIOS date, because the manufacturer's
+                        * BIOS does not report the board name at all
+                        * (sometimes)...
+                        */
+                       DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"),
+                       DMI_MATCH(DMI_BIOS_DATE, "10/28/2015"),
+               },
+       },
+       {
                .ident = "WinBook TW100",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "WinBook"),
index e694a9b..603a948 100644 (file)
@@ -137,45 +137,25 @@ struct raydium_data {
        bool wake_irq_enabled;
 };
 
-static int raydium_i2c_xfer(struct i2c_client *client,
-                           u32 addr, void *data, size_t len, bool is_read)
-{
-       struct raydium_bank_switch_header {
-               u8 cmd;
-               __be32 be_addr;
-       } __packed header = {
-               .cmd = RM_CMD_BANK_SWITCH,
-               .be_addr = cpu_to_be32(addr),
-       };
-
-       u8 reg_addr = addr & 0xff;
-
-       struct i2c_msg xfer[] = {
-               {
-                       .addr = client->addr,
-                       .len = sizeof(header),
-                       .buf = (u8 *)&header,
-               },
-               {
-                       .addr = client->addr,
-                       .len = 1,
-                       .buf = &reg_addr,
-               },
-               {
-                       .addr = client->addr,
-                       .len = len,
-                       .buf = data,
-                       .flags = is_read ? I2C_M_RD : 0,
-               }
-       };
+/*
+ * Header to be sent for RM_CMD_BANK_SWITCH command. This is used by
+ * raydium_i2c_{read|send} below.
+ */
+struct __packed raydium_bank_switch_header {
+       u8 cmd;
+       __be32 be_addr;
+};
 
+static int raydium_i2c_xfer(struct i2c_client *client, u32 addr,
+                           struct i2c_msg *xfer, size_t xfer_count)
+{
+       int ret;
        /*
         * If address is greater than 255, then RM_CMD_BANK_SWITCH needs to be
         * sent first. Else, skip the header i.e. xfer[0].
         */
        int xfer_start_idx = (addr > 0xff) ? 0 : 1;
-       size_t xfer_count = ARRAY_SIZE(xfer) - xfer_start_idx;
-       int ret;
+       xfer_count -= xfer_start_idx;
 
        ret = i2c_transfer(client->adapter, &xfer[xfer_start_idx], xfer_count);
        if (likely(ret == xfer_count))
@@ -189,10 +169,46 @@ static int raydium_i2c_send(struct i2c_client *client,
 {
        int tries = 0;
        int error;
+       u8 *tx_buf;
+       u8 reg_addr = addr & 0xff;
+
+       tx_buf = kmalloc(len + 1, GFP_KERNEL);
+       if (!tx_buf)
+               return -ENOMEM;
+
+       tx_buf[0] = reg_addr;
+       memcpy(tx_buf + 1, data, len);
 
        do {
-               error = raydium_i2c_xfer(client, addr, (void *)data, len,
-                                        false);
+               struct raydium_bank_switch_header header = {
+                       .cmd = RM_CMD_BANK_SWITCH,
+                       .be_addr = cpu_to_be32(addr),
+               };
+
+               /*
+                * Perform as a single i2c_transfer transaction to ensure that
+                * no other I2C transactions are initiated on the bus to any
+                * other device in between. Initiating transacations to other
+                * devices after RM_CMD_BANK_SWITCH is sent is known to cause
+                * issues. This is also why regmap infrastructure cannot be used
+                * for this driver. Regmap handles page(bank) switch and reads
+                * as separate i2c_transfer() operations. This can result in
+                * problems if the Raydium device is on a shared I2C bus.
+                */
+               struct i2c_msg xfer[] = {
+                       {
+                               .addr = client->addr,
+                               .len = sizeof(header),
+                               .buf = (u8 *)&header,
+                       },
+                       {
+                               .addr = client->addr,
+                               .len = len + 1,
+                               .buf = tx_buf,
+                       },
+               };
+
+               error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer));
                if (likely(!error))
                        return 0;
 
@@ -206,12 +222,46 @@ static int raydium_i2c_send(struct i2c_client *client,
 static int raydium_i2c_read(struct i2c_client *client,
                            u32 addr, void *data, size_t len)
 {
-       size_t xfer_len;
        int error;
 
        while (len) {
-               xfer_len = min_t(size_t, len, RM_MAX_READ_SIZE);
-               error = raydium_i2c_xfer(client, addr, data, xfer_len, true);
+               u8 reg_addr = addr & 0xff;
+               struct raydium_bank_switch_header header = {
+                       .cmd = RM_CMD_BANK_SWITCH,
+                       .be_addr = cpu_to_be32(addr),
+               };
+               size_t xfer_len = min_t(size_t, len, RM_MAX_READ_SIZE);
+
+               /*
+                * Perform as a single i2c_transfer transaction to ensure that
+                * no other I2C transactions are initiated on the bus to any
+                * other device in between. Initiating transactions to other
+                * devices after RM_CMD_BANK_SWITCH is sent is known to cause
+                * issues. This is also why regmap infrastructure cannot be used
+                * for this driver. Regmap handles page(bank) switch and writes
+                * as separate i2c_transfer() operations. This can result in
+                * problems if the Raydium device is on a shared I2C bus.
+                */
+               struct i2c_msg xfer[] = {
+                       {
+                               .addr = client->addr,
+                               .len = sizeof(header),
+                               .buf = (u8 *)&header,
+                       },
+                       {
+                               .addr = client->addr,
+                               .len = 1,
+                               .buf = &reg_addr,
+                       },
+                       {
+                               .addr = client->addr,
+                               .len = xfer_len,
+                               .buf = data,
+                               .flags = I2C_M_RD,
+                       }
+               };
+
+               error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer));
                if (unlikely(error))
                        return error;
 
index 974a667..5ad519c 100644 (file)
@@ -1083,7 +1083,6 @@ static int of_count_icc_providers(struct device_node *np)
                        count++;
                count += of_count_icc_providers(child);
        }
-       of_node_put(np);
 
        return count;
 }
index 42c6c55..e8371d4 100644 (file)
@@ -182,7 +182,7 @@ DEFINE_QNODE(mas_pcnoc_sdcc_1, MSM8916_MASTER_SDCC_1, 8, -1, -1, MSM8916_PNOC_IN
 DEFINE_QNODE(mas_pcnoc_sdcc_2, MSM8916_MASTER_SDCC_2, 8, -1, -1, MSM8916_PNOC_INT_1);
 DEFINE_QNODE(mas_qdss_bam, MSM8916_MASTER_QDSS_BAM, 8, -1, -1, MSM8916_SNOC_QDSS_INT);
 DEFINE_QNODE(mas_qdss_etr, MSM8916_MASTER_QDSS_ETR, 8, -1, -1, MSM8916_SNOC_QDSS_INT);
-DEFINE_QNODE(mas_snoc_cfg, MSM8916_MASTER_SNOC_CFG, 4, 20, -1, MSM8916_SNOC_QDSS_INT);
+DEFINE_QNODE(mas_snoc_cfg, MSM8916_MASTER_SNOC_CFG, 4, -1, -1, MSM8916_SNOC_QDSS_INT);
 DEFINE_QNODE(mas_spdm, MSM8916_MASTER_SPDM, 4, -1, -1, MSM8916_PNOC_MAS_0);
 DEFINE_QNODE(mas_tcu0, MSM8916_MASTER_TCU0, 8, -1, -1, MSM8916_SLAVE_EBI_CH0, MSM8916_BIMC_SNOC_MAS, MSM8916_SLAVE_AMPSS_L2);
 DEFINE_QNODE(mas_tcu1, MSM8916_MASTER_TCU1, 8, -1, -1, MSM8916_SLAVE_EBI_CH0, MSM8916_BIMC_SNOC_MAS, MSM8916_SLAVE_AMPSS_L2);
@@ -208,14 +208,14 @@ DEFINE_QNODE(pcnoc_snoc_mas, MSM8916_PNOC_SNOC_MAS, 8, 29, -1, MSM8916_PNOC_SNOC
 DEFINE_QNODE(pcnoc_snoc_slv, MSM8916_PNOC_SNOC_SLV, 8, -1, 45, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC, MSM8916_SNOC_INT_1);
 DEFINE_QNODE(qdss_int, MSM8916_SNOC_QDSS_INT, 8, -1, -1, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC);
 DEFINE_QNODE(slv_apps_l2, MSM8916_SLAVE_AMPSS_L2, 8, -1, -1, 0);
-DEFINE_QNODE(slv_apss, MSM8916_SLAVE_APSS, 4, -1, 20, 0);
+DEFINE_QNODE(slv_apss, MSM8916_SLAVE_APSS, 4, -1, -1, 0);
 DEFINE_QNODE(slv_audio, MSM8916_SLAVE_LPASS, 4, -1, -1, 0);
 DEFINE_QNODE(slv_bimc_cfg, MSM8916_SLAVE_BIMC_CFG, 4, -1, -1, 0);
 DEFINE_QNODE(slv_blsp_1, MSM8916_SLAVE_BLSP_1, 4, -1, -1, 0);
 DEFINE_QNODE(slv_boot_rom, MSM8916_SLAVE_BOOT_ROM, 4, -1, -1, 0);
 DEFINE_QNODE(slv_camera_cfg, MSM8916_SLAVE_CAMERA_CFG, 4, -1, -1, 0);
-DEFINE_QNODE(slv_cats_0, MSM8916_SLAVE_CATS_128, 16, -1, 106, 0);
-DEFINE_QNODE(slv_cats_1, MSM8916_SLAVE_OCMEM_64, 8, -1, 107, 0);
+DEFINE_QNODE(slv_cats_0, MSM8916_SLAVE_CATS_128, 16, -1, -1, 0);
+DEFINE_QNODE(slv_cats_1, MSM8916_SLAVE_OCMEM_64, 8, -1, -1, 0);
 DEFINE_QNODE(slv_clk_ctl, MSM8916_SLAVE_CLK_CTL, 4, -1, -1, 0);
 DEFINE_QNODE(slv_crypto_0_cfg, MSM8916_SLAVE_CRYPTO_0_CFG, 4, -1, -1, 0);
 DEFINE_QNODE(slv_dehr_cfg, MSM8916_SLAVE_DEHR_CFG, 4, -1, -1, 0);
@@ -239,7 +239,7 @@ DEFINE_QNODE(slv_sdcc_2, MSM8916_SLAVE_SDCC_2, 4, -1, -1, 0);
 DEFINE_QNODE(slv_security, MSM8916_SLAVE_SECURITY, 4, -1, -1, 0);
 DEFINE_QNODE(slv_snoc_cfg, MSM8916_SLAVE_SNOC_CFG, 4, -1, -1, 0);
 DEFINE_QNODE(slv_spdm, MSM8916_SLAVE_SPDM, 4, -1, -1, 0);
-DEFINE_QNODE(slv_srvc_snoc, MSM8916_SLAVE_SRVC_SNOC, 8, -1, 29, 0);
+DEFINE_QNODE(slv_srvc_snoc, MSM8916_SLAVE_SRVC_SNOC, 8, -1, -1, 0);
 DEFINE_QNODE(slv_tcsr, MSM8916_SLAVE_TCSR, 4, -1, -1, 0);
 DEFINE_QNODE(slv_tlmm, MSM8916_SLAVE_TLMM, 4, -1, -1, 0);
 DEFINE_QNODE(slv_usb_hs, MSM8916_SLAVE_USB_HS, 4, -1, -1, 0);
@@ -249,7 +249,7 @@ DEFINE_QNODE(snoc_bimc_0_slv, MSM8916_SNOC_BIMC_0_SLV, 8, -1, 24, MSM8916_SLAVE_
 DEFINE_QNODE(snoc_bimc_1_mas, MSM8916_SNOC_BIMC_1_MAS, 16, -1, -1, MSM8916_SNOC_BIMC_1_SLV);
 DEFINE_QNODE(snoc_bimc_1_slv, MSM8916_SNOC_BIMC_1_SLV, 8, -1, -1, MSM8916_SLAVE_EBI_CH0);
 DEFINE_QNODE(snoc_int_0, MSM8916_SNOC_INT_0, 8, 99, 130, MSM8916_SLAVE_QDSS_STM, MSM8916_SLAVE_IMEM, MSM8916_SNOC_PNOC_MAS);
-DEFINE_QNODE(snoc_int_1, MSM8916_SNOC_INT_1, 8, 100, 131, MSM8916_SLAVE_APSS, MSM8916_SLAVE_CATS_128, MSM8916_SLAVE_OCMEM_64);
+DEFINE_QNODE(snoc_int_1, MSM8916_SNOC_INT_1, 8, -1, -1, MSM8916_SLAVE_APSS, MSM8916_SLAVE_CATS_128, MSM8916_SLAVE_OCMEM_64);
 DEFINE_QNODE(snoc_int_bimc, MSM8916_SNOC_INT_BIMC, 8, 101, 132, MSM8916_SNOC_BIMC_0_MAS);
 DEFINE_QNODE(snoc_pcnoc_mas, MSM8916_SNOC_PNOC_MAS, 8, -1, -1, MSM8916_SNOC_PNOC_SLV);
 DEFINE_QNODE(snoc_pcnoc_slv, MSM8916_SNOC_PNOC_SLV, 8, -1, -1, MSM8916_PNOC_INT_0);
index 3a313e1..da68ce3 100644 (file)
@@ -618,6 +618,8 @@ static int msm8974_icc_set(struct icc_node *src, struct icc_node *dst)
 
        do_div(rate, src_qn->buswidth);
 
+       rate = min_t(u32, rate, INT_MAX);
+
        if (src_qn->rate == rate)
                return 0;
 
@@ -635,6 +637,14 @@ static int msm8974_icc_set(struct icc_node *src, struct icc_node *dst)
        return 0;
 }
 
+static int msm8974_get_bw(struct icc_node *node, u32 *avg, u32 *peak)
+{
+       *avg = 0;
+       *peak = 0;
+
+       return 0;
+}
+
 static int msm8974_icc_probe(struct platform_device *pdev)
 {
        const struct msm8974_icc_desc *desc;
@@ -688,6 +698,7 @@ static int msm8974_icc_probe(struct platform_device *pdev)
        provider->aggregate = icc_std_aggregate;
        provider->xlate = of_icc_xlate_onecell;
        provider->data = data;
+       provider->get_bw = msm8974_get_bw;
 
        ret = icc_provider_add(provider);
        if (ret) {
@@ -758,6 +769,7 @@ static struct platform_driver msm8974_noc_driver = {
        .driver = {
                .name = "qnoc-msm8974",
                .of_match_table = msm8974_noc_of_match,
+               .sync_state = icc_sync_state,
        },
 };
 module_platform_driver(msm8974_noc_driver);
index d4769a5..9820709 100644 (file)
@@ -157,8 +157,8 @@ struct qcom_icc_desc {
        }
 
 DEFINE_QNODE(mas_apps_proc, QCS404_MASTER_AMPSS_M0, 8, 0, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
-DEFINE_QNODE(mas_oxili, QCS404_MASTER_GRAPHICS_3D, 8, 6, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
-DEFINE_QNODE(mas_mdp, QCS404_MASTER_MDP_PORT0, 8, 8, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
+DEFINE_QNODE(mas_oxili, QCS404_MASTER_GRAPHICS_3D, 8, -1, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
+DEFINE_QNODE(mas_mdp, QCS404_MASTER_MDP_PORT0, 8, -1, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
 DEFINE_QNODE(mas_snoc_bimc_1, QCS404_SNOC_BIMC_1_MAS, 8, 76, -1, QCS404_SLAVE_EBI_CH0);
 DEFINE_QNODE(mas_tcu_0, QCS404_MASTER_TCU_0, 8, -1, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
 DEFINE_QNODE(mas_spdm, QCS404_MASTER_SPDM, 4, -1, -1, QCS404_PNOC_INT_3);
index 8964770..494b42a 100644 (file)
 #define DTE_IRQ_REMAP_INTCTL_MASK      (0x3ULL << 60)
 #define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1)
 #define DTE_IRQ_REMAP_INTCTL    (2ULL << 60)
-#define DTE_IRQ_TABLE_LEN       (8ULL << 1)
+#define DTE_IRQ_TABLE_LEN       (9ULL << 1)
 #define DTE_IRQ_REMAP_ENABLE    1ULL
 
 #define PAGE_MODE_NONE    0x00
index 82e4af8..23a790f 100644 (file)
@@ -29,6 +29,7 @@
 #include <asm/iommu_table.h>
 #include <asm/io_apic.h>
 #include <asm/irq_remapping.h>
+#include <asm/set_memory.h>
 
 #include <linux/crash_dump.h>
 
@@ -672,11 +673,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
        free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 }
 
+static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
+                                        gfp_t gfp, size_t size)
+{
+       int order = get_order(size);
+       void *buf = (void *)__get_free_pages(gfp, order);
+
+       if (buf &&
+           iommu_feature(iommu, FEATURE_SNP) &&
+           set_memory_4k((unsigned long)buf, (1 << order))) {
+               free_pages((unsigned long)buf, order);
+               buf = NULL;
+       }
+
+       return buf;
+}
+
 /* allocates the memory where the IOMMU will log its events to */
 static int __init alloc_event_buffer(struct amd_iommu *iommu)
 {
-       iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                                 get_order(EVT_BUFFER_SIZE));
+       iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
+                                             EVT_BUFFER_SIZE);
 
        return iommu->evt_buf ? 0 : -ENOMEM;
 }
@@ -715,8 +732,8 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
 /* allocates the memory where the IOMMU will log its events to */
 static int __init alloc_ppr_log(struct amd_iommu *iommu)
 {
-       iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                                 get_order(PPR_LOG_SIZE));
+       iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
+                                             PPR_LOG_SIZE);
 
        return iommu->ppr_log ? 0 : -ENOMEM;
 }
@@ -838,7 +855,7 @@ static int iommu_init_ga(struct amd_iommu *iommu)
 
 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
 {
-       iommu->cmd_sem = (void *)get_zeroed_page(GFP_KERNEL);
+       iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
 
        return iommu->cmd_sem ? 0 : -ENOMEM;
 }
index be43180..702fbaa 100644 (file)
@@ -69,6 +69,10 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
 {
        struct qcom_smmu *qsmmu;
 
+       /* Check to make sure qcom_scm has finished probing */
+       if (!qcom_scm_is_available())
+               return ERR_PTR(-EPROBE_DEFER);
+
        qsmmu = devm_kzalloc(smmu->dev, sizeof(*qsmmu), GFP_KERNEL);
        if (!qsmmu)
                return ERR_PTR(-ENOMEM);
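The early return above is the standard probe-deferral idiom: when a provider this driver depends on (here the SCM firmware interface) has not finished probing, returning -EPROBE_DEFER tells the driver core to retry the probe later instead of failing outright. A userspace model of the retry behaviour, using the kernel's errno value:

#include <stdbool.h>
#include <stdio.h>

#define EPROBE_DEFER	517	/* kernel errno: "requested probe deferral" */

static bool scm_ready;

static int smmu_probe_model(void)
{
	if (!scm_ready)
		return -EPROBE_DEFER;	/* core re-queues this probe */
	return 0;
}

int main(void)
{
	printf("first probe: %d\n", smmu_probe_model());	/* -517 */
	scm_ready = true;	/* the dependency's probe completes */
	printf("retried probe: %d\n", smmu_probe_model());	/* 0 */
	return 0;
}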
index b2e8044..b46dbfa 100644
@@ -335,7 +335,9 @@ static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
 
 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
 {
-       dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&pdev->physfn->dev));
+       struct pci_dev *physfn = pci_physfn(pdev);
+
+       dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
 }
 
 static int dmar_pci_bus_notifier(struct notifier_block *nb,
@@ -984,7 +986,8 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
                warn_invalid_dmar(phys_addr, " returns all ones");
                goto unmap;
        }
-       iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
+       if (ecap_vcs(iommu->ecap))
+               iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
 
        /* the registers might be more than one page */
        map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
index c662201..a49afa1 100644
@@ -179,7 +179,7 @@ static int rwbf_quirk;
  * (used when kernel is launched w/ TXT)
  */
 static int force_on = 0;
-int intel_iommu_tboot_noforce;
+static int intel_iommu_tboot_noforce;
 static int no_platform_optin;
 
 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
@@ -1833,7 +1833,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
                if (ecap_prs(iommu->ecap))
                        intel_svm_finish_prq(iommu);
        }
-       if (ecap_vcs(iommu->ecap) && vccap_pasid(iommu->vccap))
+       if (vccap_pasid(iommu->vccap))
                ioasid_unregister_allocator(&iommu->pasid_allocator);
 
 #endif
@@ -3212,7 +3212,7 @@ static void register_pasid_allocator(struct intel_iommu *iommu)
         * is active. All vIOMMU allocators will eventually be calling the same
         * host allocator.
         */
-       if (!ecap_vcs(iommu->ecap) || !vccap_pasid(iommu->vccap))
+       if (!vccap_pasid(iommu->vccap))
                return;
 
        pr_info("Register custom PASID allocator\n");
@@ -4884,7 +4884,8 @@ int __init intel_iommu_init(void)
         * Intel IOMMU is required for a TXT/tboot launch or platform
         * opt in, so enforce that.
         */
-       force_on = tboot_force_iommu() || platform_optin_force_iommu();
+       force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
+                   platform_optin_force_iommu();
 
        if (iommu_init_mempool()) {
                if (force_on)
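This iommu.c section pairs with the dmar.c hunk above it: vccap is now cached only when ecap_vcs is set, so later tests can check vccap_pasid() alone, and force_on once again honours intel_iommu=tboot_noforce (the variable also becomes static since nothing else reads it). A tboot launch forces the IOMMU on only if the user has not opted out, while a platform opt-in always forces it; the truth table as a small model:

#include <assert.h>
#include <stdbool.h>

static bool force_on(bool tboot_force, bool tboot_noforce, bool platform_optin)
{
	return (!tboot_noforce && tboot_force) || platform_optin;
}

int main(void)
{
	assert(force_on(true, false, false));	/* tboot launch forces on */
	assert(!force_on(true, true, false));	/* tboot_noforce vetoes it */
	assert(force_on(true, true, true));	/* platform opt-in still wins */
	return 0;
}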
index b53446b..0f4dc25 100644
@@ -264,16 +264,18 @@ int iommu_probe_device(struct device *dev)
         */
        iommu_alloc_default_domain(group, dev);
 
-       if (group->default_domain)
+       if (group->default_domain) {
                ret = __iommu_attach_device(group->default_domain, dev);
+               if (ret) {
+                       iommu_group_put(group);
+                       goto err_release;
+               }
+       }
 
        iommu_create_device_direct_mappings(group, dev);
 
        iommu_group_put(group);
 
-       if (ret)
-               goto err_release;
-
        if (ops->probe_finalize)
                ops->probe_finalize(dev);
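The reshuffle above makes iommu_probe_device() handle an attach failure at the point it happens, dropping the group reference before releasing the device, instead of carrying ret past the direct-mapping setup and testing it afterwards. The shape of the fix, as a toy model:

#include <stdio.h>

static int attach_device(int should_fail)
{
	return should_fail ? -19 : 0;	/* -ENODEV */
}

static int probe_model(int should_fail)
{
	int ret = attach_device(should_fail);

	if (ret) {
		puts("put group ref, release device");	/* unwind now */
		return ret;
	}
	puts("create direct mappings, finalize probe");
	return 0;
}

int main(void)
{
	probe_model(0);
	probe_model(1);
	return 0;
}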
 
index 7db6024..34391b0 100644
@@ -42,7 +42,6 @@
 #define ITS_FLAGS_CMDQ_NEEDS_FLUSHING          (1ULL << 0)
 #define ITS_FLAGS_WORKAROUND_CAVIUM_22375      (1ULL << 1)
 #define ITS_FLAGS_WORKAROUND_CAVIUM_23144      (1ULL << 2)
-#define ITS_FLAGS_SAVE_SUSPEND_STATE           (1ULL << 3)
 
 #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING    (1 << 0)
 #define RDIST_FLAGS_RD_TABLES_PREALLOCATED     (1 << 1)
@@ -4745,9 +4744,6 @@ static int its_save_disable(void)
        list_for_each_entry(its, &its_nodes, entry) {
                void __iomem *base;
 
-               if (!(its->flags & ITS_FLAGS_SAVE_SUSPEND_STATE))
-                       continue;
-
                base = its->base;
                its->ctlr_save = readl_relaxed(base + GITS_CTLR);
                err = its_force_quiescent(base);
@@ -4766,9 +4762,6 @@ err:
                list_for_each_entry_continue_reverse(its, &its_nodes, entry) {
                        void __iomem *base;
 
-                       if (!(its->flags & ITS_FLAGS_SAVE_SUSPEND_STATE))
-                               continue;
-
                        base = its->base;
                        writel_relaxed(its->ctlr_save, base + GITS_CTLR);
                }
@@ -4788,9 +4781,6 @@ static void its_restore_enable(void)
                void __iomem *base;
                int i;
 
-               if (!(its->flags & ITS_FLAGS_SAVE_SUSPEND_STATE))
-                       continue;
-
                base = its->base;
 
                /*
@@ -4798,7 +4788,10 @@ static void its_restore_enable(void)
                 * don't restore it since writing to CBASER or BASER<n>
                 * registers is undefined according to the GIC v3 ITS
                 * Specification.
+                *
+                * Firmware resuming with the ITS enabled is terminally broken.
                 */
+               WARN_ON(readl_relaxed(base + GITS_CTLR) & GITS_CTLR_ENABLE);
                ret = its_force_quiescent(base);
                if (ret) {
                        pr_err("ITS@%pa: failed to quiesce on resume: %d\n",
@@ -5078,9 +5071,6 @@ static int __init its_probe_one(struct resource *res,
                ctlr |= GITS_CTLR_ImDe;
        writel_relaxed(ctlr, its->base + GITS_CTLR);
 
-       if (GITS_TYPER_HCC(typer))
-               its->flags |= ITS_FLAGS_SAVE_SUSPEND_STATE;
-
        err = its_init_domain(handle, its);
        if (err)
                goto out_free_tables;
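This hunk drops ITS_FLAGS_SAVE_SUSPEND_STATE entirely, so the suspend save/restore path now runs for every ITS rather than only those with GITS_TYPER_HCC set, and the restore path gains a WARN_ON for firmware that hands the ITS back still enabled, since writing CBASER/BASER<n> in that state is undefined. A sketch of the added resume-time check, assuming GITS_CTLR_ENABLE is bit 0 as in the GICv3 spec:

#include <stdio.h>

#define GITS_CTLR_ENABLE	0x1u

static void its_restore_model(unsigned int ctlr)
{
	if (ctlr & GITS_CTLR_ENABLE)
		fprintf(stderr, "WARN: firmware resumed with the ITS enabled\n");
	/* ... quiesce, restore CBASER/BASER<n>, then re-enable ... */
}

int main(void)
{
	its_restore_model(0x0);			/* sane firmware: silent */
	its_restore_model(GITS_CTLR_ENABLE);	/* broken firmware: warns */
	return 0;
}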
index 1d02762..abd011f 100644
@@ -136,7 +136,7 @@ static int exiu_domain_translate(struct irq_domain *domain,
                if (fwspec->param_count != 2)
                        return -EINVAL;
                *hwirq = fwspec->param[0];
-               *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+               *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
        }
        return 0;
 }
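The exiu fix above is a straight off-by-one: a two-cell fwspec (param_count == 2) carries the hwirq in param[0] and the trigger type in param[1], so reading param[2] picked up an undefined cell. A model with the kernel's IRQ_TYPE_SENSE_MASK value:

#include <assert.h>

#define IRQ_TYPE_SENSE_MASK	0x0000000f

static unsigned int exiu_type(const unsigned int param[2])
{
	return param[1] & IRQ_TYPE_SENSE_MASK;	/* was param[2] */
}

int main(void)
{
	unsigned int spec[2] = { 37, 0x4 };	/* hwirq, IRQ_TYPE_LEVEL_HIGH */

	assert(exiu_type(spec) == 0x4);
	return 0;
}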
index 9644424..4bc453f 100644
@@ -712,10 +712,6 @@ static bool block_size_is_power_of_two(struct cache *cache)
        return cache->sectors_per_block_shift >= 0;
 }
 
-/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
-#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
-__always_inline
-#endif
 static dm_block_t block_div(dm_block_t b, uint32_t n)
 {
        do_div(b, n);
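The deleted #if block worked around gcc 4.6 on ARM emitting out-of-line __udivdi3/__umoddi3 calls for do_div(); the kernel's minimum supported gcc is now well past 4.6, so the annotation is dead weight. For reference, do_div() divides a 64-bit value in place by a 32-bit divisor and returns the remainder; a userspace model:

#include <assert.h>
#include <stdint.h>

static uint32_t do_div_model(uint64_t *n, uint32_t base)
{
	uint32_t rem = (uint32_t)(*n % base);

	*n /= base;
	return rem;
}

int main(void)
{
	uint64_t b = 1000005;

	assert(do_div_model(&b, 1000) == 5);	/* returns the remainder */
	assert(b == 1000);			/* quotient left in place */
	return 0;
}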
index 3fc3757..5a7a1b9 100644
@@ -3462,7 +3462,7 @@ static int get_mac(struct crypto_shash **hash, struct alg_spec *a, char **error,
        int r;
 
        if (a->alg_string) {
-               *hash = crypto_alloc_shash(a->alg_string, 0, 0);
+               *hash = crypto_alloc_shash(a->alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
                if (IS_ERR(*hash)) {
                        *error = error_alg;
                        r = PTR_ERR(*hash);
@@ -3519,7 +3519,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
                struct journal_completion comp;
 
                comp.ic = ic;
-               ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, 0);
+               ic->journal_crypt = crypto_alloc_skcipher(ic->journal_crypt_alg.alg_string, 0, CRYPTO_ALG_ALLOCATES_MEMORY);
                if (IS_ERR(ic->journal_crypt)) {
                        *error = "Invalid journal cipher";
                        r = PTR_ERR(ic->journal_crypt);
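Both hunks above pass CRYPTO_ALG_ALLOCATES_MEMORY as the lookup mask, which (with a zero type) restricts the search to implementations that do not allocate memory per request; dm-integrity sits on the I/O path, where such allocations can deadlock under memory pressure. A simplified model of the crypto API's type/mask matching, assuming the usual (flags ^ type) & mask == 0 rule and the kernel's flag value:

#include <assert.h>
#include <stdbool.h>

#define CRYPTO_ALG_ALLOCATES_MEMORY	0x00010000u

static bool alg_matches(unsigned int alg_flags, unsigned int type,
			unsigned int mask)
{
	return ((alg_flags ^ type) & mask) == 0;
}

int main(void)
{
	/* a non-allocating implementation is still selectable */
	assert(alg_matches(0, 0, CRYPTO_ALG_ALLOCATES_MEMORY));
	/* an allocating implementation is now rejected */
	assert(!alg_matches(CRYPTO_ALG_ALLOCATES_MEMORY, 0,
			    CRYPTO_ALG_ALLOCATES_MEMORY));
	return 0;
}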
index 9c1f7c4..dc8568a 100644
@@ -3728,6 +3728,17 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
        blk_limits_io_min(limits, chunk_size_bytes);
        blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
+
+       /*
+        * RAID10 personality requires bio splitting,
+        * RAID0/1/4/5/6 don't and process large discard bios properly.
+        */
+       if (rs_is_raid10(rs)) {
+               limits->discard_granularity = max(chunk_size_bytes,
+                                                 limits->discard_granularity);
+               limits->max_discard_sectors = min_not_zero(rs->md.chunk_sectors,
+                                                          limits->max_discard_sectors);
+       }
 }
 
 static void raid_postsuspend(struct dm_target *ti)
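For RAID10 the hunk above clamps discards to one chunk, using min_not_zero() so an unset (zero) stacked limit does not win the min(). A model of that helper's semantics:

#include <assert.h>

/* Prefer the smaller limit, but treat zero as "no limit set". */
static unsigned int min_not_zero(unsigned int a, unsigned int b)
{
	if (a == 0)
		return b;
	if (b == 0)
		return a;
	return a < b ? a : b;
}

int main(void)
{
	assert(min_not_zero(0, 128) == 128);	/* unset limit is ignored */
	assert(min_not_zero(64, 128) == 64);	/* chunk size caps discards */
	return 0;
}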
index ce543b7..7eeb7c4 100644
@@ -18,7 +18,6 @@
 #include <linux/mutex.h>
 #include <linux/delay.h>
 #include <linux/atomic.h>
-#include <linux/lcm.h>
 #include <linux/blk-mq.h>
 #include <linux/mount.h>
 #include <linux/dax.h>
@@ -1247,12 +1246,6 @@ void dm_table_event_callback(struct dm_table *t,
 
 void dm_table_event(struct dm_table *t)
 {
-       /*
-        * You can no longer call dm_table_event() from interrupt
-        * context, use a bottom half instead.
-        */
-       BUG_ON(in_interrupt());
-
        mutex_lock(&_event_lock);
        if (t->event_fn)
                t->event_fn(t->event_context);
@@ -1455,10 +1448,6 @@ int dm_calculate_queue_limits(struct dm_table *table,
                        zone_sectors = ti_limits.chunk_sectors;
                }
 
-               /* Stack chunk_sectors if target-specific splitting is required */
-               if (ti->max_io_len)
-                       ti_limits.chunk_sectors = lcm_not_zero(ti->max_io_len,
-                                                              ti_limits.chunk_sectors);
                /* Set I/O hints portion of queue limits */
                if (ti->type->io_hints)
                        ti->type->io_hints(ti, &ti_limits);
index 9ae4ce7..d5223a0 100644
@@ -319,7 +319,7 @@ err1:
 #else
 static int persistent_memory_claim(struct dm_writecache *wc)
 {
-       BUG();
+       return -EOPNOTSUPP;
 }
 #endif
 
@@ -2041,7 +2041,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
        struct wc_memory_superblock s;
 
        static struct dm_arg _args[] = {
-               {0, 10, "Invalid number of feature args"},
+               {0, 16, "Invalid number of feature args"},
        };
 
        as.argc = argc;
@@ -2479,6 +2479,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
                        extra_args += 2;
                if (wc->autocommit_time_set)
                        extra_args += 2;
+               if (wc->max_age != MAX_AGE_UNSPECIFIED)
+                       extra_args += 2;
                if (wc->cleaner)
                        extra_args++;
                if (wc->writeback_fua_set)
index c18fc25..4e0cbfe 100644
@@ -476,8 +476,10 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
                return -EAGAIN;
 
        map = dm_get_live_table(md, &srcu_idx);
-       if (!map)
-               return -EIO;
+       if (!map) {
+               ret = -EIO;
+               goto out;
+       }
 
        do {
                struct dm_target *tgt;
@@ -507,7 +509,6 @@ out:
 
 static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
                            struct block_device **bdev)
-       __acquires(md->io_barrier)
 {
        struct dm_target *tgt;
        struct dm_table *map;
@@ -541,7 +542,6 @@ retry:
 }
 
 static void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx)
-       __releases(md->io_barrier)
 {
        dm_put_live_table(md, srcu_idx);
 }
@@ -1037,15 +1037,18 @@ static sector_t max_io_len(struct dm_target *ti, sector_t sector)
        sector_t max_len;
 
        /*
-        * Does the target need to split even further?
-        * - q->limits.chunk_sectors reflects ti->max_io_len so
-        *   blk_max_size_offset() provides required splitting.
-        * - blk_max_size_offset() also respects q->limits.max_sectors
+        * Does the target need to split IO even further?
+        * - varied (per target) IO splitting is a tenet of DM; this
+        *   explains why stacked chunk_sectors based splitting via
+        *   blk_max_size_offset() isn't possible here. So pass in
+        *   ti->max_io_len to override stacked chunk_sectors.
         */
-       max_len = blk_max_size_offset(ti->table->md->queue,
-                                     target_offset);
-       if (len > max_len)
-               len = max_len;
+       if (ti->max_io_len) {
+               max_len = blk_max_size_offset(ti->table->md->queue,
+                                             target_offset, ti->max_io_len);
+               if (len > max_len)
+                       len = max_len;
+       }
 
        return len;
 }
@@ -1196,11 +1199,9 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
                 * ->zero_page_range() is mandatory dax operation. If we are
                 *  here, something is wrong.
                 */
-               dm_put_live_table(md, srcu_idx);
                goto out;
        }
        ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages);
-
  out:
        dm_put_live_table(md, srcu_idx);
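The dm.c changes above are the flip side of the dm-table hunk earlier: instead of stacking ti->max_io_len into the queue's chunk_sectors, max_io_len() now passes it straight to blk_max_size_offset(), so per-target splitting happens only for targets that ask for it. A simplified model of the boundary clamp, ignoring the queue's max_sectors cap and using plain modulo where the block layer may use a power-of-two mask:

#include <assert.h>
#include <stdint.h>

static uint64_t max_io_len_model(uint64_t offset, uint64_t len,
				 uint64_t max_io_len)
{
	if (max_io_len) {
		uint64_t boundary = max_io_len - (offset % max_io_len);

		if (len > boundary)
			len = boundary;	/* do not cross the target boundary */
	}
	return len;
}

int main(void)
{
	/* an IO starting 10 sectors into a 64-sector region is cut at 54 */
	assert(max_io_len_model(10, 100, 64) == 54);
	/* targets without max_io_len leave the length alone */
	assert(max_io_len_model(10, 100, 0) == 100);
	return 0;
}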
 
index 98bac4f..0037c6e 100644
@@ -8582,26 +8582,6 @@ void md_write_end(struct mddev *mddev)
 
 EXPORT_SYMBOL(md_write_end);
 
-/* This is used by raid0 and raid10 */
-void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
-                       struct bio *bio, sector_t start, sector_t size)
-{
-       struct bio *discard_bio = NULL;
-
-       if (__blkdev_issue_discard(rdev->bdev, start, size,
-               GFP_NOIO, 0, &discard_bio) || !discard_bio)
-               return;
-
-       bio_chain(discard_bio, bio);
-       bio_clone_blkg_association(discard_bio, bio);
-       if (mddev->gendisk)
-               trace_block_bio_remap(bdev_get_queue(rdev->bdev),
-                       discard_bio, disk_devt(mddev->gendisk),
-                       bio->bi_iter.bi_sector);
-       submit_bio_noacct(discard_bio);
-}
-EXPORT_SYMBOL(md_submit_discard_bio);
-
 /* md_allow_write(mddev)
  * Calling this ensures that the array is marked 'active' so that writes
  * may proceed without blocking.  It is important to call this before
index ccfb698..bb645bc 100644
@@ -311,7 +311,7 @@ struct mddev {
        int                             external;       /* metadata is
                                                         * managed externally */
        char                            metadata_type[17]; /* externally set*/
-       int                             chunk_sectors;
+       unsigned int                    chunk_sectors;
        time64_t                        ctime, utime;
        int                             level, layout;
        char                            clevel[16];
@@ -339,7 +339,7 @@ struct mddev {
         */
        sector_t                        reshape_position;
        int                             delta_disks, new_level, new_layout;
-       int                             new_chunk_sectors;
+       unsigned int                    new_chunk_sectors;
        int                             reshape_backwards;
 
        struct md_thread                *thread;        /* management thread */
@@ -713,8 +713,6 @@ extern void md_write_end(struct mddev *mddev);
 extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
 extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
 extern void md_finish_reshape(struct mddev *mddev);
-extern void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
-                       struct bio *bio, sector_t start, sector_t size);
 
 extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
 extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
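The chunk_sectors fields above switch to unsigned int, matching the block layer's unsigned queue_limits.chunk_sectors that the raid code now compares against; mixing signednesses in such comparisons silently promotes the signed side. A two-line reminder of the failure mode:

#include <assert.h>

int main(void)
{
	int s = -1;		/* a signed count gone negative */
	unsigned int u = 128;	/* an unsigned block-layer limit */

	/* In s < u, s converts to unsigned first: -1 becomes UINT_MAX,
	 * so the "obviously true" comparison is false. */
	assert(!(s < u));
	return 0;
}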
index 6f44177..35843df 100644
@@ -477,6 +477,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
 
        for (disk = 0; disk < zone->nb_dev; disk++) {
                sector_t dev_start, dev_end;
+               struct bio *discard_bio = NULL;
                struct md_rdev *rdev;
 
                if (disk < start_disk_index)
@@ -499,9 +500,18 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
 
                rdev = conf->devlist[(zone - conf->strip_zone) *
                        conf->strip_zone[0].nb_dev + disk];
-               md_submit_discard_bio(mddev, rdev, bio,
+               if (__blkdev_issue_discard(rdev->bdev,
                        dev_start + zone->dev_start + rdev->data_offset,
-                       dev_end - dev_start);
+                       dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
+                   !discard_bio)
+                       continue;
+               bio_chain(discard_bio, bio);
+               bio_clone_blkg_association(discard_bio, bio);
+               if (mddev->gendisk)
+                       trace_block_bio_remap(bdev_get_queue(rdev->bdev),
+                               discard_bio, disk_devt(mddev->gendisk),
+                               bio->bi_iter.bi_sector);
+               submit_bio_noacct(discard_bio);
        }
        bio_endio(bio);
 }
index b7bca67..3b598a3 100644
@@ -91,7 +91,7 @@ static inline struct r10bio *get_resync_r10bio(struct bio *bio)
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
        struct r10conf *conf = data;
-       int size = offsetof(struct r10bio, devs[conf->geo.raid_disks]);
+       int size = offsetof(struct r10bio, devs[conf->copies]);
 
        /* allocate a r10bio with room for raid_disks entries in the
         * bios array */
@@ -238,7 +238,7 @@ static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
 {
        int i;
 
-       for (i = 0; i < conf->geo.raid_disks; i++) {
+       for (i = 0; i < conf->copies; i++) {
                struct bio **bio = & r10_bio->devs[i].bio;
                if (!BIO_SPECIAL(*bio))
                        bio_put(*bio);
@@ -327,7 +327,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
        int slot;
        int repl = 0;
 
-       for (slot = 0; slot < conf->geo.raid_disks; slot++) {
+       for (slot = 0; slot < conf->copies; slot++) {
                if (r10_bio->devs[slot].bio == bio)
                        break;
                if (r10_bio->devs[slot].repl_bio == bio) {
@@ -336,6 +336,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
                }
        }
 
+       BUG_ON(slot == conf->copies);
        update_head_pos(slot, r10_bio);
 
        if (slotp)
@@ -1275,75 +1276,12 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
        }
 }
 
-static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
-{
-       int i;
-       struct r10conf *conf = mddev->private;
-       struct md_rdev *blocked_rdev;
-
-retry_wait:
-       blocked_rdev = NULL;
-       rcu_read_lock();
-       for (i = 0; i < conf->copies; i++) {
-               struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
-               struct md_rdev *rrdev = rcu_dereference(
-                       conf->mirrors[i].replacement);
-               if (rdev == rrdev)
-                       rrdev = NULL;
-               if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-                       atomic_inc(&rdev->nr_pending);
-                       blocked_rdev = rdev;
-                       break;
-               }
-               if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
-                       atomic_inc(&rrdev->nr_pending);
-                       blocked_rdev = rrdev;
-                       break;
-               }
-
-               if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
-                       sector_t first_bad;
-                       sector_t dev_sector = r10_bio->devs[i].addr;
-                       int bad_sectors;
-                       int is_bad;
-
-                       /* Discard request doesn't care the write result
-                        * so it doesn't need to wait blocked disk here.
-                        */
-                       if (!r10_bio->sectors)
-                               continue;
-
-                       is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors,
-                                            &first_bad, &bad_sectors);
-                       if (is_bad < 0) {
-                               /* Mustn't write here until the bad block
-                                * is acknowledged
-                                */
-                               atomic_inc(&rdev->nr_pending);
-                               set_bit(BlockedBadBlocks, &rdev->flags);
-                               blocked_rdev = rdev;
-                               break;
-                       }
-               }
-       }
-       rcu_read_unlock();
-
-       if (unlikely(blocked_rdev)) {
-               /* Have to wait for this device to get unblocked, then retry */
-               allow_barrier(conf);
-               raid10_log(conf->mddev, "%s wait rdev %d blocked",
-                               __func__, blocked_rdev->raid_disk);
-               md_wait_for_blocked_rdev(blocked_rdev, mddev);
-               wait_barrier(conf);
-               goto retry_wait;
-       }
-}
-
 static void raid10_write_request(struct mddev *mddev, struct bio *bio,
                                 struct r10bio *r10_bio)
 {
        struct r10conf *conf = mddev->private;
        int i;
+       struct md_rdev *blocked_rdev;
        sector_t sectors;
        int max_sectors;
 
@@ -1401,9 +1339,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
        r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
        raid10_find_phys(conf, r10_bio);
-
-       wait_blocked_dev(mddev, r10_bio);
-
+retry_write:
+       blocked_rdev = NULL;
        rcu_read_lock();
        max_sectors = r10_bio->sectors;
 
@@ -1414,6 +1351,16 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
                        conf->mirrors[d].replacement);
                if (rdev == rrdev)
                        rrdev = NULL;
+               if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+                       atomic_inc(&rdev->nr_pending);
+                       blocked_rdev = rdev;
+                       break;
+               }
+               if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
+                       atomic_inc(&rrdev->nr_pending);
+                       blocked_rdev = rrdev;
+                       break;
+               }
                if (rdev && (test_bit(Faulty, &rdev->flags)))
                        rdev = NULL;
                if (rrdev && (test_bit(Faulty, &rrdev->flags)))
@@ -1434,6 +1381,15 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
                        is_bad = is_badblock(rdev, dev_sector, max_sectors,
                                             &first_bad, &bad_sectors);
+                       if (is_bad < 0) {
+                               /* Mustn't write here until the bad block
+                                * is acknowledged
+                                */
+                               atomic_inc(&rdev->nr_pending);
+                               set_bit(BlockedBadBlocks, &rdev->flags);
+                               blocked_rdev = rdev;
+                               break;
+                       }
                        if (is_bad && first_bad <= dev_sector) {
                                /* Cannot write here at all */
                                bad_sectors -= (dev_sector - first_bad);
@@ -1469,6 +1425,35 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
        }
        rcu_read_unlock();
 
+       if (unlikely(blocked_rdev)) {
+               /* Have to wait for this device to get unblocked, then retry */
+               int j;
+               int d;
+
+               for (j = 0; j < i; j++) {
+                       if (r10_bio->devs[j].bio) {
+                               d = r10_bio->devs[j].devnum;
+                               rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+                       }
+                       if (r10_bio->devs[j].repl_bio) {
+                               struct md_rdev *rdev;
+                               d = r10_bio->devs[j].devnum;
+                               rdev = conf->mirrors[d].replacement;
+                               if (!rdev) {
+                                       /* Race with remove_disk */
+                                       smp_mb();
+                                       rdev = conf->mirrors[d].rdev;
+                               }
+                               rdev_dec_pending(rdev, mddev);
+                       }
+               }
+               allow_barrier(conf);
+               raid10_log(conf->mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
+               md_wait_for_blocked_rdev(blocked_rdev, mddev);
+               wait_barrier(conf);
+               goto retry_write;
+       }
+
        if (max_sectors < r10_bio->sectors)
                r10_bio->sectors = max_sectors;
 
@@ -1508,7 +1493,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
        r10_bio->mddev = mddev;
        r10_bio->sector = bio->bi_iter.bi_sector;
        r10_bio->state = 0;
-       memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->geo.raid_disks);
+       memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies);
 
        if (bio_data_dir(bio) == READ)
                raid10_read_request(mddev, bio, r10_bio);
@@ -1516,296 +1501,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
                raid10_write_request(mddev, bio, r10_bio);
 }
 
-static struct bio *raid10_split_bio(struct r10conf *conf,
-                       struct bio *bio, sector_t sectors, bool want_first)
-{
-       struct bio *split;
-
-       split = bio_split(bio, sectors, GFP_NOIO, &conf->bio_split);
-       bio_chain(split, bio);
-       allow_barrier(conf);
-       if (want_first) {
-               submit_bio_noacct(bio);
-               bio = split;
-       } else
-               submit_bio_noacct(split);
-       wait_barrier(conf);
-
-       return bio;
-}
-
-static void raid_end_discard_bio(struct r10bio *r10bio)
-{
-       struct r10conf *conf = r10bio->mddev->private;
-       struct r10bio *first_r10bio;
-
-       while (atomic_dec_and_test(&r10bio->remaining)) {
-
-               allow_barrier(conf);
-
-               if (!test_bit(R10BIO_Discard, &r10bio->state)) {
-                       first_r10bio = (struct r10bio *)r10bio->master_bio;
-                       free_r10bio(r10bio);
-                       r10bio = first_r10bio;
-               } else {
-                       md_write_end(r10bio->mddev);
-                       bio_endio(r10bio->master_bio);
-                       free_r10bio(r10bio);
-                       break;
-               }
-       }
-}
-
-static void raid10_end_discard_request(struct bio *bio)
-{
-       struct r10bio *r10_bio = bio->bi_private;
-       struct r10conf *conf = r10_bio->mddev->private;
-       struct md_rdev *rdev = NULL;
-       int dev;
-       int slot, repl;
-
-       /*
-        * We don't care the return value of discard bio
-        */
-       if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
-               set_bit(R10BIO_Uptodate, &r10_bio->state);
-
-       dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
-       if (repl)
-               rdev = conf->mirrors[dev].replacement;
-       if (!rdev) {
-               /* raid10_remove_disk uses smp_mb to make sure rdev is set to
-                * replacement before setting replacement to NULL. It can read
-                * rdev first without barrier protect even replacment is NULL
-                */
-               smp_rmb();
-               rdev = conf->mirrors[dev].rdev;
-       }
-
-       raid_end_discard_bio(r10_bio);
-       rdev_dec_pending(rdev, conf->mddev);
-}
-
-/* There are some limitations to handle discard bio
- * 1st, the discard size is bigger than stripe_size*2.
- * 2st, if the discard bio spans reshape progress, we use the old way to
- * handle discard bio
- */
-static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
-{
-       struct r10conf *conf = mddev->private;
-       struct geom *geo = &conf->geo;
-       struct r10bio *r10_bio, *first_r10bio;
-       int far_copies = geo->far_copies;
-       bool first_copy = true;
-
-       int disk;
-       sector_t chunk;
-       unsigned int stripe_size;
-       sector_t split_size;
-
-       sector_t bio_start, bio_end;
-       sector_t first_stripe_index, last_stripe_index;
-       sector_t start_disk_offset;
-       unsigned int start_disk_index;
-       sector_t end_disk_offset;
-       unsigned int end_disk_index;
-       unsigned int remainder;
-
-       if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-               return -EAGAIN;
-
-       wait_barrier(conf);
-
-       /* Check reshape again to avoid reshape happens after checking
-        * MD_RECOVERY_RESHAPE and before wait_barrier
-        */
-       if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-               goto out;
-
-       stripe_size = geo->raid_disks << geo->chunk_shift;
-       bio_start = bio->bi_iter.bi_sector;
-       bio_end = bio_end_sector(bio);
-
-       /* Maybe one discard bio is smaller than strip size or across one stripe
-        * and discard region is larger than one stripe size. For far offset layout,
-        * if the discard region is not aligned with stripe size, there is hole
-        * when we submit discard bio to member disk. For simplicity, we only
-        * handle discard bio which discard region is bigger than stripe_size*2
-        */
-       if (bio_sectors(bio) < stripe_size*2)
-               goto out;
-
-       /* For far and far offset layout, if bio is not aligned with stripe size,
-        * it splits the part that is not aligned with strip size.
-        */
-       div_u64_rem(bio_start, stripe_size, &remainder);
-       if ((far_copies > 1) && remainder) {
-               split_size = stripe_size - remainder;
-               bio = raid10_split_bio(conf, bio, split_size, false);
-       }
-       div_u64_rem(bio_end, stripe_size, &remainder);
-       if ((far_copies > 1) && remainder) {
-               split_size = bio_sectors(bio) - remainder;
-               bio = raid10_split_bio(conf, bio, split_size, true);
-       }
-
-       bio_start = bio->bi_iter.bi_sector;
-       bio_end = bio_end_sector(bio);
-
-       /* raid10 uses chunk as the unit to store data. It's similar like raid0.
-        * One stripe contains the chunks from all member disk (one chunk from
-        * one disk at the same HBA address). For layout detail, see 'man md 4'
-        */
-       chunk = bio_start >> geo->chunk_shift;
-       chunk *= geo->near_copies;
-       first_stripe_index = chunk;
-       start_disk_index = sector_div(first_stripe_index, geo->raid_disks);
-       if (geo->far_offset)
-               first_stripe_index *= geo->far_copies;
-       start_disk_offset = (bio_start & geo->chunk_mask) +
-                               (first_stripe_index << geo->chunk_shift);
-
-       chunk = bio_end >> geo->chunk_shift;
-       chunk *= geo->near_copies;
-       last_stripe_index = chunk;
-       end_disk_index = sector_div(last_stripe_index, geo->raid_disks);
-       if (geo->far_offset)
-               last_stripe_index *= geo->far_copies;
-       end_disk_offset = (bio_end & geo->chunk_mask) +
-                               (last_stripe_index << geo->chunk_shift);
-
-retry_discard:
-       r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
-       r10_bio->mddev = mddev;
-       r10_bio->state = 0;
-       r10_bio->sectors = 0;
-       memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
-       wait_blocked_dev(mddev, r10_bio);
-
-       /* For far layout it needs more than one r10bio to cover all regions.
-        * Inspired by raid10_sync_request, we can use the first r10bio->master_bio
-        * to record the discard bio. Other r10bio->master_bio record the first
-        * r10bio. The first r10bio only release after all other r10bios finish.
-        * The discard bio returns only first r10bio finishes
-        */
-       if (first_copy) {
-               r10_bio->master_bio = bio;
-               set_bit(R10BIO_Discard, &r10_bio->state);
-               first_copy = false;
-               first_r10bio = r10_bio;
-       } else
-               r10_bio->master_bio = (struct bio *)first_r10bio;
-
-       rcu_read_lock();
-       for (disk = 0; disk < geo->raid_disks; disk++) {
-               struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
-               struct md_rdev *rrdev = rcu_dereference(
-                       conf->mirrors[disk].replacement);
-
-               r10_bio->devs[disk].bio = NULL;
-               r10_bio->devs[disk].repl_bio = NULL;
-
-               if (rdev && (test_bit(Faulty, &rdev->flags)))
-                       rdev = NULL;
-               if (rrdev && (test_bit(Faulty, &rrdev->flags)))
-                       rrdev = NULL;
-               if (!rdev && !rrdev)
-                       continue;
-
-               if (rdev) {
-                       r10_bio->devs[disk].bio = bio;
-                       atomic_inc(&rdev->nr_pending);
-               }
-               if (rrdev) {
-                       r10_bio->devs[disk].repl_bio = bio;
-                       atomic_inc(&rrdev->nr_pending);
-               }
-       }
-       rcu_read_unlock();
-
-       atomic_set(&r10_bio->remaining, 1);
-       for (disk = 0; disk < geo->raid_disks; disk++) {
-               sector_t dev_start, dev_end;
-               struct bio *mbio, *rbio = NULL;
-               struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
-               struct md_rdev *rrdev = rcu_dereference(
-                       conf->mirrors[disk].replacement);
-
-               /*
-                * Now start to calculate the start and end address for each disk.
-                * The space between dev_start and dev_end is the discard region.
-                *
-                * For dev_start, it needs to consider three conditions:
-                * 1st, the disk is before start_disk, you can imagine the disk in
-                * the next stripe. So the dev_start is the start address of next
-                * stripe.
-                * 2st, the disk is after start_disk, it means the disk is at the
-                * same stripe of first disk
-                * 3st, the first disk itself, we can use start_disk_offset directly
-                */
-               if (disk < start_disk_index)
-                       dev_start = (first_stripe_index + 1) * mddev->chunk_sectors;
-               else if (disk > start_disk_index)
-                       dev_start = first_stripe_index * mddev->chunk_sectors;
-               else
-                       dev_start = start_disk_offset;
-
-               if (disk < end_disk_index)
-                       dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
-               else if (disk > end_disk_index)
-                       dev_end = last_stripe_index * mddev->chunk_sectors;
-               else
-                       dev_end = end_disk_offset;
-
-               /* It only handles discard bio which size is >= stripe size, so
-                * dev_end > dev_start all the time
-                */
-               if (r10_bio->devs[disk].bio) {
-                       mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
-                       mbio->bi_end_io = raid10_end_discard_request;
-                       mbio->bi_private = r10_bio;
-                       r10_bio->devs[disk].bio = mbio;
-                       r10_bio->devs[disk].devnum = disk;
-                       atomic_inc(&r10_bio->remaining);
-                       md_submit_discard_bio(mddev, rdev, mbio,
-                                       dev_start + choose_data_offset(r10_bio, rdev),
-                                       dev_end - dev_start);
-                       bio_endio(mbio);
-               }
-               if (r10_bio->devs[disk].repl_bio) {
-                       rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
-                       rbio->bi_end_io = raid10_end_discard_request;
-                       rbio->bi_private = r10_bio;
-                       r10_bio->devs[disk].repl_bio = rbio;
-                       r10_bio->devs[disk].devnum = disk;
-                       atomic_inc(&r10_bio->remaining);
-                       md_submit_discard_bio(mddev, rrdev, rbio,
-                                       dev_start + choose_data_offset(r10_bio, rrdev),
-                                       dev_end - dev_start);
-                       bio_endio(rbio);
-               }
-       }
-
-       if (!geo->far_offset && --far_copies) {
-               first_stripe_index += geo->stride >> geo->chunk_shift;
-               start_disk_offset += geo->stride;
-               last_stripe_index += geo->stride >> geo->chunk_shift;
-               end_disk_offset += geo->stride;
-               atomic_inc(&first_r10bio->remaining);
-               raid_end_discard_bio(r10_bio);
-               wait_barrier(conf);
-               goto retry_discard;
-       }
-
-       raid_end_discard_bio(r10_bio);
-
-       return 0;
-out:
-       allow_barrier(conf);
-       return -EAGAIN;
-}
-
 static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 {
        struct r10conf *conf = mddev->private;
@@ -1820,10 +1515,6 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
        if (!md_write_start(mddev, bio))
                return false;
 
-       if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
-               if (!raid10_handle_discard(mddev, bio))
-                       return true;
-
        /*
         * If this request crosses a chunk boundary, we need to split
         * it.
@@ -4063,7 +3754,7 @@ static int raid10_run(struct mddev *mddev)
 
        if (mddev->queue) {
                blk_queue_max_discard_sectors(mddev->queue,
-                                             UINT_MAX);
+                                             mddev->chunk_sectors);
                blk_queue_max_write_same_sectors(mddev->queue, 0);
                blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
                blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
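This large revert removes the raid10 discard fast path again (raid10_handle_discard() and its helpers), folds the blocked-device wait back into raid10_write_request(), sizes the per-bio device array by conf->copies rather than geo.raid_disks, and caps max_discard_sectors at the chunk size. The array sizing uses the flexible-array offsetof idiom; a sketch with hypothetical field names:

#include <assert.h>
#include <stddef.h>

struct dev_slot { void *bio; void *repl_bio; };	/* illustrative fields */

struct r10bio_model {
	int sectors;
	struct dev_slot devs[];	/* conf->copies entries, not raid_disks */
};

int main(void)
{
	int copies = 2;
	size_t size = offsetof(struct r10bio_model, devs[copies]);

	assert(size == offsetof(struct r10bio_model, devs) +
		       copies * sizeof(struct dev_slot));
	return 0;
}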
index 1461fd5..79cd2b7 100644
@@ -179,6 +179,5 @@ enum r10bio_state {
        R10BIO_Previous,
 /* failfast devices did receive failfast requests. */
        R10BIO_FailFast,
-       R10BIO_Discard,
 };
 #endif
index e4d8446..04b13cd 100644
@@ -88,13 +88,15 @@ enum pulse8_msgcodes {
        MSGCODE_SET_PHYSICAL_ADDRESS,   /* 0x20 */
        MSGCODE_GET_DEVICE_TYPE,
        MSGCODE_SET_DEVICE_TYPE,
-       MSGCODE_GET_HDMI_VERSION,
+       MSGCODE_GET_HDMI_VERSION,       /* Removed in FW >= 10 */
        MSGCODE_SET_HDMI_VERSION,
        MSGCODE_GET_OSD_NAME,
        MSGCODE_SET_OSD_NAME,
        MSGCODE_WRITE_EEPROM,
        MSGCODE_GET_ADAPTER_TYPE,       /* 0x28 */
        MSGCODE_SET_ACTIVE_SOURCE,
+       MSGCODE_GET_AUTO_POWER_ON,      /* New for FW >= 10 */
+       MSGCODE_SET_AUTO_POWER_ON,
 
        MSGCODE_FRAME_EOM = 0x80,
        MSGCODE_FRAME_ACK = 0x40,
@@ -143,6 +145,8 @@ static const char * const pulse8_msgnames[] = {
        "WRITE_EEPROM",
        "GET_ADAPTER_TYPE",
        "SET_ACTIVE_SOURCE",
+       "GET_AUTO_POWER_ON",
+       "SET_AUTO_POWER_ON",
 };
 
 static const char *pulse8_msgname(u8 cmd)
@@ -579,12 +583,14 @@ static int pulse8_cec_adap_log_addr(struct cec_adapter *adap, u8 log_addr)
        if (err)
                goto unlock;
 
-       cmd[0] = MSGCODE_SET_HDMI_VERSION;
-       cmd[1] = adap->log_addrs.cec_version;
-       err = pulse8_send_and_wait(pulse8, cmd, 2,
-                                  MSGCODE_COMMAND_ACCEPTED, 0);
-       if (err)
-               goto unlock;
+       if (pulse8->vers < 10) {
+               cmd[0] = MSGCODE_SET_HDMI_VERSION;
+               cmd[1] = adap->log_addrs.cec_version;
+               err = pulse8_send_and_wait(pulse8, cmd, 2,
+                                          MSGCODE_COMMAND_ACCEPTED, 0);
+               if (err)
+                       goto unlock;
+       }
 
        if (adap->log_addrs.osd_name[0]) {
                size_t osd_len = strlen(adap->log_addrs.osd_name);
@@ -650,7 +656,6 @@ static void pulse8_disconnect(struct serio *serio)
        struct pulse8 *pulse8 = serio_get_drvdata(serio);
 
        cec_unregister_adapter(pulse8->adap);
-       pulse8->serio = NULL;
        serio_set_drvdata(serio, NULL);
        serio_close(serio);
 }
@@ -692,6 +697,14 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio,
        dev_dbg(pulse8->dev, "Autonomous mode: %s",
                data[0] ? "on" : "off");
 
+       if (pulse8->vers >= 10) {
+               cmd[0] = MSGCODE_GET_AUTO_POWER_ON;
+               err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 1);
+               if (!err)
+                       dev_dbg(pulse8->dev, "Auto Power On: %s",
+                               data[0] ? "on" : "off");
+       }
+
        cmd[0] = MSGCODE_GET_DEVICE_TYPE;
        err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 1);
        if (err)
@@ -753,12 +766,15 @@ static int pulse8_setup(struct pulse8 *pulse8, struct serio *serio,
        dev_dbg(pulse8->dev, "Physical address: %x.%x.%x.%x\n",
                cec_phys_addr_exp(*pa));
 
-       cmd[0] = MSGCODE_GET_HDMI_VERSION;
-       err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 1);
-       if (err)
-               return err;
-       log_addrs->cec_version = data[0];
-       dev_dbg(pulse8->dev, "CEC version: %d\n", log_addrs->cec_version);
+       log_addrs->cec_version = CEC_OP_CEC_VERSION_1_4;
+       if (pulse8->vers < 10) {
+               cmd[0] = MSGCODE_GET_HDMI_VERSION;
+               err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 1);
+               if (err)
+                       return err;
+               log_addrs->cec_version = data[0];
+               dev_dbg(pulse8->dev, "CEC version: %d\n", log_addrs->cec_version);
+       }
 
        cmd[0] = MSGCODE_GET_OSD_NAME;
        err = pulse8_send_and_wait(pulse8, cmd, 1, cmd[0], 0);
@@ -830,8 +846,10 @@ static int pulse8_connect(struct serio *serio, struct serio_driver *drv)
        pulse8->adap = cec_allocate_adapter(&pulse8_cec_adap_ops, pulse8,
                                            dev_name(&serio->dev), caps, 1);
        err = PTR_ERR_OR_ZERO(pulse8->adap);
-       if (err < 0)
-               goto free_device;
+       if (err < 0) {
+               kfree(pulse8);
+               return err;
+       }
 
        pulse8->dev = &serio->dev;
        serio_set_drvdata(serio, pulse8);
@@ -874,8 +892,6 @@ close_serio:
        serio_close(serio);
 delete_adap:
        cec_delete_adapter(pulse8->adap);
-free_device:
-       kfree(pulse8);
        return err;
 }
 
index 4eab6d8..89e3839 100644
@@ -414,6 +414,17 @@ static int __vb2_queue_alloc(struct vb2_queue *q, enum vb2_memory memory,
                vb->index = q->num_buffers + buffer;
                vb->type = q->type;
                vb->memory = memory;
+               /*
+                * We need to set these flags here so that the videobuf2 core
+                * will call ->prepare()/->finish() cache sync/flush on vb2
+                * buffers when appropriate. However, we can avoid explicit
+                * ->prepare() and ->finish() cache sync for DMABUF buffers,
+                * because DMA exporter takes care of it.
+                */
+               if (q->memory != VB2_MEMORY_DMABUF) {
+                       vb->need_cache_sync_on_prepare = 1;
+                       vb->need_cache_sync_on_finish = 1;
+               }
                for (plane = 0; plane < num_planes; ++plane) {
                        vb->planes[plane].length = plane_sizes[plane];
                        vb->planes[plane].min_length = plane_sizes[plane];
index a3cb104..7e152bb 100644
@@ -253,17 +253,31 @@ config VIDEO_MEDIATEK_VCODEC
        depends on MTK_IOMMU || COMPILE_TEST
        depends on VIDEO_DEV && VIDEO_V4L2
        depends on ARCH_MEDIATEK || COMPILE_TEST
+       depends on VIDEO_MEDIATEK_VPU || MTK_SCP
+       # The two following lines ensure we have the same state ("m" or "y") as
+       # our dependencies, to avoid missing symbols during link.
+       depends on VIDEO_MEDIATEK_VPU || !VIDEO_MEDIATEK_VPU
+       depends on MTK_SCP || !MTK_SCP
        select VIDEOBUF2_DMA_CONTIG
        select V4L2_MEM2MEM_DEV
-       select VIDEO_MEDIATEK_VPU
-       select MTK_SCP
+       select VIDEO_MEDIATEK_VCODEC_VPU if VIDEO_MEDIATEK_VPU
+       select VIDEO_MEDIATEK_VCODEC_SCP if MTK_SCP
        help
-           Mediatek video codec driver provides HW capability to
-           encode and decode in a range of video formats
-           This driver rely on VPU driver to communicate with VPU.
+         Mediatek video codec driver provides HW capability to
+         encode and decode in a range of video formats on MT8173
+         and MT8183.
+
+         Note that support for MT8173 requires VIDEO_MEDIATEK_VPU to
+         also be selected. Support for MT8183 depends on MTK_SCP.
+
+         To compile this driver as modules, choose M here: the
+         modules will be called mtk-vcodec-dec and mtk-vcodec-enc.
+
+config VIDEO_MEDIATEK_VCODEC_VPU
+       bool
 
-           To compile this driver as modules, choose M here: the
-           modules will be called mtk-vcodec-dec and mtk-vcodec-enc.
+config VIDEO_MEDIATEK_VCODEC_SCP
+       bool
 
 config VIDEO_MEM2MEM_DEINTERLACE
        tristate "Deinterlace support"
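The Kconfig hunk above uses the "depends on FOO || !FOO" idiom its own comment describes: in tristate logic that expression evaluates to m when FOO=m, which stops this driver from being built-in (y) while a dependency is a module and would leave unresolved symbols at link time. A model of the rule, encoding n/m/y as 0/1/2:

#include <assert.h>
#include <stdbool.h>

/* "depends on FOO || !FOO": the driver's state must not exceed m when
 * FOO is m; any state is fine when FOO is y or disabled. */
static bool dep_ok(int drv, int foo)
{
	return foo == 0 || drv <= foo;
}

int main(void)
{
	assert(dep_ok(2, 2));	/* y with y: links fine */
	assert(dep_ok(1, 1));	/* m with m: links fine */
	assert(!dep_ok(2, 1));	/* y with m: missing symbols at link */
	assert(dep_ok(2, 0));	/* dependency disabled entirely: fine */
	return 0;
}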
index cd902b1..63fce1b 100644
@@ -307,6 +307,7 @@ static int mmpcam_platform_remove(struct platform_device *pdev)
  * Suspend/resume support.
  */
 
+#ifdef CONFIG_PM
 static int mmpcam_runtime_resume(struct device *dev)
 {
        struct mmp_camera *cam = dev_get_drvdata(dev);
@@ -352,6 +353,7 @@ static int __maybe_unused mmpcam_resume(struct device *dev)
                return mccic_resume(&cam->mcam);
        return 0;
 }
+#endif
 
 static const struct dev_pm_ops mmpcam_pm_ops = {
        SET_RUNTIME_PM_OPS(mmpcam_runtime_suspend, mmpcam_runtime_resume, NULL)
index f679c6e..4618d43 100644
@@ -24,4 +24,12 @@ mtk-vcodec-enc-y := venc/venc_vp8_if.o \
 
 mtk-vcodec-common-y := mtk_vcodec_intr.o \
                mtk_vcodec_util.o \
-               mtk_vcodec_fw.o
+               mtk_vcodec_fw.o \
+
+ifneq ($(CONFIG_VIDEO_MEDIATEK_VCODEC_VPU),)
+mtk-vcodec-common-y += mtk_vcodec_fw_vpu.o
+endif
+
+ifneq ($(CONFIG_VIDEO_MEDIATEK_VCODEC_SCP),)
+mtk-vcodec-common-y += mtk_vcodec_fw_scp.o
+endif
index d14bc20..145686d 100644
@@ -241,7 +241,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
        }
        dma_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32));
 
-       dev->fw_handler = mtk_vcodec_fw_select(dev, fw_type, VPU_RST_DEC);
+       dev->fw_handler = mtk_vcodec_fw_select(dev, fw_type, DECODER);
        if (IS_ERR(dev->fw_handler))
                return PTR_ERR(dev->fw_handler);
 
index dcfa2c2..3be8a04 100644
@@ -293,7 +293,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
        }
        dma_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32));
 
-       dev->fw_handler = mtk_vcodec_fw_select(dev, fw_type, VPU_RST_ENC);
+       dev->fw_handler = mtk_vcodec_fw_select(dev, fw_type, ENCODER);
        if (IS_ERR(dev->fw_handler))
                return PTR_ERR(dev->fw_handler);
 
index 6c2a256..94b39ae 100644
 // SPDX-License-Identifier: GPL-2.0
 
 #include "mtk_vcodec_fw.h"
+#include "mtk_vcodec_fw_priv.h"
 #include "mtk_vcodec_util.h"
 #include "mtk_vcodec_drv.h"
 
-struct mtk_vcodec_fw_ops {
-       int (*load_firmware)(struct mtk_vcodec_fw *fw);
-       unsigned int (*get_vdec_capa)(struct mtk_vcodec_fw *fw);
-       unsigned int (*get_venc_capa)(struct mtk_vcodec_fw *fw);
-       void * (*map_dm_addr)(struct mtk_vcodec_fw *fw, u32 dtcm_dmem_addr);
-       int (*ipi_register)(struct mtk_vcodec_fw *fw, int id,
-                           mtk_vcodec_ipi_handler handler, const char *name, void *priv);
-       int (*ipi_send)(struct mtk_vcodec_fw *fw, int id, void *buf,
-                       unsigned int len, unsigned int wait);
-};
-
-struct mtk_vcodec_fw {
-       enum mtk_vcodec_fw_type type;
-       const struct mtk_vcodec_fw_ops *ops;
-       struct platform_device *pdev;
-       struct mtk_scp *scp;
-};
-
-static int mtk_vcodec_vpu_load_firmware(struct mtk_vcodec_fw *fw)
-{
-       return vpu_load_firmware(fw->pdev);
-}
-
-static unsigned int mtk_vcodec_vpu_get_vdec_capa(struct mtk_vcodec_fw *fw)
-{
-       return vpu_get_vdec_hw_capa(fw->pdev);
-}
-
-static unsigned int mtk_vcodec_vpu_get_venc_capa(struct mtk_vcodec_fw *fw)
-{
-       return vpu_get_venc_hw_capa(fw->pdev);
-}
-
-static void *mtk_vcodec_vpu_map_dm_addr(struct mtk_vcodec_fw *fw,
-                                       u32 dtcm_dmem_addr)
-{
-       return vpu_mapping_dm_addr(fw->pdev, dtcm_dmem_addr);
-}
-
-static int mtk_vcodec_vpu_set_ipi_register(struct mtk_vcodec_fw *fw, int id,
-                                          mtk_vcodec_ipi_handler handler,
-                                          const char *name, void *priv)
-{
-       /*
-        * The handler we receive takes a void * as its first argument. We
-        * cannot change this because it needs to be passed down to the rproc
-        * subsystem when SCP is used. VPU takes a const argument, which is
-        * more constrained, so the conversion below is safe.
-        */
-       ipi_handler_t handler_const = (ipi_handler_t)handler;
-
-       return vpu_ipi_register(fw->pdev, id, handler_const, name, priv);
-}
-
-static int mtk_vcodec_vpu_ipi_send(struct mtk_vcodec_fw *fw, int id, void *buf,
-                                  unsigned int len, unsigned int wait)
-{
-       return vpu_ipi_send(fw->pdev, id, buf, len);
-}
-
-static const struct mtk_vcodec_fw_ops mtk_vcodec_vpu_msg = {
-       .load_firmware = mtk_vcodec_vpu_load_firmware,
-       .get_vdec_capa = mtk_vcodec_vpu_get_vdec_capa,
-       .get_venc_capa = mtk_vcodec_vpu_get_venc_capa,
-       .map_dm_addr = mtk_vcodec_vpu_map_dm_addr,
-       .ipi_register = mtk_vcodec_vpu_set_ipi_register,
-       .ipi_send = mtk_vcodec_vpu_ipi_send,
-};
-
-static int mtk_vcodec_scp_load_firmware(struct mtk_vcodec_fw *fw)
-{
-       return rproc_boot(scp_get_rproc(fw->scp));
-}
-
-static unsigned int mtk_vcodec_scp_get_vdec_capa(struct mtk_vcodec_fw *fw)
-{
-       return scp_get_vdec_hw_capa(fw->scp);
-}
-
-static unsigned int mtk_vcodec_scp_get_venc_capa(struct mtk_vcodec_fw *fw)
-{
-       return scp_get_venc_hw_capa(fw->scp);
-}
-
-static void *mtk_vcodec_vpu_scp_dm_addr(struct mtk_vcodec_fw *fw,
-                                       u32 dtcm_dmem_addr)
-{
-       return scp_mapping_dm_addr(fw->scp, dtcm_dmem_addr);
-}
-
-static int mtk_vcodec_scp_set_ipi_register(struct mtk_vcodec_fw *fw, int id,
-                                          mtk_vcodec_ipi_handler handler,
-                                          const char *name, void *priv)
-{
-       return scp_ipi_register(fw->scp, id, handler, priv);
-}
-
-static int mtk_vcodec_scp_ipi_send(struct mtk_vcodec_fw *fw, int id, void *buf,
-                                  unsigned int len, unsigned int wait)
-{
-       return scp_ipi_send(fw->scp, id, buf, len, wait);
-}
-
-static const struct mtk_vcodec_fw_ops mtk_vcodec_rproc_msg = {
-       .load_firmware = mtk_vcodec_scp_load_firmware,
-       .get_vdec_capa = mtk_vcodec_scp_get_vdec_capa,
-       .get_venc_capa = mtk_vcodec_scp_get_venc_capa,
-       .map_dm_addr = mtk_vcodec_vpu_scp_dm_addr,
-       .ipi_register = mtk_vcodec_scp_set_ipi_register,
-       .ipi_send = mtk_vcodec_scp_ipi_send,
-};
-
-static void mtk_vcodec_reset_handler(void *priv)
-{
-       struct mtk_vcodec_dev *dev = priv;
-       struct mtk_vcodec_ctx *ctx;
-
-       mtk_v4l2_err("Watchdog timeout!!");
-
-       mutex_lock(&dev->dev_mutex);
-       list_for_each_entry(ctx, &dev->ctx_list, list) {
-               ctx->state = MTK_STATE_ABORT;
-               mtk_v4l2_debug(0, "[%d] Change to state MTK_STATE_ABORT",
-                              ctx->id);
-       }
-       mutex_unlock(&dev->dev_mutex);
-}
-
 struct mtk_vcodec_fw *mtk_vcodec_fw_select(struct mtk_vcodec_dev *dev,
                                           enum mtk_vcodec_fw_type type,
-                                          enum rst_id rst_id)
+                                          enum mtk_vcodec_fw_use fw_use)
 {
-       const struct mtk_vcodec_fw_ops *ops;
-       struct mtk_vcodec_fw *fw;
-       struct platform_device *fw_pdev = NULL;
-       struct mtk_scp *scp = NULL;
-
        switch (type) {
        case VPU:
-               ops = &mtk_vcodec_vpu_msg;
-               fw_pdev = vpu_get_plat_device(dev->plat_dev);
-               if (!fw_pdev) {
-                       mtk_v4l2_err("firmware device is not ready");
-                       return ERR_PTR(-EINVAL);
-               }
-               vpu_wdt_reg_handler(fw_pdev, mtk_vcodec_reset_handler,
-                                   dev, rst_id);
-               break;
+               return mtk_vcodec_fw_vpu_init(dev, fw_use);
        case SCP:
-               ops = &mtk_vcodec_rproc_msg;
-               scp = scp_get(dev->plat_dev);
-               if (!scp) {
-                       mtk_v4l2_err("could not get vdec scp handle");
-                       return ERR_PTR(-EPROBE_DEFER);
-               }
-               break;
+               return mtk_vcodec_fw_scp_init(dev);
        default:
                mtk_v4l2_err("invalid vcodec fw type");
                return ERR_PTR(-EINVAL);
        }
-
-       fw = devm_kzalloc(&dev->plat_dev->dev, sizeof(*fw), GFP_KERNEL);
-       if (!fw)
-               return ERR_PTR(-EINVAL);
-
-       fw->type = type;
-       fw->ops = ops;
-       fw->pdev = fw_pdev;
-       fw->scp = scp;
-
-       return fw;
 }
 EXPORT_SYMBOL_GPL(mtk_vcodec_fw_select);
 
 void mtk_vcodec_fw_release(struct mtk_vcodec_fw *fw)
 {
-       switch (fw->type) {
-       case VPU:
-               put_device(&fw->pdev->dev);
-               break;
-       case SCP:
-               scp_put(fw->scp);
-               break;
-       }
+       fw->ops->release(fw);
 }
 EXPORT_SYMBOL_GPL(mtk_vcodec_fw_release);
 
index fadbbe6..539bb62 100644 (file)
@@ -15,6 +15,11 @@ enum mtk_vcodec_fw_type {
        SCP,
 };
 
+enum mtk_vcodec_fw_use {
+       DECODER,
+       ENCODER,
+};
+
 struct mtk_vcodec_fw;
 
 typedef void (*mtk_vcodec_ipi_handler) (void *data,
@@ -22,7 +27,7 @@ typedef void (*mtk_vcodec_ipi_handler) (void *data,
 
 struct mtk_vcodec_fw *mtk_vcodec_fw_select(struct mtk_vcodec_dev *dev,
                                           enum mtk_vcodec_fw_type type,
-                                          enum rst_id rst_id);
+                                          enum mtk_vcodec_fw_use fw_use);
 void mtk_vcodec_fw_release(struct mtk_vcodec_fw *fw);
 
 int mtk_vcodec_fw_load_firmware(struct mtk_vcodec_fw *fw);
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_priv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_priv.h
new file mode 100644
index 0000000..b41e661
--- /dev/null
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _MTK_VCODEC_FW_PRIV_H_
+#define _MTK_VCODEC_FW_PRIV_H_
+
+#include "mtk_vcodec_fw.h"
+
+struct mtk_vcodec_dev;
+
+struct mtk_vcodec_fw {
+       enum mtk_vcodec_fw_type type;
+       const struct mtk_vcodec_fw_ops *ops;
+       struct platform_device *pdev;
+       struct mtk_scp *scp;
+};
+
+struct mtk_vcodec_fw_ops {
+       int (*load_firmware)(struct mtk_vcodec_fw *fw);
+       unsigned int (*get_vdec_capa)(struct mtk_vcodec_fw *fw);
+       unsigned int (*get_venc_capa)(struct mtk_vcodec_fw *fw);
+       void *(*map_dm_addr)(struct mtk_vcodec_fw *fw, u32 dtcm_dmem_addr);
+       int (*ipi_register)(struct mtk_vcodec_fw *fw, int id,
+                           mtk_vcodec_ipi_handler handler, const char *name,
+                           void *priv);
+       int (*ipi_send)(struct mtk_vcodec_fw *fw, int id, void *buf,
+                       unsigned int len, unsigned int wait);
+       void (*release)(struct mtk_vcodec_fw *fw);
+};
+
+#if IS_ENABLED(CONFIG_VIDEO_MEDIATEK_VCODEC_VPU)
+struct mtk_vcodec_fw *mtk_vcodec_fw_vpu_init(struct mtk_vcodec_dev *dev,
+                                            enum mtk_vcodec_fw_use fw_use);
+#else
+static inline struct mtk_vcodec_fw *
+mtk_vcodec_fw_vpu_init(struct mtk_vcodec_dev *dev,
+                      enum mtk_vcodec_fw_use fw_use)
+{
+       return ERR_PTR(-ENODEV);
+}
+#endif /* CONFIG_VIDEO_MEDIATEK_VCODEC_VPU */
+
+#if IS_ENABLED(CONFIG_VIDEO_MEDIATEK_VCODEC_SCP)
+struct mtk_vcodec_fw *mtk_vcodec_fw_scp_init(struct mtk_vcodec_dev *dev);
+#else
+static inline struct mtk_vcodec_fw *
+mtk_vcodec_fw_scp_init(struct mtk_vcodec_dev *dev)
+{
+       return ERR_PTR(-ENODEV);
+}
+#endif /* CONFIG_VIDEO_MEDIATEK_VCODEC_SCP */
+
+#endif /* _MTK_VCODEC_FW_PRIV_H_ */
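
The new header above completes the switch to an ops-table dispatch: mtk_vcodec_fw_select() now only picks a backend constructor, and every later call goes through fw->ops. A minimal caller-side sketch, assuming the wrappers declared in mtk_vcodec_fw.h are thin forwarders to the ops table (the probe body is illustrative, not taken from the driver):

static int example_probe_fw(struct mtk_vcodec_dev *dev)
{
        struct mtk_vcodec_fw *fw;
        int ret;

        /* picks the VPU or SCP backend and fills in fw->ops */
        fw = mtk_vcodec_fw_select(dev, VPU, DECODER);
        if (IS_ERR(fw))
                return PTR_ERR(fw);     /* e.g. -EPROBE_DEFER from the SCP path */

        /* assumed to forward to fw->ops->load_firmware */
        ret = mtk_vcodec_fw_load_firmware(fw);
        if (ret) {
                mtk_vcodec_fw_release(fw);      /* dispatches fw->ops->release */
                return ret;
        }

        return 0;
}

With the ops tables hidden behind CONFIG_VIDEO_MEDIATEK_VCODEC_VPU and CONFIG_VIDEO_MEDIATEK_VCODEC_SCP, the common mtk_vcodec_fw.c no longer has to compile against both backends; the ERR_PTR(-ENODEV) stubs cover configurations where one of them is compiled out.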
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_scp.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_scp.c
new file mode 100644 (file)
index 0000000..d8e66b6
--- /dev/null
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "mtk_vcodec_fw_priv.h"
+#include "mtk_vcodec_util.h"
+#include "mtk_vcodec_drv.h"
+
+static int mtk_vcodec_scp_load_firmware(struct mtk_vcodec_fw *fw)
+{
+       return rproc_boot(scp_get_rproc(fw->scp));
+}
+
+static unsigned int mtk_vcodec_scp_get_vdec_capa(struct mtk_vcodec_fw *fw)
+{
+       return scp_get_vdec_hw_capa(fw->scp);
+}
+
+static unsigned int mtk_vcodec_scp_get_venc_capa(struct mtk_vcodec_fw *fw)
+{
+       return scp_get_venc_hw_capa(fw->scp);
+}
+
+static void *mtk_vcodec_scp_map_dm_addr(struct mtk_vcodec_fw *fw,
+                                       u32 dtcm_dmem_addr)
+{
+       return scp_mapping_dm_addr(fw->scp, dtcm_dmem_addr);
+}
+
+static int mtk_vcodec_scp_set_ipi_register(struct mtk_vcodec_fw *fw, int id,
+                                          mtk_vcodec_ipi_handler handler,
+                                          const char *name, void *priv)
+{
+       return scp_ipi_register(fw->scp, id, handler, priv);
+}
+
+static int mtk_vcodec_scp_ipi_send(struct mtk_vcodec_fw *fw, int id, void *buf,
+                                  unsigned int len, unsigned int wait)
+{
+       return scp_ipi_send(fw->scp, id, buf, len, wait);
+}
+
+static void mtk_vcodec_scp_release(struct mtk_vcodec_fw *fw)
+{
+       scp_put(fw->scp);
+}
+
+static const struct mtk_vcodec_fw_ops mtk_vcodec_rproc_msg = {
+       .load_firmware = mtk_vcodec_scp_load_firmware,
+       .get_vdec_capa = mtk_vcodec_scp_get_vdec_capa,
+       .get_venc_capa = mtk_vcodec_scp_get_venc_capa,
+       .map_dm_addr = mtk_vcodec_scp_map_dm_addr,
+       .ipi_register = mtk_vcodec_scp_set_ipi_register,
+       .ipi_send = mtk_vcodec_scp_ipi_send,
+       .release = mtk_vcodec_scp_release,
+};
+
+struct mtk_vcodec_fw *mtk_vcodec_fw_scp_init(struct mtk_vcodec_dev *dev)
+{
+       struct mtk_vcodec_fw *fw;
+       struct mtk_scp *scp;
+
+       scp = scp_get(dev->plat_dev);
+       if (!scp) {
+               mtk_v4l2_err("could not get vdec scp handle");
+               return ERR_PTR(-EPROBE_DEFER);
+       }
+
+       fw = devm_kzalloc(&dev->plat_dev->dev, sizeof(*fw), GFP_KERNEL);
+       if (!fw) {
+               scp_put(scp);
+               return ERR_PTR(-ENOMEM);
+       }
+       fw->type = SCP;
+       fw->ops = &mtk_vcodec_rproc_msg;
+       fw->scp = scp;
+
+       return fw;
+}
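
Both constructors rely on the kernel's ERR_PTR convention: a single pointer return carries either a valid object or a negative errno, so callers never juggle a separate status out-parameter. As a generic <linux/err.h> refresher (not driver-specific code):

        void *p = ERR_PTR(-EPROBE_DEFER);       /* encode an errno in pointer space */

        if (IS_ERR(p))
                pr_warn("init failed: %ld\n", PTR_ERR(p));      /* decodes to -517 */

-EPROBE_DEFER in particular asks the driver core to retry the probe later, which is why mtk_vcodec_fw_scp_init() returns it while the SCP remoteproc has not bound yet.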
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_fw_vpu.c
new file mode 100644 (file)
index 0000000..cd27f63
--- /dev/null
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "mtk_vcodec_fw_priv.h"
+#include "mtk_vcodec_util.h"
+#include "mtk_vcodec_drv.h"
+
+static int mtk_vcodec_vpu_load_firmware(struct mtk_vcodec_fw *fw)
+{
+       return vpu_load_firmware(fw->pdev);
+}
+
+static unsigned int mtk_vcodec_vpu_get_vdec_capa(struct mtk_vcodec_fw *fw)
+{
+       return vpu_get_vdec_hw_capa(fw->pdev);
+}
+
+static unsigned int mtk_vcodec_vpu_get_venc_capa(struct mtk_vcodec_fw *fw)
+{
+       return vpu_get_venc_hw_capa(fw->pdev);
+}
+
+static void *mtk_vcodec_vpu_map_dm_addr(struct mtk_vcodec_fw *fw,
+                                       u32 dtcm_dmem_addr)
+{
+       return vpu_mapping_dm_addr(fw->pdev, dtcm_dmem_addr);
+}
+
+static int mtk_vcodec_vpu_set_ipi_register(struct mtk_vcodec_fw *fw, int id,
+                                          mtk_vcodec_ipi_handler handler,
+                                          const char *name, void *priv)
+{
+       /*
+        * The handler we receive takes a void * as its first argument. We
+        * cannot change this because it needs to be passed down to the rproc
+        * subsystem when SCP is used. VPU takes a const argument, which is
+        * more constrained, so the conversion below is safe.
+        */
+       ipi_handler_t handler_const = (ipi_handler_t)handler;
+
+       return vpu_ipi_register(fw->pdev, id, handler_const, name, priv);
+}
+
+static int mtk_vcodec_vpu_ipi_send(struct mtk_vcodec_fw *fw, int id, void *buf,
+                                  unsigned int len, unsigned int wait)
+{
+       return vpu_ipi_send(fw->pdev, id, buf, len);
+}
+
+static void mtk_vcodec_vpu_release(struct mtk_vcodec_fw *fw)
+{
+       put_device(&fw->pdev->dev);
+}
+
+static void mtk_vcodec_vpu_reset_handler(void *priv)
+{
+       struct mtk_vcodec_dev *dev = priv;
+       struct mtk_vcodec_ctx *ctx;
+
+       mtk_v4l2_err("Watchdog timeout!!");
+
+       mutex_lock(&dev->dev_mutex);
+       list_for_each_entry(ctx, &dev->ctx_list, list) {
+               ctx->state = MTK_STATE_ABORT;
+               mtk_v4l2_debug(0, "[%d] Change to state MTK_STATE_ABORT",
+                              ctx->id);
+       }
+       mutex_unlock(&dev->dev_mutex);
+}
+
+static const struct mtk_vcodec_fw_ops mtk_vcodec_vpu_msg = {
+       .load_firmware = mtk_vcodec_vpu_load_firmware,
+       .get_vdec_capa = mtk_vcodec_vpu_get_vdec_capa,
+       .get_venc_capa = mtk_vcodec_vpu_get_venc_capa,
+       .map_dm_addr = mtk_vcodec_vpu_map_dm_addr,
+       .ipi_register = mtk_vcodec_vpu_set_ipi_register,
+       .ipi_send = mtk_vcodec_vpu_ipi_send,
+       .release = mtk_vcodec_vpu_release,
+};
+
+struct mtk_vcodec_fw *mtk_vcodec_fw_vpu_init(struct mtk_vcodec_dev *dev,
+                                            enum mtk_vcodec_fw_use fw_use)
+{
+       struct platform_device *fw_pdev;
+       struct mtk_vcodec_fw *fw;
+       enum rst_id rst_id;
+
+       switch (fw_use) {
+       case ENCODER:
+               rst_id = VPU_RST_ENC;
+               break;
+       case DECODER:
+       default:
+               rst_id = VPU_RST_DEC;
+               break;
+       }
+
+       fw_pdev = vpu_get_plat_device(dev->plat_dev);
+       if (!fw_pdev) {
+               mtk_v4l2_err("firmware device is not ready");
+               return ERR_PTR(-EINVAL);
+       }
+       vpu_wdt_reg_handler(fw_pdev, mtk_vcodec_vpu_reset_handler, dev, rst_id);
+
+       fw = devm_kzalloc(&dev->plat_dev->dev, sizeof(*fw), GFP_KERNEL);
+       if (!fw) {
+               put_device(&fw_pdev->dev);
+               return ERR_PTR(-ENOMEM);
+       }
+       fw->type = VPU;
+       fw->ops = &mtk_vcodec_vpu_msg;
+       fw->pdev = fw_pdev;
+
+       return fw;
+}
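
The cast inside mtk_vcodec_vpu_set_ipi_register() works because the two handler typedefs differ only in the constness of their first parameter. Roughly, with the VPU-side signature stated as an assumption (the real typedef lives in mtk_vpu.h):

/* shared type, from mtk_vcodec_fw.h */
typedef void (*mtk_vcodec_ipi_handler)(void *data,
                                       unsigned int len, void *priv);

/* assumed VPU-side type: the payload pointer is const-qualified */
typedef void (*ipi_handler_t)(const void *data,
                              unsigned int len, void *priv);

The shared type is the less constrained of the two, so a handler written against it can stand in for the const-qualified VPU type; the reverse substitution would not be safe, which is why the common typedef keeps the plain void * form that the SCP/rproc side requires.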
index 7b79a33..05c9fbd 100644 (file)
@@ -243,8 +243,19 @@ struct venc_controls {
 
        u32 header_mode;
 
-       u32 profile;
-       u32 level;
+       struct {
+               u32 h264;
+               u32 mpeg4;
+               u32 hevc;
+               u32 vp8;
+               u32 vp9;
+       } profile;
+       struct {
+               u32 h264;
+               u32 mpeg4;
+               u32 hevc;
+               u32 vp9;
+       } level;
 };
 
 struct venus_buffer {
index 57877ea..a9538c2 100644 (file)
@@ -794,7 +794,7 @@ skip_pmdomains:
        return 0;
 
 opp_dl_add_err:
-       dev_pm_domain_detach(core->opp_pmdomain, true);
+       dev_pm_opp_detach_genpd(core->opp_table);
 opp_attach_err:
        if (core->pd_dl_venus) {
                device_link_del(core->pd_dl_venus);
@@ -832,7 +832,7 @@ skip_pmdomains:
        if (core->opp_dl_venus)
                device_link_del(core->opp_dl_venus);
 
-       dev_pm_domain_detach(core->opp_pmdomain, true);
+       dev_pm_opp_detach_genpd(core->opp_table);
 }
 
 static int core_get_v4(struct device *dev)
index f8b1484..4724652 100644 (file)
@@ -537,6 +537,7 @@ static int venc_set_properties(struct venus_inst *inst)
        struct hfi_quantization quant;
        struct hfi_quantization_range quant_range;
        u32 ptype, rate_control, bitrate;
+       u32 profile, level;
        int ret;
 
        ret = venus_helper_set_work_mode(inst, VIDC_WORK_MODE_2);
@@ -684,7 +685,35 @@ static int venc_set_properties(struct venus_inst *inst)
        if (ret)
                return ret;
 
-       ret = venus_helper_set_profile_level(inst, ctr->profile, ctr->level);
+       switch (inst->hfi_codec) {
+       case HFI_VIDEO_CODEC_H264:
+               profile = ctr->profile.h264;
+               level = ctr->level.h264;
+               break;
+       case HFI_VIDEO_CODEC_MPEG4:
+               profile = ctr->profile.mpeg4;
+               level = ctr->level.mpeg4;
+               break;
+       case HFI_VIDEO_CODEC_VP8:
+               profile = ctr->profile.vp8;
+               level = 0;
+               break;
+       case HFI_VIDEO_CODEC_VP9:
+               profile = ctr->profile.vp9;
+               level = ctr->level.vp9;
+               break;
+       case HFI_VIDEO_CODEC_HEVC:
+               profile = ctr->profile.hevc;
+               level = ctr->level.hevc;
+               break;
+       case HFI_VIDEO_CODEC_MPEG2:
+       default:
+               profile = 0;
+               level = 0;
+               break;
+       }
+
+       ret = venus_helper_set_profile_level(inst, profile, level);
        if (ret)
                return ret;
 
index 0708b3b..cf860e6 100644 (file)
@@ -103,15 +103,25 @@ static int venc_op_s_ctrl(struct v4l2_ctrl *ctrl)
                ctr->h264_entropy_mode = ctrl->val;
                break;
        case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE:
+               ctr->profile.mpeg4 = ctrl->val;
+               break;
        case V4L2_CID_MPEG_VIDEO_H264_PROFILE:
+               ctr->profile.h264 = ctrl->val;
+               break;
        case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE:
+               ctr->profile.hevc = ctrl->val;
+               break;
        case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
-               ctr->profile = ctrl->val;
+               ctr->profile.vp8 = ctrl->val;
                break;
        case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:
+               ctr->level.mpeg4 = ctrl->val;
+               break;
        case V4L2_CID_MPEG_VIDEO_H264_LEVEL:
+               ctr->level.h264 = ctrl->val;
+               break;
        case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL:
-               ctr->level = ctrl->val;
+               ctr->level.hevc = ctrl->val;
                break;
        case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_QP:
                ctr->h264_i_qp = ctrl->val;
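
Splitting profile and level per codec means the V4L2 controls stop clobbering one another: each V4L2_CID_MPEG_VIDEO_*_PROFILE now lands in its own slot, and venc_set_properties() picks the pair matching inst->hfi_codec. Userspace keeps using the ordinary control interface; a sketch against an assumed /dev/video0 encoder node:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

int main(void)
{
        int fd = open("/dev/video0", O_RDWR);   /* assumed device node */
        struct v4l2_control ctrl = {
                .id    = V4L2_CID_MPEG_VIDEO_H264_PROFILE,
                .value = V4L2_MPEG_VIDEO_H264_PROFILE_HIGH,
        };

        if (fd < 0 || ioctl(fd, VIDIOC_S_CTRL, &ctrl) < 0)
                perror("set h264 profile");
        /* an HEVC/VP8/VP9 profile set earlier is now left untouched */
        return 0;
}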
index 5051a5e..65a136c 100644 (file)
@@ -151,15 +151,12 @@ static inline u32 mtk_chk_period(struct mtk_ir *ir)
 {
        u32 val;
 
-       /* Period of raw software sampling in ns */
-       val = DIV_ROUND_CLOSEST(1000000000ul,
-                               clk_get_rate(ir->bus) / ir->data->div);
-
        /*
         * Period for software decoder used in the
         * unit of raw software sampling
         */
-       val = DIV_ROUND_CLOSEST(MTK_IR_SAMPLE, val);
+       val = DIV_ROUND_CLOSEST(clk_get_rate(ir->bus),
+                               USEC_PER_SEC * ir->data->div / MTK_IR_SAMPLE);
 
        dev_dbg(ir->dev, "@pwm clk  = \t%lu\n",
                clk_get_rate(ir->bus) / ir->data->div);
@@ -412,7 +409,7 @@ static int mtk_ir_probe(struct platform_device *pdev)
        mtk_irq_enable(ir, MTK_IRINT_EN);
 
        dev_info(dev, "Initialized MT7623 IR driver, sample period = %dus\n",
-                DIV_ROUND_CLOSEST(MTK_IR_SAMPLE, 1000));
+                MTK_IR_SAMPLE);
 
        return 0;
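
The rework folds two rounded divisions into one so the rounding error no longer compounds, and it redefines MTK_IR_SAMPLE in microseconds, which is why the probe message stops dividing by 1000. The arithmetic, as a compilable sketch with made-up numbers (the real clock rate, divider and sample period are hardware-specific):

#include <linux/kernel.h>       /* DIV_ROUND_CLOSEST() */
#include <linux/time64.h>       /* USEC_PER_SEC */

static unsigned long example_chk_period(void)
{
        unsigned long clk = 135000, div = 4, sample_us = 46;    /* made up */

        /*
         * Raw samples per software sample period,
         * ~= (clk / div) * sample_us / 1e6, rounded exactly once:
         * 135000 / (1000000 * 4 / 46) = 135000 / 86956 -> 2
         */
        return DIV_ROUND_CLOSEST(clk, USEC_PER_SEC * div / sample_us);
}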
 
index 74b0549..fc64d0c 100644 (file)
@@ -4,36 +4,43 @@
  * validate the existing APIs in the media subsystem. It can also aid
  * developers working on userspace applications.
  *
- * When this module is loaded, it will attempt to modprobe 'dvb_vidtv_tuner' and 'dvb_vidtv_demod'.
+ * When this module is loaded, it will attempt to modprobe 'dvb_vidtv_tuner'
+ * and 'dvb_vidtv_demod'.
  *
  * Copyright (C) 2020 Daniel W. S. Almeida
  */
 
+#include <linux/dev_printk.h>
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
-#include <linux/dev_printk.h>
 #include <linux/time.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
 #include "vidtv_bridge.h"
+#include "vidtv_common.h"
 #include "vidtv_demod.h"
-#include "vidtv_tuner.h"
-#include "vidtv_ts.h"
 #include "vidtv_mux.h"
-#include "vidtv_common.h"
+#include "vidtv_ts.h"
+#include "vidtv_tuner.h"
 
-//#define MUX_BUF_MAX_SZ
-//#define MUX_BUF_MIN_SZ
+#define MUX_BUF_MIN_SZ 90164
+#define MUX_BUF_MAX_SZ (MUX_BUF_MIN_SZ * 10)
 #define TUNER_DEFAULT_ADDR 0x68
 #define DEMOD_DEFAULT_ADDR 0x60
+#define VIDTV_DEFAULT_NETWORK_ID 0xff44
+#define VIDTV_DEFAULT_NETWORK_NAME "LinuxTV.org"
+#define VIDTV_DEFAULT_TS_ID 0x4081
 
-/* LNBf fake parameters: ranges used by an Universal (extended) European LNBf */
-#define LNB_CUT_FREQUENCY      11700000
-#define LNB_LOW_FREQ           9750000
-#define LNB_HIGH_FREQ          10600000
-
+/*
+ * The LNBf fake parameters here are the ranges used by a
+ * Universal (extended) European LNBf, which is likely the most common LNBf
+ * found in satellite digital TV systems nowadays.
+ */
+#define LNB_CUT_FREQUENCY      11700000        /* transition frequency */
+#define LNB_LOW_FREQ           9750000         /* low-band local oscillator */
+#define LNB_HIGH_FREQ          10600000        /* high-band local oscillator */
 
 static unsigned int drop_tslock_prob_on_low_snr;
 module_param(drop_tslock_prob_on_low_snr, uint, 0);
@@ -92,7 +99,8 @@ MODULE_PARM_DESC(si_period_msec, "How often to send SI packets. Default: 40ms");
 
 static unsigned int pcr_period_msec = 40;
 module_param(pcr_period_msec, uint, 0);
-MODULE_PARM_DESC(pcr_period_msec, "How often to send PCR packets. Default: 40ms");
+MODULE_PARM_DESC(pcr_period_msec,
+                "How often to send PCR packets. Default: 40ms");
 
 static unsigned int mux_rate_kbytes_sec = 4096;
 module_param(mux_rate_kbytes_sec, uint, 0);
@@ -104,16 +112,14 @@ MODULE_PARM_DESC(pcr_pid, "PCR PID for all channels: defaults to 0x200");
 
 static unsigned int mux_buf_sz_pkts;
 module_param(mux_buf_sz_pkts, uint, 0);
-MODULE_PARM_DESC(mux_buf_sz_pkts, "Size for the internal mux buffer in multiples of 188 bytes");
-
-#define MUX_BUF_MIN_SZ 90164
-#define MUX_BUF_MAX_SZ (MUX_BUF_MIN_SZ * 10)
+MODULE_PARM_DESC(mux_buf_sz_pkts,
+                "Size for the internal mux buffer in multiples of 188 bytes");
 
 static u32 vidtv_bridge_mux_buf_sz_for_mux_rate(void)
 {
        u32 max_elapsed_time_msecs =  VIDTV_MAX_SLEEP_USECS / USEC_PER_MSEC;
-       u32 nbytes_expected;
        u32 mux_buf_sz = mux_buf_sz_pkts * TS_PACKET_LEN;
+       u32 nbytes_expected;
 
        nbytes_expected = mux_rate_kbytes_sec;
        nbytes_expected *= max_elapsed_time_msecs;
@@ -143,14 +149,12 @@ static bool vidtv_bridge_check_demod_lock(struct vidtv_dvb *dvb, u32 n)
                          FE_HAS_LOCK);
 }
 
-static void
-vidtv_bridge_on_new_pkts_avail(void *priv, u8 *buf, u32 npkts)
+/*
+ * called on a separate thread by the mux when new packets become available
+ */
+static void vidtv_bridge_on_new_pkts_avail(void *priv, u8 *buf, u32 npkts)
 {
-       /*
-        * called on a separate thread by the mux when new packets become
-        * available
-        */
-       struct vidtv_dvb *dvb = (struct vidtv_dvb *)priv;
+       struct vidtv_dvb *dvb = priv;
 
        /* drop packets if we lose the lock */
        if (vidtv_bridge_check_demod_lock(dvb, 0))
@@ -159,7 +163,17 @@ vidtv_bridge_on_new_pkts_avail(void *priv, u8 *buf, u32 npkts)
 
 static int vidtv_start_streaming(struct vidtv_dvb *dvb)
 {
-       struct vidtv_mux_init_args mux_args = {0};
+       struct vidtv_mux_init_args mux_args = {
+               .mux_rate_kbytes_sec         = mux_rate_kbytes_sec,
+               .on_new_packets_available_cb = vidtv_bridge_on_new_pkts_avail,
+               .pcr_period_usecs            = pcr_period_msec * USEC_PER_MSEC,
+               .si_period_usecs             = si_period_msec * USEC_PER_MSEC,
+               .pcr_pid                     = pcr_pid,
+               .transport_stream_id         = VIDTV_DEFAULT_TS_ID,
+               .network_id                  = VIDTV_DEFAULT_NETWORK_ID,
+               .network_name                = VIDTV_DEFAULT_NETWORK_NAME,
+               .priv                        = dvb,
+       };
        struct device *dev = &dvb->pdev->dev;
        u32 mux_buf_sz;
 
@@ -168,19 +182,17 @@ static int vidtv_start_streaming(struct vidtv_dvb *dvb)
                return 0;
        }
 
-       mux_buf_sz = (mux_buf_sz_pkts) ? mux_buf_sz_pkts : vidtv_bridge_mux_buf_sz_for_mux_rate();
+       if (mux_buf_sz_pkts)
+               mux_buf_sz = mux_buf_sz_pkts;
+       else
+               mux_buf_sz = vidtv_bridge_mux_buf_sz_for_mux_rate();
 
-       mux_args.mux_rate_kbytes_sec         = mux_rate_kbytes_sec;
-       mux_args.on_new_packets_available_cb = vidtv_bridge_on_new_pkts_avail;
-       mux_args.mux_buf_sz                  = mux_buf_sz;
-       mux_args.pcr_period_usecs            = pcr_period_msec * 1000;
-       mux_args.si_period_usecs             = si_period_msec * 1000;
-       mux_args.pcr_pid                     = pcr_pid;
-       mux_args.transport_stream_id         = VIDTV_DEFAULT_TS_ID;
-       mux_args.priv                        = dvb;
+       mux_args.mux_buf_sz  = mux_buf_sz;
 
        dvb->streaming = true;
-       dvb->mux = vidtv_mux_init(dvb->fe[0], dev, mux_args);
+       dvb->mux = vidtv_mux_init(dvb->fe[0], dev, &mux_args);
+       if (!dvb->mux)
+               return -ENOMEM;
        vidtv_mux_start_thread(dvb->mux);
 
        dev_dbg_ratelimited(dev, "Started streaming\n");
@@ -204,8 +216,8 @@ static int vidtv_start_feed(struct dvb_demux_feed *feed)
 {
        struct dvb_demux *demux = feed->demux;
        struct vidtv_dvb *dvb   = demux->priv;
-       int rc;
        int ret;
+       int rc;
 
        if (!demux->dmx.frontend)
                return -EINVAL;
@@ -243,9 +255,9 @@ static int vidtv_stop_feed(struct dvb_demux_feed *feed)
 
 static struct dvb_frontend *vidtv_get_frontend_ptr(struct i2c_client *c)
 {
-       /* the demod will set this when its probe function runs */
        struct vidtv_demod_state *state = i2c_get_clientdata(c);
 
+       /* the demod will set this when its probe function runs */
        return &state->frontend;
 }
 
@@ -253,6 +265,11 @@ static int vidtv_master_xfer(struct i2c_adapter *i2c_adap,
                             struct i2c_msg msgs[],
                             int num)
 {
+       /*
+        * Right now, this virtual driver doesn't really send or receive
+        * messages over I2C. A real driver would require an implementation
+        * here.
+        */
        return 0;
 }
 
@@ -320,11 +337,10 @@ static int vidtv_bridge_dmxdev_init(struct vidtv_dvb *dvb)
 
 static int vidtv_bridge_probe_demod(struct vidtv_dvb *dvb, u32 n)
 {
-       struct vidtv_demod_config cfg = {};
-
-       cfg.drop_tslock_prob_on_low_snr     = drop_tslock_prob_on_low_snr;
-       cfg.recover_tslock_prob_on_good_snr = recover_tslock_prob_on_good_snr;
-
+       struct vidtv_demod_config cfg = {
+               .drop_tslock_prob_on_low_snr     = drop_tslock_prob_on_low_snr,
+               .recover_tslock_prob_on_good_snr = recover_tslock_prob_on_good_snr,
+       };
        dvb->i2c_client_demod[n] = dvb_module_probe("dvb_vidtv_demod",
                                                    NULL,
                                                    &dvb->i2c_adapter,
@@ -343,14 +359,14 @@ static int vidtv_bridge_probe_demod(struct vidtv_dvb *dvb, u32 n)
 
 static int vidtv_bridge_probe_tuner(struct vidtv_dvb *dvb, u32 n)
 {
-       struct vidtv_tuner_config cfg = {};
+       struct vidtv_tuner_config cfg = {
+               .fe                       = dvb->fe[n],
+               .mock_power_up_delay_msec = mock_power_up_delay_msec,
+               .mock_tune_delay_msec     = mock_tune_delay_msec,
+       };
        u32 freq;
        int i;
 
-       cfg.fe                       = dvb->fe[n];
-       cfg.mock_power_up_delay_msec = mock_power_up_delay_msec;
-       cfg.mock_tune_delay_msec     = mock_tune_delay_msec;
-
        /* TODO: check if the frequencies are at a valid range */
 
        memcpy(cfg.vidtv_valid_dvb_t_freqs,
@@ -389,9 +405,7 @@ static int vidtv_bridge_probe_tuner(struct vidtv_dvb *dvb, u32 n)
 
 static int vidtv_bridge_dvb_init(struct vidtv_dvb *dvb)
 {
-       int ret;
-       int i;
-       int j;
+       int ret, i, j;
 
        ret = vidtv_bridge_i2c_register_adap(dvb);
        if (ret < 0)
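
The sizing helper earlier in this file leans on a unit identity: kbytes per second multiplied by milliseconds yields bytes directly, as the driver computes it, so no scale factor appears. With the defaults above, a worked sketch (the packet-alignment step is an assumption; that part of the function sits outside the hunk):

#include <linux/kernel.h>       /* roundup() */
#include <linux/types.h>

static u32 example_mux_buf_sz(void)
{
        u32 max_elapsed_ms = (2 * 10000) / 1000;        /* VIDTV_MAX_SLEEP_USECS -> 20 ms */
        u32 nbytes = 4096 * max_elapsed_ms;             /* default mux rate -> 81920 bytes */

        /* assumed: align up to whole 188-byte TS packets */
        return roundup(nbytes, 188);                    /* 81968, i.e. 436 packets */
}

Whatever comes out, the MUX_BUF_MIN_SZ/MUX_BUF_MAX_SZ bounds defined at the top of the file presumably clamp the final size.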
index 78fe847..2528ada 100644 (file)
 #include <linux/i2c.h>
 #include <linux/platform_device.h>
 #include <linux/types.h>
+
 #include <media/dmxdev.h>
 #include <media/dvb_demux.h>
 #include <media/dvb_frontend.h>
+
 #include "vidtv_mux.h"
 
 /**
@@ -32,7 +34,7 @@
  * @adapter: Represents a DTV adapter. See 'dvb_register_adapter'.
  * @demux: The demux used by the dvb_dmx_swfilter_packets() call.
  * @dmx_dev: Represents a demux device.
- * @dmx_frontend: The frontends associated with the demux.
+ * @dmx_fe: The frontends associated with the demux.
  * @i2c_adapter: The i2c_adapter associated with the bridge driver.
  * @i2c_client_demod: The i2c_clients associated with the demodulator modules.
  * @i2c_client_tuner: The i2c_clients associated with the tuner modules.
index f2b97cf..7838e62 100644 (file)
@@ -9,6 +9,7 @@
  * When vidtv boots, it will create some hardcoded channels.
  * Their services will be concatenated to populate the SDT.
  * Their programs will be concatenated to populate the PAT
+ * Their events will be concatenated to populate the EIT
  * For each program in the PAT, a PMT section will be created
  * The PMT section for a channel will be assigned its streams.
  * Every stream will have its corresponding encoder polled to produce TS packets
  * Copyright (C) 2020 Daniel W. S. Almeida
  */
 
-#include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/dev_printk.h>
 #include <linux/ratelimit.h>
+#include <linux/slab.h>
+#include <linux/types.h>
 
 #include "vidtv_channel.h"
-#include "vidtv_psi.h"
+#include "vidtv_common.h"
 #include "vidtv_encoder.h"
 #include "vidtv_mux.h"
-#include "vidtv_common.h"
+#include "vidtv_psi.h"
 #include "vidtv_s302m.h"
 
 static void vidtv_channel_encoder_destroy(struct vidtv_encoder *e)
 {
-       struct vidtv_encoder *curr = e;
        struct vidtv_encoder *tmp = NULL;
+       struct vidtv_encoder *curr = e;
 
        while (curr) {
                /* forward the call to the derived type */
@@ -44,55 +45,88 @@ static void vidtv_channel_encoder_destroy(struct vidtv_encoder *e)
 }
 
 #define ENCODING_ISO8859_15 "\x0b"
+#define TS_NIT_PID     0x10
 
+/*
+ * init an audio-only channel with an s302m encoder
+ */
 struct vidtv_channel
 *vidtv_channel_s302m_init(struct vidtv_channel *head, u16 transport_stream_id)
 {
-       /*
-        * init an audio only channel with a s302m encoder
-        */
-       const u16 s302m_service_id          = 0x880;
-       const u16 s302m_program_num         = 0x880;
-       const u16 s302m_program_pid         = 0x101; /* packet id for PMT*/
-       const u16 s302m_es_pid              = 0x111; /* packet id for the ES */
        const __be32 s302m_fid              = cpu_to_be32(VIDTV_S302M_FORMAT_IDENTIFIER);
-
-       char *name = ENCODING_ISO8859_15 "Beethoven";
+       char *event_text = ENCODING_ISO8859_15 "Bagatelle No. 25 in A minor for solo piano, also known as F\xfcr Elise, composed by Ludwig van Beethoven";
+       char *event_name = ENCODING_ISO8859_15 "Ludwig van Beethoven: F\xfcr Elise";
+       struct vidtv_s302m_encoder_init_args encoder_args = {};
+       char *iso_language_code = ENCODING_ISO8859_15 "eng";
        char *provider = ENCODING_ISO8859_15 "LinuxTV.org";
+       char *name = ENCODING_ISO8859_15 "Beethoven";
+       const u16 s302m_es_pid              = 0x111; /* packet id for the ES */
+       const u16 s302m_program_pid         = 0x101; /* packet id for PMT*/
+       const u16 s302m_service_id          = 0x880;
+       const u16 s302m_program_num         = 0x880;
+       const u16 s302m_beethoven_event_id  = 1;
+       struct vidtv_channel *s302m;
 
-       struct vidtv_channel *s302m = kzalloc(sizeof(*s302m), GFP_KERNEL);
-       struct vidtv_s302m_encoder_init_args encoder_args = {};
+       s302m = kzalloc(sizeof(*s302m), GFP_KERNEL);
+       if (!s302m)
+               return NULL;
 
        s302m->name = kstrdup(name, GFP_KERNEL);
+       if (!s302m->name)
+               goto free_s302m;
 
-       s302m->service = vidtv_psi_sdt_service_init(NULL, s302m_service_id);
+       s302m->service = vidtv_psi_sdt_service_init(NULL, s302m_service_id, false, true);
+       if (!s302m->service)
+               goto free_name;
 
        s302m->service->descriptor = (struct vidtv_psi_desc *)
                                     vidtv_psi_service_desc_init(NULL,
-                                                                DIGITAL_TELEVISION_SERVICE,
+                                                                DIGITAL_RADIO_SOUND_SERVICE,
                                                                 name,
                                                                 provider);
+       if (!s302m->service->descriptor)
+               goto free_service;
 
        s302m->transport_stream_id = transport_stream_id;
 
        s302m->program = vidtv_psi_pat_program_init(NULL,
                                                    s302m_service_id,
                                                    s302m_program_pid);
+       if (!s302m->program)
+               goto free_service;
 
        s302m->program_num = s302m_program_num;
 
        s302m->streams = vidtv_psi_pmt_stream_init(NULL,
                                                   STREAM_PRIVATE_DATA,
                                                   s302m_es_pid);
+       if (!s302m->streams)
+               goto free_program;
 
        s302m->streams->descriptor = (struct vidtv_psi_desc *)
                                     vidtv_psi_registration_desc_init(NULL,
                                                                      s302m_fid,
                                                                      NULL,
                                                                      0);
+       if (!s302m->streams->descriptor)
+               goto free_streams;
+
        encoder_args.es_pid = s302m_es_pid;
 
        s302m->encoders = vidtv_s302m_encoder_init(encoder_args);
+       if (!s302m->encoders)
+               goto free_streams;
+
+       s302m->events = vidtv_psi_eit_event_init(NULL, s302m_beethoven_event_id);
+       if (!s302m->events)
+               goto free_encoders;
+       s302m->events->descriptor = (struct vidtv_psi_desc *)
+                                   vidtv_psi_short_event_desc_init(NULL,
+                                                                   iso_language_code,
+                                                                   event_name,
+                                                                   event_text);
+       if (!s302m->events->descriptor)
+               goto free_events;
 
        if (head) {
                while (head->next)
@@ -102,6 +136,68 @@ struct vidtv_channel
        }
 
        return s302m;
+
+free_events:
+       vidtv_psi_eit_event_destroy(s302m->events);
+free_encoders:
+       vidtv_s302m_encoder_destroy(s302m->encoders);
+free_streams:
+       vidtv_psi_pmt_stream_destroy(s302m->streams);
+free_program:
+       vidtv_psi_pat_program_destroy(s302m->program);
+free_service:
+       vidtv_psi_sdt_service_destroy(s302m->service);
+free_name:
+       kfree(s302m->name);
+free_s302m:
+       kfree(s302m);
+
+       return NULL;
+}
+
+static struct vidtv_psi_table_eit_event
+*vidtv_channel_eit_event_cat_into_new(struct vidtv_mux *m)
+{
+       /* Concatenate the events */
+       const struct vidtv_channel *cur_chnl = m->channels;
+       struct vidtv_psi_table_eit_event *curr = NULL;
+       struct vidtv_psi_table_eit_event *head = NULL;
+       struct vidtv_psi_table_eit_event *tail = NULL;
+       struct vidtv_psi_desc *desc = NULL;
+       u16 event_id;
+
+       if (!cur_chnl)
+               return NULL;
+
+       while (cur_chnl) {
+               curr = cur_chnl->events;
+
+               if (!curr)
+                       dev_warn_ratelimited(m->dev,
+                                            "No events found for channel %s\n",
+                                            cur_chnl->name);
+
+               while (curr) {
+                       event_id = be16_to_cpu(curr->event_id);
+                       tail = vidtv_psi_eit_event_init(tail, event_id);
+                       if (!tail) {
+                               vidtv_psi_eit_event_destroy(head);
+                               return NULL;
+                       }
+
+                       desc = vidtv_psi_desc_clone(curr->descriptor);
+                       vidtv_psi_desc_assign(&tail->descriptor, desc);
+
+                       if (!head)
+                               head = tail;
+
+                       curr = curr->next;
+               }
+
+               cur_chnl = cur_chnl->next;
+       }
+
+       return head;
 }
 
 static struct vidtv_psi_table_sdt_service
@@ -125,13 +221,21 @@ static struct vidtv_psi_table_sdt_service
 
                if (!curr)
                        dev_warn_ratelimited(m->dev,
-                                            "No services found for channel %s\n", cur_chnl->name);
+                                            "No services found for channel %s\n",
+                                            cur_chnl->name);
 
                while (curr) {
                        service_id = be16_to_cpu(curr->service_id);
-                       tail = vidtv_psi_sdt_service_init(tail, service_id);
+                       tail = vidtv_psi_sdt_service_init(tail,
+                                                         service_id,
+                                                         curr->EIT_schedule,
+                                                         curr->EIT_present_following);
+                       if (!tail)
+                               goto free;
 
                        desc = vidtv_psi_desc_clone(curr->descriptor);
+                       if (!desc)
+                               goto free_tail;
                        vidtv_psi_desc_assign(&tail->descriptor, desc);
 
                        if (!head)
@@ -144,6 +248,12 @@ static struct vidtv_psi_table_sdt_service
        }
 
        return head;
+
+free_tail:
+       vidtv_psi_sdt_service_destroy(tail);
+free:
+       vidtv_psi_sdt_service_destroy(head);
+       return NULL;
 }
 
 static struct vidtv_psi_table_pat_program*
@@ -174,6 +284,10 @@ vidtv_channel_pat_prog_cat_into_new(struct vidtv_mux *m)
                        tail = vidtv_psi_pat_program_init(tail,
                                                          serv_id,
                                                          pid);
+                       if (!tail) {
+                               vidtv_psi_pat_program_destroy(head);
+                               return NULL;
+                       }
 
                        if (!head)
                                head = tail;
@@ -183,30 +297,30 @@ vidtv_channel_pat_prog_cat_into_new(struct vidtv_mux *m)
 
                cur_chnl = cur_chnl->next;
        }
+       /* Add the NIT table */
+       vidtv_psi_pat_program_init(tail, 0, TS_NIT_PID);
 
        return head;
 }
 
+/*
+ * Match channels to their respective PMT sections, then assign the
+ * streams
+ */
 static void
 vidtv_channel_pmt_match_sections(struct vidtv_channel *channels,
                                 struct vidtv_psi_table_pmt **sections,
                                 u32 nsections)
 {
-       /*
-        * Match channels to their respective PMT sections, then assign the
-        * streams
-        */
        struct vidtv_psi_table_pmt *curr_section = NULL;
-       struct vidtv_channel *cur_chnl = channels;
-
-       struct vidtv_psi_table_pmt_stream *s = NULL;
        struct vidtv_psi_table_pmt_stream *head = NULL;
        struct vidtv_psi_table_pmt_stream *tail = NULL;
-
+       struct vidtv_psi_table_pmt_stream *s = NULL;
+       struct vidtv_channel *cur_chnl = channels;
        struct vidtv_psi_desc *desc = NULL;
-       u32 j;
-       u16 curr_id;
        u16 e_pid; /* elementary stream pid */
+       u16 curr_id;
+       u32 j;
 
        while (cur_chnl) {
                for (j = 0; j < nsections; ++j) {
@@ -232,7 +346,8 @@ vidtv_channel_pmt_match_sections(struct vidtv_channel *channels,
                                                head = tail;
 
                                        desc = vidtv_psi_desc_clone(s->descriptor);
-                                       vidtv_psi_desc_assign(&tail->descriptor, desc);
+                                       vidtv_psi_desc_assign(&tail->descriptor,
+                                                             desc);
 
                                        s = s->next;
                                }
@@ -246,17 +361,103 @@ vidtv_channel_pmt_match_sections(struct vidtv_channel *channels,
        }
 }
 
-void vidtv_channel_si_init(struct vidtv_mux *m)
+static void
+vidtv_channel_destroy_service_list(struct vidtv_psi_desc_service_list_entry *e)
+{
+       struct vidtv_psi_desc_service_list_entry *tmp;
+
+       while (e) {
+               tmp = e;
+               e = e->next;
+               kfree(tmp);
+       }
+}
+
+static struct vidtv_psi_desc_service_list_entry
+*vidtv_channel_build_service_list(struct vidtv_psi_table_sdt_service *s)
 {
+       struct vidtv_psi_desc_service_list_entry *curr_e = NULL;
+       struct vidtv_psi_desc_service_list_entry *head_e = NULL;
+       struct vidtv_psi_desc_service_list_entry *prev_e = NULL;
+       struct vidtv_psi_desc_service *s_desc;
+       struct vidtv_psi_desc *desc;
+
+       while (s) {
+               desc = s->descriptor;
+               while (desc) {
+                       if (desc->type != SERVICE_DESCRIPTOR)
+                               goto next_desc;
+
+                       s_desc = (struct vidtv_psi_desc_service *)desc;
+
+                       curr_e = kzalloc(sizeof(*curr_e), GFP_KERNEL);
+                       if (!curr_e) {
+                               vidtv_channel_destroy_service_list(head_e);
+                               return NULL;
+                       }
+
+                       curr_e->service_id = s->service_id;
+                       curr_e->service_type = s_desc->service_type;
+
+                       if (!head_e)
+                               head_e = curr_e;
+                       if (prev_e)
+                               prev_e->next = curr_e;
+
+                       prev_e = curr_e;
+
+next_desc:
+                       desc = desc->next;
+               }
+               s = s->next;
+       }
+       return head_e;
+}
+
+int vidtv_channel_si_init(struct vidtv_mux *m)
+{
+       struct vidtv_psi_desc_service_list_entry *service_list = NULL;
        struct vidtv_psi_table_pat_program *programs = NULL;
        struct vidtv_psi_table_sdt_service *services = NULL;
+       struct vidtv_psi_table_eit_event *events = NULL;
 
        m->si.pat = vidtv_psi_pat_table_init(m->transport_stream_id);
+       if (!m->si.pat)
+               return -ENOMEM;
 
-       m->si.sdt = vidtv_psi_sdt_table_init(m->transport_stream_id);
+       m->si.sdt = vidtv_psi_sdt_table_init(m->network_id,
+                                            m->transport_stream_id);
+       if (!m->si.sdt)
+               goto free_pat;
 
        programs = vidtv_channel_pat_prog_cat_into_new(m);
+       if (!programs)
+               goto free_sdt;
        services = vidtv_channel_sdt_serv_cat_into_new(m);
+       if (!services)
+               goto free_programs;
+
+       events = vidtv_channel_eit_event_cat_into_new(m);
+       if (!events)
+               goto free_services;
+
+       /* look for a service descriptor for every service */
+       service_list = vidtv_channel_build_service_list(services);
+       if (!service_list)
+               goto free_events;
+
+       /* use these descriptors to build the NIT */
+       m->si.nit = vidtv_psi_nit_table_init(m->network_id,
+                                            m->transport_stream_id,
+                                            m->network_name,
+                                            service_list);
+       if (!m->si.nit)
+               goto free_service_list;
+
+       m->si.eit = vidtv_psi_eit_table_init(m->network_id,
+                                            m->transport_stream_id,
+                                            programs->service_id);
+       if (!m->si.eit)
+               goto free_nit;
 
        /* assemble all programs and assign to PAT */
        vidtv_psi_pat_program_assign(m->si.pat, programs);
@@ -264,31 +465,65 @@ void vidtv_channel_si_init(struct vidtv_mux *m)
        /* assemble all services and assign to SDT */
        vidtv_psi_sdt_service_assign(m->si.sdt, services);
 
-       m->si.pmt_secs = vidtv_psi_pmt_create_sec_for_each_pat_entry(m->si.pat, m->pcr_pid);
+       /* assemble all events and assign to EIT */
+       vidtv_psi_eit_event_assign(m->si.eit, events);
+
+       m->si.pmt_secs = vidtv_psi_pmt_create_sec_for_each_pat_entry(m->si.pat,
+                                                                    m->pcr_pid);
+       if (!m->si.pmt_secs)
+               goto free_eit;
 
        vidtv_channel_pmt_match_sections(m->channels,
                                         m->si.pmt_secs,
-                                        m->si.pat->programs);
+                                        m->si.pat->num_pmt);
+
+       vidtv_channel_destroy_service_list(service_list);
+
+       return 0;
+
+free_eit:
+       vidtv_psi_eit_table_destroy(m->si.eit);
+free_nit:
+       vidtv_psi_nit_table_destroy(m->si.nit);
+free_service_list:
+       vidtv_channel_destroy_service_list(service_list);
+free_events:
+       vidtv_psi_eit_event_destroy(events);
+free_services:
+       vidtv_psi_sdt_service_destroy(services);
+free_programs:
+       vidtv_psi_pat_program_destroy(programs);
+free_sdt:
+       vidtv_psi_sdt_table_destroy(m->si.sdt);
+free_pat:
+       vidtv_psi_pat_table_destroy(m->si.pat);
+       return -ENOMEM;
 }
 
 void vidtv_channel_si_destroy(struct vidtv_mux *m)
 {
        u32 i;
-       u16 num_programs = m->si.pat->programs;
 
-       vidtv_psi_pat_table_destroy(m->si.pat);
-
-       for (i = 0; i < num_programs; ++i)
+       for (i = 0; i < m->si.pat->num_pmt; ++i)
                vidtv_psi_pmt_table_destroy(m->si.pmt_secs[i]);
 
+       vidtv_psi_pat_table_destroy(m->si.pat);
+
        kfree(m->si.pmt_secs);
        vidtv_psi_sdt_table_destroy(m->si.sdt);
+       vidtv_psi_nit_table_destroy(m->si.nit);
+       vidtv_psi_eit_table_destroy(m->si.eit);
 }
 
-void vidtv_channels_init(struct vidtv_mux *m)
+int vidtv_channels_init(struct vidtv_mux *m)
 {
        /* this is the place to add new 'channels' for vidtv */
        m->channels = vidtv_channel_s302m_init(NULL, m->transport_stream_id);
+
+       if (!m->channels)
+               return -ENOMEM;
+
+       return 0;
 }
 
 void vidtv_channels_destroy(struct vidtv_mux *m)
@@ -302,6 +537,7 @@ void vidtv_channels_destroy(struct vidtv_mux *m)
                vidtv_psi_pat_program_destroy(curr->program);
                vidtv_psi_pmt_stream_destroy(curr->streams);
                vidtv_channel_encoder_destroy(curr->encoders);
+               vidtv_psi_eit_event_destroy(curr->events);
 
                tmp = curr;
                curr = curr->next;
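
vidtv_channel_si_init() and the s302m channel constructor above both follow the standard kernel unwind ladder: one label per allocation, entered in reverse allocation order so each failure frees exactly what already exists. Reduced to its skeleton (illustrative names):

#include <linux/slab.h>

static int example_init(void)
{
        void *a, *b;

        a = kzalloc(16, GFP_KERNEL);
        if (!a)
                return -ENOMEM;

        b = kzalloc(16, GFP_KERNEL);
        if (!b)
                goto free_a;

        return 0;

free_a:
        kfree(a);
        return -ENOMEM; /* the ladder must report failure, never 0 */
}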
index 2c3cba4..fff2e50 100644 (file)
@@ -9,6 +9,7 @@
  * When vidtv boots, it will create some hardcoded channels.
  * Their services will be concatenated to populate the SDT.
  * Their programs will be concatenated to populate the PAT
+ * Their events will be concatenated to populate the EIT
  * For each program in the PAT, a PMT section will be created
  * The PMT section for a channel will be assigned its streams.
  * Every stream will have its corresponding encoder polled to produce TS packets
 #define VIDTV_CHANNEL_H
 
 #include <linux/types.h>
-#include "vidtv_psi.h"
+
 #include "vidtv_encoder.h"
 #include "vidtv_mux.h"
+#include "vidtv_psi.h"
 
 /**
  * struct vidtv_channel - A 'channel' abstraction
@@ -37,6 +39,7 @@
  * Every stream will have its corresponding encoder polled to produce TS packets
  * These packets may be interleaved by the mux and then delivered to the bridge
  *
+ * @name: name of the channel
  * @transport_stream_id: a number to identify the TS, chosen at will.
  * @service: A _single_ service. Will be concatenated into the SDT.
  * @program_num: The link between PAT, PMT and SDT.
@@ -44,6 +47,7 @@
  * Will be concatenated into the PAT.
  * @streams: A stream loop used to populate the PMT section for 'program'
  * @encoders: An encoder loop. There must be one encoder for each stream.
+ * @events: Optional event information. This will feed into the EIT.
  * @next: Optionally chain this channel.
  */
 struct vidtv_channel {
@@ -54,6 +58,7 @@ struct vidtv_channel {
        struct vidtv_psi_table_pat_program *program;
        struct vidtv_psi_table_pmt_stream *streams;
        struct vidtv_encoder *encoders;
+       struct vidtv_psi_table_eit_event *events;
        struct vidtv_channel *next;
 };
 
@@ -61,14 +66,14 @@ struct vidtv_channel {
  * vidtv_channel_si_init - Init the PSI tables from the channels in the mux
  * @m: The mux containing the channels.
  */
-void vidtv_channel_si_init(struct vidtv_mux *m);
+int vidtv_channel_si_init(struct vidtv_mux *m);
 void vidtv_channel_si_destroy(struct vidtv_mux *m);
 
 /**
  * vidtv_channels_init - Init hardcoded, fake 'channels'.
  * @m: The mux to store the channels into.
  */
-void vidtv_channels_init(struct vidtv_mux *m);
+int vidtv_channels_init(struct vidtv_mux *m);
 struct vidtv_channel
 *vidtv_channel_s302m_init(struct vidtv_channel *head, u16 transport_stream_id);
 void vidtv_channels_destroy(struct vidtv_mux *m);
index 818e7f2..42f63fd 100644 (file)
@@ -16,7 +16,6 @@
 #define CLOCK_UNIT_27MHZ 27000000
 #define VIDTV_SLEEP_USECS 10000
 #define VIDTV_MAX_SLEEP_USECS (2 * VIDTV_SLEEP_USECS)
-#define VIDTV_DEFAULT_TS_ID 0x744
 
 u32 vidtv_memcpy(void *to,
                 size_t to_offset,
index eba7fe1..b7823d9 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
+
 #include <media/dvb_frontend.h>
 
 #include "vidtv_demod.h"
@@ -192,7 +193,6 @@ static void vidtv_demod_update_stats(struct dvb_frontend *fe)
 
        c->cnr.stat[0].svalue = state->tuner_cnr;
        c->cnr.stat[0].svalue -= prandom_u32_max(state->tuner_cnr / 50);
-
 }
 
 static int vidtv_demod_read_status(struct dvb_frontend *fe,
index 87651b0..2b84046 100644 (file)
@@ -12,6 +12,7 @@
 #define VIDTV_DEMOD_H
 
 #include <linux/dvb/frontend.h>
+
 #include <media/dvb_frontend.h>
 
 /**
@@ -19,6 +20,9 @@
  * modulation and fec_inner
  * @modulation: see enum fe_modulation
  * @fec: see enum fe_fec_rate
+ * @cnr_ok: S/N threshold to consider the signal as OK. Below that, there's
+ *          a chance of losing sync.
+ * @cnr_good: S/N threshold to consider the signal strong.
  *
  * This struct matches values for 'good' and 'ok' CNRs given the combination
  * of modulation and fec_inner in use. We might simulate some noise if the
@@ -52,13 +56,8 @@ struct vidtv_demod_config {
  * struct vidtv_demod_state - The demodulator state
  * @frontend: The frontend structure allocated by the demod.
  * @config: The config used to init the demod.
- * @poll_snr: The task responsible for periodically checking the simulated
- * signal quality, eventually dropping or reacquiring the TS lock.
  * @status: the demod status.
- * @cold_start: Whether the demod has not been init yet.
- * @poll_snr_thread_running: Whether the task responsible for periodically
- * checking the simulated signal quality is running.
- * @poll_snr_thread_restart: Whether we should restart the poll_snr task.
+ * @tuner_cnr: current S/N ratio for the signal carrier
  */
 struct vidtv_demod_state {
        struct dvb_frontend frontend;
index 65d81da..50e3cf4 100644 (file)
@@ -28,7 +28,7 @@ struct vidtv_access_unit {
        struct vidtv_access_unit *next;
 };
 
-/* Some musical notes, used by a tone generator */
+/* Some musical notes, used by a tone generator. Values are in Hz */
 enum musical_notes {
        NOTE_SILENT = 0,
 
@@ -103,14 +103,16 @@ enum musical_notes {
  * @encoder_buf_sz: The encoder buffer size, in bytes
  * @encoder_buf_offset: Our byte position in the encoder buffer.
  * @sample_count: How many samples we have encoded in total.
+ * @access_units: encoder payload units, used for clock references
  * @src_buf: The source of raw data to be encoded, encoder might set a
  * default if null.
+ * @src_buf_sz: size of @src_buf.
  * @src_buf_offset: Our position in the source buffer.
  * @is_video_encoder: Whether this a video encoder (as opposed to audio)
  * @ctx: Encoder-specific state.
  * @stream_id: Examples: Audio streams (0xc0-0xdf), Video streams
  * (0xe0-0xef).
- * @es_id: The TS PID to use for the elementary stream in this encoder.
+ * @es_pid: The TS PID to use for the elementary stream in this encoder.
  * @encode: Prepare enough AUs for the given amount of time.
  * @clear: Clear the encoder output.
  * @sync: Attempt to synchronize with this encoder.
@@ -131,9 +133,6 @@ struct vidtv_encoder {
        u32 encoder_buf_offset;
 
        u64 sample_count;
-       int last_duration;
-       int note_offset;
-       enum musical_notes last_tone;
 
        struct vidtv_access_unit *access_units;
 
index 082740a..b51e6a3 100644 (file)
  * Copyright (C) 2020 Daniel W. S. Almeida
  */
 
-#include <linux/types.h>
-#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/dev_printk.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
-#include <linux/dev_printk.h>
+#include <linux/math64.h>
 #include <linux/ratelimit.h>
-#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/types.h>
 #include <linux/vmalloc.h>
-#include <linux/math64.h>
 
-#include "vidtv_mux.h"
-#include "vidtv_ts.h"
-#include "vidtv_pes.h"
-#include "vidtv_encoder.h"
 #include "vidtv_channel.h"
 #include "vidtv_common.h"
+#include "vidtv_encoder.h"
+#include "vidtv_mux.h"
+#include "vidtv_pes.h"
 #include "vidtv_psi.h"
+#include "vidtv_ts.h"
 
 static struct vidtv_mux_pid_ctx
 *vidtv_mux_get_pid_ctx(struct vidtv_mux *m, u16 pid)
@@ -47,33 +47,56 @@ static struct vidtv_mux_pid_ctx
        struct vidtv_mux_pid_ctx *ctx;
 
        ctx = vidtv_mux_get_pid_ctx(m, pid);
-
        if (ctx)
-               goto end;
+               return ctx;
+
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       if (!ctx)
+               return NULL;
 
-       ctx      = kzalloc(sizeof(*ctx), GFP_KERNEL);
        ctx->pid = pid;
        ctx->cc  = 0;
        hash_add(m->pid_ctx, &ctx->h, pid);
 
-end:
        return ctx;
 }
 
-static void vidtv_mux_pid_ctx_init(struct vidtv_mux *m)
+static void vidtv_mux_pid_ctx_destroy(struct vidtv_mux *m)
+{
+       struct vidtv_mux_pid_ctx *ctx;
+       struct hlist_node *tmp;
+       int bkt;
+
+       hash_for_each_safe(m->pid_ctx, bkt, tmp, ctx, h) {
+               hash_del(&ctx->h);
+               kfree(ctx);
+       }
+}
+
+static int vidtv_mux_pid_ctx_init(struct vidtv_mux *m)
 {
        struct vidtv_psi_table_pat_program *p = m->si.pat->program;
        u16 pid;
 
        hash_init(m->pid_ctx);
        /* push the pcr pid ctx */
-       vidtv_mux_create_pid_ctx_once(m, m->pcr_pid);
-       /* push the null packet pid ctx */
-       vidtv_mux_create_pid_ctx_once(m, TS_NULL_PACKET_PID);
+       if (!vidtv_mux_create_pid_ctx_once(m, m->pcr_pid))
+               return -ENOMEM;
+       /* push the NULL packet pid ctx */
+       if (!vidtv_mux_create_pid_ctx_once(m, TS_NULL_PACKET_PID))
+               goto free;
        /* push the PAT pid ctx */
-       vidtv_mux_create_pid_ctx_once(m, VIDTV_PAT_PID);
+       if (!vidtv_mux_create_pid_ctx_once(m, VIDTV_PAT_PID))
+               goto free;
        /* push the SDT pid ctx */
-       vidtv_mux_create_pid_ctx_once(m, VIDTV_SDT_PID);
+       if (!vidtv_mux_create_pid_ctx_once(m, VIDTV_SDT_PID))
+               goto free;
+       /* push the NIT pid ctx */
+       if (!vidtv_mux_create_pid_ctx_once(m, VIDTV_NIT_PID))
+               goto free;
+       /* push the EIT pid ctx */
+       if (!vidtv_mux_create_pid_ctx_once(m, VIDTV_EIT_PID))
+               goto free;
 
        /* add a ctx for all PMT sections */
        while (p) {
@@ -81,18 +104,12 @@ static void vidtv_mux_pid_ctx_init(struct vidtv_mux *m)
                vidtv_mux_create_pid_ctx_once(m, pid);
                p = p->next;
        }
-}
 
-static void vidtv_mux_pid_ctx_destroy(struct vidtv_mux *m)
-{
-       int bkt;
-       struct vidtv_mux_pid_ctx *ctx;
-       struct hlist_node *tmp;
+       return 0;
 
-       hash_for_each_safe(m->pid_ctx, bkt, tmp, ctx, h) {
-               hash_del(&ctx->h);
-               kfree(ctx);
-       }
+free:
+       vidtv_mux_pid_ctx_destroy(m);
+       return -ENOMEM;
 }
 
 static void vidtv_mux_update_clk(struct vidtv_mux *m)
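
The PID contexts above use the kernel's fixed-size hashtable helpers; m->pid_ctx is presumably a DECLARE_HASHTABLE() field keyed by PID. The same idiom as a self-contained sketch (struct and table size are illustrative):

#include <linux/hashtable.h>
#include <linux/slab.h>

struct example_ctx {
        u16 pid;
        struct hlist_node h;
};

static DEFINE_HASHTABLE(example_tbl, 4);        /* 2^4 = 16 buckets */

static void example_pid_ctx(void)
{
        struct example_ctx *ctx;
        struct hlist_node *tmp;
        int bkt;

        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return;

        ctx->pid = 0x100;
        hash_add(example_tbl, &ctx->h, ctx->pid);       /* the key picks the bucket */

        /* the _safe variant tolerates hash_del() while iterating */
        hash_for_each_safe(example_tbl, bkt, tmp, ctx, h) {
                hash_del(&ctx->h);
                kfree(ctx);
        }
}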
@@ -112,32 +129,53 @@ static void vidtv_mux_update_clk(struct vidtv_mux *m)
 
 static u32 vidtv_mux_push_si(struct vidtv_mux *m)
 {
+       struct vidtv_psi_pat_write_args pat_args = {
+               .buf                = m->mux_buf,
+               .buf_sz             = m->mux_buf_sz,
+               .pat                = m->si.pat,
+       };
+       struct vidtv_psi_pmt_write_args pmt_args = {
+               .buf                = m->mux_buf,
+               .buf_sz             = m->mux_buf_sz,
+               .pcr_pid            = m->pcr_pid,
+       };
+       struct vidtv_psi_sdt_write_args sdt_args = {
+               .buf                = m->mux_buf,
+               .buf_sz             = m->mux_buf_sz,
+               .sdt                = m->si.sdt,
+       };
+       struct vidtv_psi_nit_write_args nit_args = {
+               .buf                = m->mux_buf,
+               .buf_sz             = m->mux_buf_sz,
+               .nit                = m->si.nit,
+       };
+       struct vidtv_psi_eit_write_args eit_args = {
+               .buf                = m->mux_buf,
+               .buf_sz             = m->mux_buf_sz,
+               .eit                = m->si.eit,
+       };
        u32 initial_offset = m->mux_buf_offset;
-
        struct vidtv_mux_pid_ctx *pat_ctx;
        struct vidtv_mux_pid_ctx *pmt_ctx;
        struct vidtv_mux_pid_ctx *sdt_ctx;
-
-       struct vidtv_psi_pat_write_args pat_args = {};
-       struct vidtv_psi_pmt_write_args pmt_args = {};
-       struct vidtv_psi_sdt_write_args sdt_args = {};
-
-       u32 nbytes; /* the number of bytes written by this function */
+       struct vidtv_mux_pid_ctx *nit_ctx;
+       struct vidtv_mux_pid_ctx *eit_ctx;
+       u32 nbytes;
        u16 pmt_pid;
        u32 i;
 
        pat_ctx = vidtv_mux_get_pid_ctx(m, VIDTV_PAT_PID);
        sdt_ctx = vidtv_mux_get_pid_ctx(m, VIDTV_SDT_PID);
+       nit_ctx = vidtv_mux_get_pid_ctx(m, VIDTV_NIT_PID);
+       eit_ctx = vidtv_mux_get_pid_ctx(m, VIDTV_EIT_PID);
 
-       pat_args.buf                = m->mux_buf;
        pat_args.offset             = m->mux_buf_offset;
-       pat_args.pat                = m->si.pat;
-       pat_args.buf_sz             = m->mux_buf_sz;
        pat_args.continuity_counter = &pat_ctx->cc;
 
-       m->mux_buf_offset += vidtv_psi_pat_write_into(pat_args);
+       m->mux_buf_offset += vidtv_psi_pat_write_into(&pat_args);
 
-       for (i = 0; i < m->si.pat->programs; ++i) {
+       for (i = 0; i < m->si.pat->num_pmt; ++i) {
                pmt_pid = vidtv_psi_pmt_get_pid(m->si.pmt_secs[i],
                                                m->si.pat);
 
@@ -149,25 +187,29 @@ static u32 vidtv_mux_push_si(struct vidtv_mux *m)
 
                pmt_ctx = vidtv_mux_get_pid_ctx(m, pmt_pid);
 
-               pmt_args.buf                = m->mux_buf;
                pmt_args.offset             = m->mux_buf_offset;
                pmt_args.pmt                = m->si.pmt_secs[i];
                pmt_args.pid                = pmt_pid;
-               pmt_args.buf_sz             = m->mux_buf_sz;
                pmt_args.continuity_counter = &pmt_ctx->cc;
-               pmt_args.pcr_pid            = m->pcr_pid;
 
                /* write each section into buffer */
-               m->mux_buf_offset += vidtv_psi_pmt_write_into(pmt_args);
+               m->mux_buf_offset += vidtv_psi_pmt_write_into(&pmt_args);
        }
 
-       sdt_args.buf                = m->mux_buf;
        sdt_args.offset             = m->mux_buf_offset;
-       sdt_args.sdt                = m->si.sdt;
-       sdt_args.buf_sz             = m->mux_buf_sz;
        sdt_args.continuity_counter = &sdt_ctx->cc;
 
-       m->mux_buf_offset += vidtv_psi_sdt_write_into(sdt_args);
+       m->mux_buf_offset += vidtv_psi_sdt_write_into(&sdt_args);
+
+       nit_args.offset             = m->mux_buf_offset;
+       nit_args.continuity_counter = &nit_ctx->cc;
+
+       m->mux_buf_offset += vidtv_psi_nit_write_into(&nit_args);
+
+       eit_args.offset             = m->mux_buf_offset;
+       eit_args.continuity_counter = &eit_ctx->cc;
+
+       m->mux_buf_offset += vidtv_psi_eit_write_into(&eit_args);
 
        nbytes = m->mux_buf_offset - initial_offset;
 
@@ -230,23 +272,29 @@ static bool vidtv_mux_should_push_si(struct vidtv_mux *m)
 static u32 vidtv_mux_packetize_access_units(struct vidtv_mux *m,
                                            struct vidtv_encoder *e)
 {
-       u32 nbytes = 0;
-
-       struct pes_write_args args = {};
-       u32 initial_offset = m->mux_buf_offset;
+       struct pes_write_args args = {
+               .dest_buf           = m->mux_buf,
+               .dest_buf_sz        = m->mux_buf_sz,
+               .pid                = be16_to_cpu(e->es_pid),
+               .encoder_id         = e->id,
+               .stream_id          = be16_to_cpu(e->stream_id),
+               .send_pts           = true,  /* PTS_DTS_flags '10': PTS only, */
+               .send_dts           = false, /* as '01' (DTS alone) is forbidden */
+       };
        struct vidtv_access_unit *au = e->access_units;
-
+       u32 initial_offset = m->mux_buf_offset;
+       struct vidtv_mux_pid_ctx *pid_ctx;
+       u32 nbytes = 0;
        u8 *buf = NULL;
-       struct vidtv_mux_pid_ctx *pid_ctx = vidtv_mux_create_pid_ctx_once(m,
-                                                                         be16_to_cpu(e->es_pid));
 
-       args.dest_buf           = m->mux_buf;
-       args.dest_buf_sz        = m->mux_buf_sz;
-       args.pid                = be16_to_cpu(e->es_pid);
-       args.encoder_id         = e->id;
+       /* see SMPTE 302M clause 6.4 */
+       if (args.encoder_id == S302M) {
+               args.send_dts = false;
+               args.send_pts = true;
+       }
+
+       pid_ctx = vidtv_mux_create_pid_ctx_once(m, be16_to_cpu(e->es_pid));
        args.continuity_counter = &pid_ctx->cc;
-       args.stream_id          = be16_to_cpu(e->stream_id);
-       args.send_pts           = true;
 
        while (au) {
                buf                  = e->encoder_buf + au->offset;
@@ -256,7 +304,7 @@ static u32 vidtv_mux_packetize_access_units(struct vidtv_mux *m,
                args.pts             = au->pts;
                args.pcr             = m->timing.clk;
 
-               m->mux_buf_offset += vidtv_pes_write_into(args);
+               m->mux_buf_offset += vidtv_pes_write_into(&args);
 
                au = au->next;
        }
@@ -273,10 +321,10 @@ static u32 vidtv_mux_packetize_access_units(struct vidtv_mux *m,
 
 static u32 vidtv_mux_poll_encoders(struct vidtv_mux *m)
 {
-       u32 nbytes = 0;
-       u32 au_nbytes;
        struct vidtv_channel *cur_chnl = m->channels;
        struct vidtv_encoder *e = NULL;
+       u32 nbytes = 0;
+       u32 au_nbytes;
 
        while (cur_chnl) {
                e = cur_chnl->encoders;
@@ -300,18 +348,19 @@ static u32 vidtv_mux_poll_encoders(struct vidtv_mux *m)
 
 static u32 vidtv_mux_pad_with_nulls(struct vidtv_mux *m, u32 npkts)
 {
-       struct null_packet_write_args args = {};
+       struct null_packet_write_args args = {
+               .dest_buf           = m->mux_buf,
+               .buf_sz             = m->mux_buf_sz,
+               .dest_offset        = m->mux_buf_offset,
+       };
        u32 initial_offset = m->mux_buf_offset;
-       u32 nbytes; /* the number of bytes written by this function */
-       u32 i;
        struct vidtv_mux_pid_ctx *ctx;
+       u32 nbytes;
+       u32 i;
 
        ctx = vidtv_mux_get_pid_ctx(m, TS_NULL_PACKET_PID);
 
-       args.dest_buf           = m->mux_buf;
-       args.buf_sz             = m->mux_buf_sz;
        args.continuity_counter = &ctx->cc;
-       args.dest_offset        = m->mux_buf_offset;
 
        for (i = 0; i < npkts; ++i) {
                m->mux_buf_offset += vidtv_ts_null_write_into(args);
@@ -343,9 +392,9 @@ static void vidtv_mux_tick(struct work_struct *work)
                                           struct vidtv_mux,
                                           mpeg_thread);
        struct dtv_frontend_properties *c = &m->fe->dtv_property_cache;
+       u32 tot_bits = 0;
        u32 nbytes;
        u32 npkts;
-       u32 tot_bits = 0;
 
        while (m->streaming) {
                nbytes = 0;
@@ -427,40 +476,62 @@ void vidtv_mux_stop_thread(struct vidtv_mux *m)
 
 struct vidtv_mux *vidtv_mux_init(struct dvb_frontend *fe,
                                 struct device *dev,
-                                struct vidtv_mux_init_args args)
+                                struct vidtv_mux_init_args *args)
 {
-       struct vidtv_mux *m = kzalloc(sizeof(*m), GFP_KERNEL);
+       struct vidtv_mux *m;
+
+       m = kzalloc(sizeof(*m), GFP_KERNEL);
+       if (!m)
+               return NULL;
 
        m->dev = dev;
        m->fe = fe;
-       m->timing.pcr_period_usecs = args.pcr_period_usecs;
-       m->timing.si_period_usecs  = args.si_period_usecs;
+       m->timing.pcr_period_usecs = args->pcr_period_usecs;
+       m->timing.si_period_usecs  = args->si_period_usecs;
+
+       m->mux_rate_kbytes_sec = args->mux_rate_kbytes_sec;
 
-       m->mux_rate_kbytes_sec = args.mux_rate_kbytes_sec;
+       m->on_new_packets_available_cb = args->on_new_packets_available_cb;
 
-       m->on_new_packets_available_cb = args.on_new_packets_available_cb;
+       m->mux_buf = vzalloc(args->mux_buf_sz);
+       if (!m->mux_buf)
+               goto free_mux;
 
-       m->mux_buf = vzalloc(args.mux_buf_sz);
-       m->mux_buf_sz = args.mux_buf_sz;
+       m->mux_buf_sz = args->mux_buf_sz;
 
-       m->pcr_pid = args.pcr_pid;
-       m->transport_stream_id = args.transport_stream_id;
-       m->priv = args.priv;
+       m->pcr_pid = args->pcr_pid;
+       m->transport_stream_id = args->transport_stream_id;
+       m->priv = args->priv;
+       m->network_id = args->network_id;
+       m->network_name = kstrdup(args->network_name, GFP_KERNEL);
        m->timing.current_jiffies = get_jiffies_64();
 
-       if (args.channels)
-               m->channels = args.channels;
+       if (args->channels)
+               m->channels = args->channels;
        else
-               vidtv_channels_init(m);
+               if (vidtv_channels_init(m) < 0)
+                       goto free_mux_buf;
 
        /* will alloc data for pmt_sections after initializing pat */
-       vidtv_channel_si_init(m);
+       if (vidtv_channel_si_init(m) < 0)
+               goto free_channels;
 
        INIT_WORK(&m->mpeg_thread, vidtv_mux_tick);
 
-       vidtv_mux_pid_ctx_init(m);
+       if (vidtv_mux_pid_ctx_init(m) < 0)
+               goto free_channel_si;
 
        return m;
+
+free_channel_si:
+       vidtv_channel_si_destroy(m);
+free_channels:
+       vidtv_channels_destroy(m);
+free_mux_buf:
+       vfree(m->mux_buf);
+free_mux:
+       kfree(m);
+       return NULL;
 }
 
 void vidtv_mux_destroy(struct vidtv_mux *m)
@@ -469,6 +540,7 @@ void vidtv_mux_destroy(struct vidtv_mux *m)
        vidtv_mux_pid_ctx_destroy(m);
        vidtv_channel_si_destroy(m);
        vidtv_channels_destroy(m);
+       kfree(m->network_name);
        vfree(m->mux_buf);
        kfree(m);
 }
index 2caa606..ad82eb7 100644
 #ifndef VIDTV_MUX_H
 #define VIDTV_MUX_H
 
-#include <linux/types.h>
 #include <linux/hashtable.h>
+#include <linux/types.h>
 #include <linux/workqueue.h>
+
 #include <media/dvb_frontend.h>
 
 #include "vidtv_psi.h"
@@ -58,12 +59,16 @@ struct vidtv_mux_timing {
  * @pat: The PAT in use by the muxer.
  * @pmt_secs: The PMT sections in use by the muxer. One for each program in the PAT.
  * @sdt: The SDT in use by the muxer.
+ * @nit: The NIT in use by the muxer.
+ * @eit: The EIT in use by the muxer.
  */
 struct vidtv_mux_si {
        /* the SI tables */
        struct vidtv_psi_table_pat *pat;
        struct vidtv_psi_table_pmt **pmt_secs; /* the PMT sections */
        struct vidtv_psi_table_sdt *sdt;
+       struct vidtv_psi_table_nit *nit;
+       struct vidtv_psi_table_eit *eit;
 };
 
 /**
@@ -82,8 +87,10 @@ struct vidtv_mux_pid_ctx {
 
 /**
 * struct vidtv_mux - A muxer abstraction loosely based on libavcodec/mpegtsenc.c
- * @mux_rate_kbytes_sec: The bit rate for the TS, in kbytes.
+ * @fe: The frontend structure allocated by the muxer.
+ * @dev: Pointer to the struct device.
 * @timing: Keeps track of timing related information.
+ * @mux_rate_kbytes_sec: The data rate for the TS, in kbytes per second.
  * @pid_ctx: A hash table to keep track of per-PID metadata.
  * @on_new_packets_available_cb: A callback to inform of new TS packets ready.
  * @mux_buf: A pointer to a buffer for this muxer. TS packets are stored there
@@ -99,6 +106,8 @@ struct vidtv_mux_pid_ctx {
  * @pcr_pid: The TS PID used for the PSI packets. All channels will share the
  * same PCR.
  * @transport_stream_id: The transport stream ID
+ * @network_id: The network ID
+ * @network_name: The network name
  * @priv: Private data.
  */
 struct vidtv_mux {
@@ -128,6 +137,8 @@ struct vidtv_mux {
 
        u16 pcr_pid;
        u16 transport_stream_id;
+       u16 network_id;
+       char *network_name;
        void *priv;
 };
 
@@ -142,6 +153,8 @@ struct vidtv_mux {
  * same PCR.
  * @transport_stream_id: The transport stream ID
  * @channels: an optional list of channels to use
+ * @network_id: The network ID
+ * @network_name: The network name
  * @priv: Private data.
  */
 struct vidtv_mux_init_args {
@@ -153,12 +166,14 @@ struct vidtv_mux_init_args {
        u16 pcr_pid;
        u16 transport_stream_id;
        struct vidtv_channel *channels;
+       u16 network_id;
+       char *network_name;
        void *priv;
 };
 
 struct vidtv_mux *vidtv_mux_init(struct dvb_frontend *fe,
                                 struct device *dev,
-                                struct vidtv_mux_init_args args);
+                                struct vidtv_mux_init_args *args);
 void vidtv_mux_destroy(struct vidtv_mux *m);
 
 void vidtv_mux_start_thread(struct vidtv_mux *m);
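
With the init arguments now passed by reference, a caller fills the struct once
and hands in a pointer. A minimal lifecycle sketch, assuming fe and dev come
from the DVB core; the sizes, rates and IDs below are illustrative values, not
the driver's defaults:

        static int example_mux_lifecycle(struct dvb_frontend *fe,
                                         struct device *dev)
        {
                struct vidtv_mux_init_args args = {
                        .mux_buf_sz          = 64 * 1024,  /* illustrative */
                        .mux_rate_kbytes_sec = 4096,       /* illustrative */
                        .pcr_period_usecs    = 40000,
                        .si_period_usecs     = 40000,
                        .pcr_pid             = 0x200,
                        .transport_stream_id = 0x744,
                        .network_id          = 0x777,
                        .network_name        = "example-net",
                };
                struct vidtv_mux *m = vidtv_mux_init(fe, dev, &args);

                if (!m)
                        return -ENOMEM; /* init now fails cleanly */

                vidtv_mux_start_thread(m);
                /* ... mux runs until told to stop ... */
                vidtv_mux_stop_thread(m);
                vidtv_mux_destroy(m);
                return 0;
        }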
index 1c75f88..782e5e7 100644
@@ -16,7 +16,6 @@
 #include <linux/types.h>
 #include <linux/printk.h>
 #include <linux/ratelimit.h>
-#include <asm/byteorder.h>
 
 #include "vidtv_pes.h"
 #include "vidtv_common.h"
@@ -57,7 +56,7 @@ static u32 vidtv_pes_h_get_len(bool send_pts, bool send_dts)
        return len;
 }
 
-static u32 vidtv_pes_write_header_stuffing(struct pes_header_write_args args)
+static u32 vidtv_pes_write_header_stuffing(struct pes_header_write_args *args)
 {
        /*
         * This is a fixed 8-bit value equal to '0xFF' that can be inserted
@@ -65,20 +64,20 @@ static u32 vidtv_pes_write_header_stuffing(struct pes_header_write_args args)
         * It is discarded by the decoder. No more than 32 stuffing bytes shall
         * be present in one PES packet header.
         */
-       if (args.n_pes_h_s_bytes > PES_HEADER_MAX_STUFFING_BYTES) {
+       if (args->n_pes_h_s_bytes > PES_HEADER_MAX_STUFFING_BYTES) {
                pr_warn_ratelimited("More than %d stuffing bytes in PES packet header\n",
                                    PES_HEADER_MAX_STUFFING_BYTES);
-               args.n_pes_h_s_bytes = PES_HEADER_MAX_STUFFING_BYTES;
+               args->n_pes_h_s_bytes = PES_HEADER_MAX_STUFFING_BYTES;
        }
 
-       return vidtv_memset(args.dest_buf,
-                           args.dest_offset,
-                           args.dest_buf_sz,
+       return vidtv_memset(args->dest_buf,
+                           args->dest_offset,
+                           args->dest_buf_sz,
                            TS_FILL_BYTE,
-                           args.n_pes_h_s_bytes);
+                           args->n_pes_h_s_bytes);
 }
 
-static u32 vidtv_pes_write_pts_dts(struct pes_header_write_args args)
+static u32 vidtv_pes_write_pts_dts(struct pes_header_write_args *args)
 {
        u32 nbytes = 0;  /* the number of bytes written by this function */
 
@@ -90,7 +89,7 @@ static u32 vidtv_pes_write_pts_dts(struct pes_header_write_args args)
        u64 mask2;
        u64 mask3;
 
-       if (!args.send_pts && args.send_dts)
+       if (!args->send_pts && args->send_dts)
                return 0;
 
        mask1 = GENMASK_ULL(32, 30);
@@ -98,80 +97,81 @@ static u32 vidtv_pes_write_pts_dts(struct pes_header_write_args args)
        mask3 = GENMASK_ULL(14, 0);
 
        /* see ISO/IEC 13818-1 : 2000 p. 32 */
-       if (args.send_pts && args.send_dts) {
-               pts_dts.pts1 = (0x3 << 4) | ((args.pts & mask1) >> 29) | 0x1;
-               pts_dts.pts2 = cpu_to_be16(((args.pts & mask2) >> 14) | 0x1);
-               pts_dts.pts3 = cpu_to_be16(((args.pts & mask3) << 1) | 0x1);
+       if (args->send_pts && args->send_dts) {
+               pts_dts.pts1 = (0x3 << 4) | ((args->pts & mask1) >> 29) | 0x1;
+               pts_dts.pts2 = cpu_to_be16(((args->pts & mask2) >> 14) | 0x1);
+               pts_dts.pts3 = cpu_to_be16(((args->pts & mask3) << 1) | 0x1);
 
-               pts_dts.dts1 = (0x1 << 4) | ((args.dts & mask1) >> 29) | 0x1;
-               pts_dts.dts2 = cpu_to_be16(((args.dts & mask2) >> 14) | 0x1);
-               pts_dts.dts3 = cpu_to_be16(((args.dts & mask3) << 1) | 0x1);
+               pts_dts.dts1 = (0x1 << 4) | ((args->dts & mask1) >> 29) | 0x1;
+               pts_dts.dts2 = cpu_to_be16(((args->dts & mask2) >> 14) | 0x1);
+               pts_dts.dts3 = cpu_to_be16(((args->dts & mask3) << 1) | 0x1);
 
                op = &pts_dts;
                op_sz = sizeof(pts_dts);
 
-       } else if (args.send_pts) {
-               pts.pts1 = (0x1 << 5) | ((args.pts & mask1) >> 29) | 0x1;
-               pts.pts2 = cpu_to_be16(((args.pts & mask2) >> 14) | 0x1);
-               pts.pts3 = cpu_to_be16(((args.pts & mask3) << 1) | 0x1);
+       } else if (args->send_pts) {
+               pts.pts1 = (0x1 << 5) | ((args->pts & mask1) >> 29) | 0x1;
+               pts.pts2 = cpu_to_be16(((args->pts & mask2) >> 14) | 0x1);
+               pts.pts3 = cpu_to_be16(((args->pts & mask3) << 1) | 0x1);
 
                op = &pts;
                op_sz = sizeof(pts);
        }
 
        /* copy PTS/DTS optional */
-       nbytes += vidtv_memcpy(args.dest_buf,
-                              args.dest_offset + nbytes,
-                              args.dest_buf_sz,
+       nbytes += vidtv_memcpy(args->dest_buf,
+                              args->dest_offset + nbytes,
+                              args->dest_buf_sz,
                               op,
                               op_sz);
 
        return nbytes;
 }
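
The layout above is the standard 33-bit timestamp split 3/15/15 over five
bytes, with a marker bit closing each chunk. A standalone sketch of the
PTS-only case ('0010' prefix), mirroring the masks used here; pack_pts_only()
is a hypothetical helper, not driver code:

        static void pack_pts_only(u64 pts, u8 out[5])
        {
                u16 mid = (((pts >> 15) & 0x7fff) << 1) | 0x1; /* PTS[29:15], marker */
                u16 low = ((pts & 0x7fff) << 1) | 0x1;         /* PTS[14:0], marker  */

                out[0] = (0x2 << 4) | (((pts >> 30) & 0x7) << 1) | 0x1; /* '0010', PTS[32:30] */
                out[1] = mid >> 8; /* big-endian, as cpu_to_be16() above */
                out[2] = mid & 0xff;
                out[3] = low >> 8;
                out[4] = low & 0xff;
        }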
 
-static u32 vidtv_pes_write_h(struct pes_header_write_args args)
+static u32 vidtv_pes_write_h(struct pes_header_write_args *args)
 {
        u32 nbytes = 0;  /* the number of bytes written by this function */
 
        struct vidtv_mpeg_pes pes_header          = {};
        struct vidtv_pes_optional pes_optional    = {};
-       struct pes_header_write_args pts_dts_args = args;
-       u32 stream_id = (args.encoder_id == S302M) ? PRIVATE_STREAM_1_ID : args.stream_id;
+       struct pes_header_write_args pts_dts_args;
+       u32 stream_id = (args->encoder_id == S302M) ? PRIVATE_STREAM_1_ID : args->stream_id;
        u16 pes_opt_bitfield = 0x01 << 15;
 
        pes_header.bitfield = cpu_to_be32((PES_START_CODE_PREFIX << 8) | stream_id);
 
-       pes_header.length = cpu_to_be16(vidtv_pes_op_get_len(args.send_pts,
-                                                            args.send_dts) +
-                                                            args.access_unit_len);
+       pes_header.length = cpu_to_be16(vidtv_pes_op_get_len(args->send_pts,
+                                                            args->send_dts) +
+                                                            args->access_unit_len);
 
-       if (args.send_pts && args.send_dts)
+       if (args->send_pts && args->send_dts)
                pes_opt_bitfield |= (0x3 << 6);
-       else if (args.send_pts)
+       else if (args->send_pts)
                pes_opt_bitfield |= (0x1 << 7);
 
        pes_optional.bitfield = cpu_to_be16(pes_opt_bitfield);
-       pes_optional.length = vidtv_pes_op_get_len(args.send_pts, args.send_dts) +
-                             args.n_pes_h_s_bytes -
+       pes_optional.length = vidtv_pes_op_get_len(args->send_pts, args->send_dts) +
+                             args->n_pes_h_s_bytes -
                              sizeof(struct vidtv_pes_optional);
 
        /* copy header */
-       nbytes += vidtv_memcpy(args.dest_buf,
-                              args.dest_offset + nbytes,
-                              args.dest_buf_sz,
+       nbytes += vidtv_memcpy(args->dest_buf,
+                              args->dest_offset + nbytes,
+                              args->dest_buf_sz,
                               &pes_header,
                               sizeof(pes_header));
 
        /* copy optional header bits */
-       nbytes += vidtv_memcpy(args.dest_buf,
-                              args.dest_offset + nbytes,
-                              args.dest_buf_sz,
+       nbytes += vidtv_memcpy(args->dest_buf,
+                              args->dest_offset + nbytes,
+                              args->dest_buf_sz,
                               &pes_optional,
                               sizeof(pes_optional));
 
        /* copy the timing information */
-       pts_dts_args.dest_offset = args.dest_offset + nbytes;
-       nbytes += vidtv_pes_write_pts_dts(pts_dts_args);
+       pts_dts_args = *args;
+       pts_dts_args.dest_offset = args->dest_offset + nbytes;
+       nbytes += vidtv_pes_write_pts_dts(&pts_dts_args);
 
        /* write any PES header stuffing */
        nbytes += vidtv_pes_write_header_stuffing(args);
@@ -300,14 +300,31 @@ static u32 vidtv_pes_write_ts_h(struct pes_ts_header_write_args args,
        return nbytes;
 }
 
-u32 vidtv_pes_write_into(struct pes_write_args args)
+u32 vidtv_pes_write_into(struct pes_write_args *args)
 {
-       u32 unaligned_bytes = (args.dest_offset % TS_PACKET_LEN);
-       struct pes_ts_header_write_args ts_header_args = {};
-       struct pes_header_write_args pes_header_args = {};
-       u32 remaining_len = args.access_unit_len;
+       u32 unaligned_bytes = (args->dest_offset % TS_PACKET_LEN);
+       struct pes_ts_header_write_args ts_header_args = {
+               .dest_buf               = args->dest_buf,
+               .dest_buf_sz            = args->dest_buf_sz,
+               .pid                    = args->pid,
+               .pcr                    = args->pcr,
+               .continuity_counter     = args->continuity_counter,
+       };
+       struct pes_header_write_args pes_header_args = {
+               .dest_buf               = args->dest_buf,
+               .dest_buf_sz            = args->dest_buf_sz,
+               .encoder_id             = args->encoder_id,
+               .send_pts               = args->send_pts,
+               .pts                    = args->pts,
+               .send_dts               = args->send_dts,
+               .dts                    = args->dts,
+               .stream_id              = args->stream_id,
+               .n_pes_h_s_bytes        = args->n_pes_h_s_bytes,
+               .access_unit_len        = args->access_unit_len,
+       };
+       u32 remaining_len = args->access_unit_len;
        bool wrote_pes_header = false;
-       u64 last_pcr = args.pcr;
+       u64 last_pcr = args->pcr;
        bool need_pcr = true;
        u32 available_space;
        u32 payload_size;
@@ -318,25 +335,13 @@ u32 vidtv_pes_write_into(struct pes_write_args args)
                pr_warn_ratelimited("buffer is misaligned, while starting PES\n");
 
                /* forcibly align and hope for the best */
-               nbytes += vidtv_memset(args.dest_buf,
-                                      args.dest_offset + nbytes,
-                                      args.dest_buf_sz,
+               nbytes += vidtv_memset(args->dest_buf,
+                                      args->dest_offset + nbytes,
+                                      args->dest_buf_sz,
                                       TS_FILL_BYTE,
                                       TS_PACKET_LEN - unaligned_bytes);
        }
 
-       if (args.send_dts && !args.send_pts) {
-               pr_warn_ratelimited("forbidden value '01' for PTS_DTS flags\n");
-               args.send_pts = true;
-               args.pts      = args.dts;
-       }
-
-       /* see SMPTE 302M clause 6.4 */
-       if (args.encoder_id == S302M) {
-               args.send_dts = false;
-               args.send_pts = true;
-       }
-
        while (remaining_len) {
                available_space = TS_PAYLOAD_LEN;
                /*
@@ -345,14 +350,14 @@ u32 vidtv_pes_write_into(struct pes_write_args args)
                 * the space needed for the TS header _and_ for the PES header
                 */
                if (!wrote_pes_header)
-                       available_space -= vidtv_pes_h_get_len(args.send_pts,
-                                                              args.send_dts);
+                       available_space -= vidtv_pes_h_get_len(args->send_pts,
+                                                              args->send_dts);
 
                /*
                 * if the encoder has inserted stuffing bytes in the PES
                 * header, account for them.
                 */
-               available_space -= args.n_pes_h_s_bytes;
+               available_space -= args->n_pes_h_s_bytes;
 
                /* Take the extra adaptation into account if need to send PCR */
                if (need_pcr) {
@@ -387,14 +392,9 @@ u32 vidtv_pes_write_into(struct pes_write_args args)
                }
 
                /* write ts header */
-               ts_header_args.dest_buf           = args.dest_buf;
-               ts_header_args.dest_offset        = args.dest_offset + nbytes;
-               ts_header_args.dest_buf_sz        = args.dest_buf_sz;
-               ts_header_args.pid                = args.pid;
-               ts_header_args.pcr                = args.pcr;
-               ts_header_args.continuity_counter = args.continuity_counter;
-               ts_header_args.wrote_pes_header   = wrote_pes_header;
-               ts_header_args.n_stuffing_bytes   = stuff_bytes;
+               ts_header_args.dest_offset = args->dest_offset + nbytes;
+               ts_header_args.wrote_pes_header = wrote_pes_header;
+               ts_header_args.n_stuffing_bytes = stuff_bytes;
 
                nbytes += vidtv_pes_write_ts_h(ts_header_args, need_pcr,
                                               &last_pcr);
@@ -403,33 +403,20 @@ u32 vidtv_pes_write_into(struct pes_write_args args)
 
                if (!wrote_pes_header) {
                        /* write the PES header only once */
-                       pes_header_args.dest_buf        = args.dest_buf;
-
-                       pes_header_args.dest_offset     = args.dest_offset +
-                                                         nbytes;
-
-                       pes_header_args.dest_buf_sz     = args.dest_buf_sz;
-                       pes_header_args.encoder_id      = args.encoder_id;
-                       pes_header_args.send_pts        = args.send_pts;
-                       pes_header_args.pts             = args.pts;
-                       pes_header_args.send_dts        = args.send_dts;
-                       pes_header_args.dts             = args.dts;
-                       pes_header_args.stream_id       = args.stream_id;
-                       pes_header_args.n_pes_h_s_bytes = args.n_pes_h_s_bytes;
-                       pes_header_args.access_unit_len = args.access_unit_len;
-
-                       nbytes           += vidtv_pes_write_h(pes_header_args);
-                       wrote_pes_header  = true;
+                       pes_header_args.dest_offset = args->dest_offset +
+                                                     nbytes;
+                       nbytes += vidtv_pes_write_h(&pes_header_args);
+                       wrote_pes_header = true;
                }
 
                /* write as much of the payload as we possibly can */
-               nbytes += vidtv_memcpy(args.dest_buf,
-                                      args.dest_offset + nbytes,
-                                      args.dest_buf_sz,
-                                      args.from,
+               nbytes += vidtv_memcpy(args->dest_buf,
+                                      args->dest_offset + nbytes,
+                                      args->dest_buf_sz,
+                                      args->from,
                                       payload_size);
 
-               args.from += payload_size;
+               args->from += payload_size;
 
                remaining_len -= payload_size;
        }
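
As a worked example of the space accounting in the loop above, take the first
TS packet of an access unit that carries a PTS and a PCR (sizes follow
ISO/IEC 13818-1; back-of-envelope figures, not driver constants):

          184  TS payload bytes
        -  14  PES header (6 fixed + 3 optional + 5 for the PTS)
        -   8  adaptation field with PCR (length byte, flags, 6-byte PCR)
        -----
          162  bytes left for elementary-stream data in that packet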
index 0ea9e86..963c591 100644
@@ -14,7 +14,6 @@
 #ifndef VIDTV_PES_H
 #define VIDTV_PES_H
 
-#include <asm/byteorder.h>
 #include <linux/types.h>
 
 #include "vidtv_common.h"
@@ -114,8 +113,10 @@ struct pes_header_write_args {
  * @dest_buf_sz: The size of the dest_buffer
  * @pid: The PID to use for the TS packets.
  * @continuity_counter: Incremented on every new TS packet.
- * @n_pes_h_s_bytes: Padding bytes. Might be used by an encoder if needed, gets
+ * @wrote_pes_header: Flag to indicate that the PES header was written
+ * @n_stuffing_bytes: Padding bytes. Might be used by an encoder if needed, gets
  * discarded by the decoder.
+ * @pcr: Counter driven by a 27 MHz clock.
  */
 struct pes_ts_header_write_args {
        void *dest_buf;
@@ -146,6 +147,7 @@ struct pes_ts_header_write_args {
  * @dts: DTS value to send.
  * @n_pes_h_s_bytes: Padding bytes. Might be used by an encoder if needed, gets
  * discarded by the decoder.
+ * @pcr: Counter driven by a 27 MHz clock.
  */
 struct pes_write_args {
        void *dest_buf;
@@ -186,6 +188,6 @@ struct pes_write_args {
  * equal to the size of the access unit, since we need space for PES headers, TS headers
  * and padding bytes, if any.
  */
-u32 vidtv_pes_write_into(struct pes_write_args args);
+u32 vidtv_pes_write_into(struct pes_write_args *args);
 
 #endif // VIDTV_PES_H
index 82cf67d..4511a2a 100644
@@ -6,31 +6,31 @@
  * technically be broken into one or more sections, we do not do this here,
  * hence 'table' and 'section' are interchangeable for vidtv.
  *
- * This code currently supports three tables: PAT, PMT and SDT. These are the
- * bare minimum to get userspace to recognize our MPEG transport stream. It can
- * be extended to support more PSI tables in the future.
- *
  * Copyright (C) 2020 Daniel W. S. Almeida
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s, %d: " fmt, __func__, __LINE__
 
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/slab.h>
+#include <linux/bcd.h>
 #include <linux/crc32.h>
-#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
 #include <linux/printk.h>
 #include <linux/ratelimit.h>
+#include <linux/slab.h>
 #include <linux/string.h>
-#include <asm/byteorder.h>
+#include <linux/time.h>
+#include <linux/types.h>
 
-#include "vidtv_psi.h"
 #include "vidtv_common.h"
+#include "vidtv_psi.h"
 #include "vidtv_ts.h"
 
 #define CRC_SIZE_IN_BYTES 4
 #define MAX_VERSION_NUM 32
+#define INITIAL_CRC 0xffffffff
+#define ISO_LANGUAGE_CODE_LEN 3
 
 static const u32 CRC_LUT[256] = {
        /* from libdvbv5 */
@@ -79,7 +79,7 @@ static const u32 CRC_LUT[256] = {
        0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
 };
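
The table drives dvb_crc32() below. Since the state is carried byte by byte,
the CRC of a section can be accumulated across separate calls, which is how the
write helpers later in this file checksum the header, descriptors and payload
independently. A sketch, with hdr/payload as placeholder buffers:

        u32 crc = INITIAL_CRC; /* 0xffffffff, defined above */

        crc = dvb_crc32(crc, hdr, hdr_len);
        crc = dvb_crc32(crc, payload, payload_len);
        /* same result as one pass over hdr followed by payload */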
 
-static inline u32 dvb_crc32(u32 crc, u8 *data, u32 len)
+static u32 dvb_crc32(u32 crc, u8 *data, u32 len)
 {
        /* from libdvbv5 */
        while (len--)
@@ -92,40 +92,7 @@ static void vidtv_psi_update_version_num(struct vidtv_psi_table_header *h)
        h->version++;
 }
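
The bare increment relies on version_number being a 5-bit field in the section
header, so it wraps implicitly. Written out explicitly (equivalent under that
assumption):

        h->version = (h->version + 1) % MAX_VERSION_NUM; /* MAX_VERSION_NUM == 32 */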
 
-static inline u16 vidtv_psi_sdt_serv_get_desc_loop_len(struct vidtv_psi_table_sdt_service *s)
-{
-       u16 mask;
-       u16 ret;
-
-       mask = GENMASK(11, 0);
-
-       ret = be16_to_cpu(s->bitfield) & mask;
-       return ret;
-}
-
-static inline u16 vidtv_psi_pmt_stream_get_desc_loop_len(struct vidtv_psi_table_pmt_stream *s)
-{
-       u16 mask;
-       u16 ret;
-
-       mask = GENMASK(9, 0);
-
-       ret = be16_to_cpu(s->bitfield2) & mask;
-       return ret;
-}
-
-static inline u16 vidtv_psi_pmt_get_desc_loop_len(struct vidtv_psi_table_pmt *p)
-{
-       u16 mask;
-       u16 ret;
-
-       mask = GENMASK(9, 0);
-
-       ret = be16_to_cpu(p->bitfield2) & mask;
-       return ret;
-}
-
-static inline u16 vidtv_psi_get_sec_len(struct vidtv_psi_table_header *h)
+static u16 vidtv_psi_get_sec_len(struct vidtv_psi_table_header *h)
 {
        u16 mask;
        u16 ret;
@@ -136,7 +103,7 @@ static inline u16 vidtv_psi_get_sec_len(struct vidtv_psi_table_header *h)
        return ret;
 }
 
-inline u16 vidtv_psi_get_pat_program_pid(struct vidtv_psi_table_pat_program *p)
+u16 vidtv_psi_get_pat_program_pid(struct vidtv_psi_table_pat_program *p)
 {
        u16 mask;
        u16 ret;
@@ -147,7 +114,7 @@ inline u16 vidtv_psi_get_pat_program_pid(struct vidtv_psi_table_pat_program *p)
        return ret;
 }
 
-inline u16 vidtv_psi_pmt_stream_get_elem_pid(struct vidtv_psi_table_pmt_stream *s)
+u16 vidtv_psi_pmt_stream_get_elem_pid(struct vidtv_psi_table_pmt_stream *s)
 {
        u16 mask;
        u16 ret;
@@ -158,10 +125,11 @@ inline u16 vidtv_psi_pmt_stream_get_elem_pid(struct vidtv_psi_table_pmt_stream *
        return ret;
 }
 
-static inline void vidtv_psi_set_desc_loop_len(__be16 *bitfield, u16 new_len, u8 desc_len_nbits)
+static void vidtv_psi_set_desc_loop_len(__be16 *bitfield, u16 new_len,
+                                       u8 desc_len_nbits)
 {
-       u16 mask;
        __be16 new;
+       u16 mask;
 
        mask = GENMASK(15, desc_len_nbits);
 
@@ -188,90 +156,81 @@ static void vidtv_psi_set_sec_len(struct vidtv_psi_table_header *h, u16 new_len)
        h->bitfield = new;
 }
 
-static u32 vidtv_psi_ts_psi_write_into(struct psi_write_args args)
+/*
+ * Packetize PSI sections into TS packets:
+ *  - push a TS header (4 bytes) every 184 bytes
+ *  - manage the continuity_counter
+ *  - add stuffing (i.e. padding bytes) after the CRC
+ */
+static u32 vidtv_psi_ts_psi_write_into(struct psi_write_args *args)
 {
-       /*
-        * Packetize PSI sections into TS packets:
-        * push a TS header (4bytes) every 184 bytes
-        * manage the continuity_counter
-        * add stuffing (i.e. padding bytes) after the CRC
-        */
-
-       u32 nbytes_past_boundary = (args.dest_offset % TS_PACKET_LEN);
+       struct vidtv_mpeg_ts ts_header = {
+               .sync_byte = TS_SYNC_BYTE,
+               .bitfield = cpu_to_be16((args->new_psi_section << 14) | args->pid),
+               .scrambling = 0,
+               .payload = 1,
+               .adaptation_field = 0, /* no adaptation field */
+       };
+       u32 nbytes_past_boundary = (args->dest_offset % TS_PACKET_LEN);
        bool aligned = (nbytes_past_boundary == 0);
-       struct vidtv_mpeg_ts ts_header = {};
-
-       /* number of bytes written by this function */
-       u32 nbytes = 0;
-       /* how much there is left to write */
-       u32 remaining_len = args.len;
-       /* how much we can be written in this packet */
+       u32 remaining_len = args->len;
        u32 payload_write_len = 0;
-       /* where we are in the source */
        u32 payload_offset = 0;
+       u32 nbytes = 0;
 
-       const u16 PAYLOAD_START = args.new_psi_section;
-
-       if (!args.crc && !args.is_crc)
+       if (!args->crc && !args->is_crc)
                pr_warn_ratelimited("Missing CRC for chunk\n");
 
-       if (args.crc)
-               *args.crc = dvb_crc32(*args.crc, args.from, args.len);
+       if (args->crc)
+               *args->crc = dvb_crc32(*args->crc, args->from, args->len);
 
-       if (args.new_psi_section && !aligned) {
+       if (args->new_psi_section && !aligned) {
                pr_warn_ratelimited("Cannot write a new PSI section in a misaligned buffer\n");
 
                /* forcibly align and hope for the best */
-               nbytes += vidtv_memset(args.dest_buf,
-                                      args.dest_offset + nbytes,
-                                      args.dest_buf_sz,
+               nbytes += vidtv_memset(args->dest_buf,
+                                      args->dest_offset + nbytes,
+                                      args->dest_buf_sz,
                                       TS_FILL_BYTE,
                                       TS_PACKET_LEN - nbytes_past_boundary);
        }
 
        while (remaining_len) {
-               nbytes_past_boundary = (args.dest_offset + nbytes) % TS_PACKET_LEN;
+               nbytes_past_boundary = (args->dest_offset + nbytes) % TS_PACKET_LEN;
                aligned = (nbytes_past_boundary == 0);
 
                if (aligned) {
                        /* if at a packet boundary, write a new TS header */
-                       ts_header.sync_byte = TS_SYNC_BYTE;
-                       ts_header.bitfield = cpu_to_be16((PAYLOAD_START << 14) | args.pid);
-                       ts_header.scrambling = 0;
-                       ts_header.continuity_counter = *args.continuity_counter;
-                       ts_header.payload = 1;
-                       /* no adaptation field */
-                       ts_header.adaptation_field = 0;
-
-                       /* copy the header */
-                       nbytes += vidtv_memcpy(args.dest_buf,
-                                              args.dest_offset + nbytes,
-                                              args.dest_buf_sz,
+                       ts_header.continuity_counter = *args->continuity_counter;
+
+                       nbytes += vidtv_memcpy(args->dest_buf,
+                                              args->dest_offset + nbytes,
+                                              args->dest_buf_sz,
                                               &ts_header,
                                               sizeof(ts_header));
                        /*
                         * This will trigger a discontinuity if the buffer is full,
                         * effectively dropping the packet.
                         */
-                       vidtv_ts_inc_cc(args.continuity_counter);
+                       vidtv_ts_inc_cc(args->continuity_counter);
                }
 
                /* write the pointer_field in the first byte of the payload */
-               if (args.new_psi_section)
-                       nbytes += vidtv_memset(args.dest_buf,
-                                              args.dest_offset + nbytes,
-                                              args.dest_buf_sz,
+               if (args->new_psi_section)
+                       nbytes += vidtv_memset(args->dest_buf,
+                                              args->dest_offset + nbytes,
+                                              args->dest_buf_sz,
                                               0x0,
                                               1);
 
                /* write as much of the payload as possible */
-               nbytes_past_boundary = (args.dest_offset + nbytes) % TS_PACKET_LEN;
+               nbytes_past_boundary = (args->dest_offset + nbytes) % TS_PACKET_LEN;
                payload_write_len = min(TS_PACKET_LEN - nbytes_past_boundary, remaining_len);
 
-               nbytes += vidtv_memcpy(args.dest_buf,
-                                      args.dest_offset + nbytes,
-                                      args.dest_buf_sz,
-                                      args.from + payload_offset,
+               nbytes += vidtv_memcpy(args->dest_buf,
+                                      args->dest_offset + nbytes,
+                                      args->dest_buf_sz,
+                                      args->from + payload_offset,
                                       payload_write_len);
 
                /* 'payload_write_len' written from a total of 'len' requested */
@@ -283,37 +242,45 @@ static u32 vidtv_psi_ts_psi_write_into(struct psi_write_args args)
         * fill the rest of the packet if there is any remaining space unused
         */
 
-       nbytes_past_boundary = (args.dest_offset + nbytes) % TS_PACKET_LEN;
+       nbytes_past_boundary = (args->dest_offset + nbytes) % TS_PACKET_LEN;
 
-       if (args.is_crc)
-               nbytes += vidtv_memset(args.dest_buf,
-                                      args.dest_offset + nbytes,
-                                      args.dest_buf_sz,
+       if (args->is_crc)
+               nbytes += vidtv_memset(args->dest_buf,
+                                      args->dest_offset + nbytes,
+                                      args->dest_buf_sz,
                                       TS_FILL_BYTE,
                                       TS_PACKET_LEN - nbytes_past_boundary);
 
        return nbytes;
 }
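
One consequence of the layout: a section that starts at a packet boundary
spends one payload byte on the pointer_field, then 184 bytes of payload per
packet. The minimum packet count follows as below, a hedged helper for
illustration only, not part of the driver:

        static u32 psi_min_ts_packets(u32 section_len)
        {
                /* +1 for the pointer_field in the first packet */
                return DIV_ROUND_UP(section_len + 1, TS_PAYLOAD_LEN);
        }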
 
-static u32 table_section_crc32_write_into(struct crc32_write_args args)
+static u32 table_section_crc32_write_into(struct crc32_write_args *args)
 {
+       struct psi_write_args psi_args = {
+               .dest_buf           = args->dest_buf,
+               .from               = &args->crc,
+               .len                = CRC_SIZE_IN_BYTES,
+               .dest_offset        = args->dest_offset,
+               .pid                = args->pid,
+               .new_psi_section    = false,
+               .continuity_counter = args->continuity_counter,
+               .is_crc             = true,
+               .dest_buf_sz        = args->dest_buf_sz,
+       };
+
        /* the CRC is the last entry in the section */
-       u32 nbytes = 0;
-       struct psi_write_args psi_args = {};
 
-       psi_args.dest_buf           = args.dest_buf;
-       psi_args.from               = &args.crc;
-       psi_args.len                = CRC_SIZE_IN_BYTES;
-       psi_args.dest_offset        = args.dest_offset;
-       psi_args.pid                = args.pid;
-       psi_args.new_psi_section    = false;
-       psi_args.continuity_counter = args.continuity_counter;
-       psi_args.is_crc             = true;
-       psi_args.dest_buf_sz        = args.dest_buf_sz;
+       return vidtv_psi_ts_psi_write_into(&psi_args);
+}
 
-       nbytes += vidtv_psi_ts_psi_write_into(psi_args);
+static void vidtv_psi_desc_chain(struct vidtv_psi_desc *head, struct vidtv_psi_desc *desc)
+{
+       if (head) {
+               while (head->next)
+                       head = head->next;
 
-       return nbytes;
+               head->next = desc;
+       }
 }
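
The helper factors out the tail append that every *_desc_init() used to
open-code. A usage sketch; the service type enum value and the strings are
assumptions for illustration:

        struct vidtv_psi_desc *head;

        head = (struct vidtv_psi_desc *)
               vidtv_psi_service_desc_init(NULL, DIGITAL_TELEVISION_SERVICE,
                                           "channel", "provider");
        /* the second init call appends to 'head' via vidtv_psi_desc_chain() */
        vidtv_psi_network_name_desc_init(head, "example-net");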
 
 struct vidtv_psi_desc_service *vidtv_psi_service_desc_init(struct vidtv_psi_desc *head,
@@ -326,6 +293,8 @@ struct vidtv_psi_desc_service *vidtv_psi_service_desc_init(struct vidtv_psi_desc
        u32 provider_name_len = provider_name ? strlen(provider_name) : 0;
 
        desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+       if (!desc)
+               return NULL;
 
        desc->type = SERVICE_DESCRIPTOR;
 
@@ -347,12 +316,7 @@ struct vidtv_psi_desc_service *vidtv_psi_service_desc_init(struct vidtv_psi_desc
        if (provider_name && provider_name_len)
                desc->provider_name = kstrdup(provider_name, GFP_KERNEL);
 
-       if (head) {
-               while (head->next)
-                       head = head->next;
-
-               head->next = (struct vidtv_psi_desc *)desc;
-       }
+       vidtv_psi_desc_chain(head, (struct vidtv_psi_desc *)desc);
        return desc;
 }
 
@@ -365,6 +329,8 @@ struct vidtv_psi_desc_registration
        struct vidtv_psi_desc_registration *desc;
 
        desc = kzalloc(sizeof(*desc) + sizeof(format_id) + additional_info_len, GFP_KERNEL);
+       if (!desc)
+               return NULL;
 
        desc->type = REGISTRATION_DESCRIPTOR;
 
@@ -378,44 +344,178 @@ struct vidtv_psi_desc_registration
                       additional_ident_info,
                       additional_info_len);
 
-       if (head) {
-               while (head->next)
-                       head = head->next;
+       vidtv_psi_desc_chain(head, (struct vidtv_psi_desc *)desc);
+       return desc;
+}
 
-               head->next = (struct vidtv_psi_desc *)desc;
+struct vidtv_psi_desc_network_name
+*vidtv_psi_network_name_desc_init(struct vidtv_psi_desc *head, char *network_name)
+{
+       u32 network_name_len = network_name ? strlen(network_name) : 0;
+       struct vidtv_psi_desc_network_name *desc;
+
+       desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+       if (!desc)
+               return NULL;
+
+       desc->type = NETWORK_NAME_DESCRIPTOR;
+
+       desc->length = network_name_len;
+
+       if (network_name && network_name_len)
+               desc->network_name = kstrdup(network_name, GFP_KERNEL);
+
+       vidtv_psi_desc_chain(head, (struct vidtv_psi_desc *)desc);
+       return desc;
+}
+
+struct vidtv_psi_desc_service_list
+*vidtv_psi_service_list_desc_init(struct vidtv_psi_desc *head,
+                                 struct vidtv_psi_desc_service_list_entry *entry)
+{
+       struct vidtv_psi_desc_service_list_entry *curr_e = NULL;
+       struct vidtv_psi_desc_service_list_entry *head_e = NULL;
+       struct vidtv_psi_desc_service_list_entry *prev_e = NULL;
+       struct vidtv_psi_desc_service_list *desc;
+       u16 length = 0;
+
+       desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+       if (!desc)
+               return NULL;
+
+       desc->type = SERVICE_LIST_DESCRIPTOR;
+
+       while (entry) {
+               curr_e = kzalloc(sizeof(*curr_e), GFP_KERNEL);
+               if (!curr_e) {
+                       while (head_e) {
+                               curr_e = head_e;
+                               head_e = head_e->next;
+                               kfree(curr_e);
+                       }
+                       kfree(desc);
+                       return NULL;
+               }
+
+               curr_e->service_id = entry->service_id;
+               curr_e->service_type = entry->service_type;
+
+               length += sizeof(struct vidtv_psi_desc_service_list_entry) -
+                         sizeof(struct vidtv_psi_desc_service_list_entry *);
+
+               if (!head_e)
+                       head_e = curr_e;
+               if (prev_e)
+                       prev_e->next = curr_e;
+
+               prev_e = curr_e;
+               entry = entry->next;
        }
 
+       desc->length = length;
+       desc->service_list = head_e;
+
+       vidtv_psi_desc_chain(head, (struct vidtv_psi_desc *)desc);
+       return desc;
+}
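
Entries are deep-copied, so the caller keeps ownership of its input list. A
sketch, with the ID made up and the field types assumed from how the entries
are serialized:

        struct vidtv_psi_desc_service_list_entry entry = {
                .service_id   = cpu_to_be16(0x880),         /* assumed __be16 */
                .service_type = DIGITAL_TELEVISION_SERVICE, /* assumed enum   */
        };
        struct vidtv_psi_desc_service_list *sl;

        sl = vidtv_psi_service_list_desc_init(NULL, &entry);
        /* on failure, any partially copied entries were already freed */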
+
+struct vidtv_psi_desc_short_event
+*vidtv_psi_short_event_desc_init(struct vidtv_psi_desc *head,
+                                char *iso_language_code,
+                                char *event_name,
+                                char *text)
+{
+       u32 iso_len = iso_language_code ? strlen(iso_language_code) : 0;
+       u32 event_name_len = event_name ? strlen(event_name) : 0;
+       struct vidtv_psi_desc_short_event *desc;
+       u32 text_len = text ? strlen(text) : 0;
+
+       desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+       if (!desc)
+               return NULL;
+
+       desc->type = SHORT_EVENT_DESCRIPTOR;
+
+       desc->length = ISO_LANGUAGE_CODE_LEN +
+                      sizeof_field(struct vidtv_psi_desc_short_event, event_name_len) +
+                      event_name_len +
+                      sizeof_field(struct vidtv_psi_desc_short_event, text_len) +
+                      text_len;
+
+       desc->event_name_len = event_name_len;
+       desc->text_len = text_len;
+
+       if (iso_len != ISO_LANGUAGE_CODE_LEN)
+               iso_language_code = "eng";
+
+       desc->iso_language_code = kstrdup(iso_language_code, GFP_KERNEL);
+
+       if (event_name && event_name_len)
+               desc->event_name = kstrdup(event_name, GFP_KERNEL);
+
+       if (text && text_len)
+               desc->text = kstrdup(text, GFP_KERNEL);
+
+       vidtv_psi_desc_chain(head, (struct vidtv_psi_desc *)desc);
        return desc;
 }
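
A sketch of the language-code fallback: anything that is not exactly three
characters is replaced with "eng" before being duplicated (strings made up):

        struct vidtv_psi_desc_short_event *se;

        se = vidtv_psi_short_event_desc_init(NULL, "portuguese", /* wrong length */
                                             "Event", "Description");
        /* se->iso_language_code is now a kstrdup()'d "eng" */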
 
 struct vidtv_psi_desc *vidtv_psi_desc_clone(struct vidtv_psi_desc *desc)
 {
+       struct vidtv_psi_desc_network_name *desc_network_name;
+       struct vidtv_psi_desc_service_list *desc_service_list;
+       struct vidtv_psi_desc_short_event  *desc_short_event;
+       struct vidtv_psi_desc_service *service;
        struct vidtv_psi_desc *head = NULL;
        struct vidtv_psi_desc *prev = NULL;
        struct vidtv_psi_desc *curr = NULL;
 
-       struct vidtv_psi_desc_service *service;
-
        while (desc) {
                switch (desc->type) {
                case SERVICE_DESCRIPTOR:
                        service = (struct vidtv_psi_desc_service *)desc;
                        curr = (struct vidtv_psi_desc *)
-                               vidtv_psi_service_desc_init(head,
-                                                           service->service_type,
-                                                           service->service_name,
-                                                           service->provider_name);
+                              vidtv_psi_service_desc_init(head,
+                                                          service->service_type,
+                                                          service->service_name,
+                                                          service->provider_name);
+               break;
+
+               case NETWORK_NAME_DESCRIPTOR:
+                       desc_network_name = (struct vidtv_psi_desc_network_name *)desc;
+                       curr = (struct vidtv_psi_desc *)
+                              vidtv_psi_network_name_desc_init(head,
+                                                               desc_network_name->network_name);
+               break;
+
+               case SERVICE_LIST_DESCRIPTOR:
+                       desc_service_list = (struct vidtv_psi_desc_service_list *)desc;
+                       curr = (struct vidtv_psi_desc *)
+                              vidtv_psi_service_list_desc_init(head,
+                                                               desc_service_list->service_list);
+               break;
+
+               case SHORT_EVENT_DESCRIPTOR:
+                       desc_short_event = (struct vidtv_psi_desc_short_event *)desc;
+                       curr = (struct vidtv_psi_desc *)
+                              vidtv_psi_short_event_desc_init(head,
+                                                              desc_short_event->iso_language_code,
+                                                              desc_short_event->event_name,
+                                                              desc_short_event->text);
                break;
 
                case REGISTRATION_DESCRIPTOR:
                default:
                        curr = kzalloc(sizeof(*desc) + desc->length, GFP_KERNEL);
+                       if (!curr)
+                               break;
+
                        memcpy(curr, desc, sizeof(*desc) + desc->length);
-               break;
-       }
+               }
 
-               if (curr)
-                       curr->next = NULL;
+               if (!curr) {
+                       /* free whatever was cloned so far */
+                       vidtv_psi_desc_destroy(head);
+                       return NULL;
+               }
+
+               curr->next = NULL;
                if (!head)
                        head = curr;
                if (prev)
@@ -430,6 +530,8 @@ struct vidtv_psi_desc *vidtv_psi_desc_clone(struct vidtv_psi_desc *desc)
 
 void vidtv_psi_desc_destroy(struct vidtv_psi_desc *desc)
 {
+       struct vidtv_psi_desc_service_list_entry *sl_entry_tmp = NULL;
+       struct vidtv_psi_desc_service_list_entry *sl_entry = NULL;
        struct vidtv_psi_desc *curr = desc;
        struct vidtv_psi_desc *tmp  = NULL;
 
@@ -447,6 +549,25 @@ void vidtv_psi_desc_destroy(struct vidtv_psi_desc *desc)
                        /* nothing to do */
                        break;
 
+               case NETWORK_NAME_DESCRIPTOR:
+                       kfree(((struct vidtv_psi_desc_network_name *)tmp)->network_name);
+                       break;
+
+               case SERVICE_LIST_DESCRIPTOR:
+                       sl_entry = ((struct vidtv_psi_desc_service_list *)tmp)->service_list;
+                       while (sl_entry) {
+                               sl_entry_tmp = sl_entry;
+                               sl_entry = sl_entry->next;
+                               kfree(sl_entry_tmp);
+                       }
+                       break;
+
+               case SHORT_EVENT_DESCRIPTOR:
+                       kfree(((struct vidtv_psi_desc_short_event *)tmp)->iso_language_code);
+                       kfree(((struct vidtv_psi_desc_short_event *)tmp)->event_name);
+                       kfree(((struct vidtv_psi_desc_short_event *)tmp)->text);
+               break;
+
                default:
                        pr_warn_ratelimited("Possible leak: not handling descriptor type %d\n",
                                            tmp->type);
@@ -513,63 +634,119 @@ void vidtv_sdt_desc_assign(struct vidtv_psi_table_sdt *sdt,
        vidtv_psi_update_version_num(&sdt->header);
 }
 
-static u32 vidtv_psi_desc_write_into(struct desc_write_args args)
+static u32 vidtv_psi_desc_write_into(struct desc_write_args *args)
 {
-       /* the number of bytes written by this function */
+       struct psi_write_args psi_args = {
+               .dest_buf           = args->dest_buf,
+               .from               = &args->desc->type,
+               .pid                = args->pid,
+               .new_psi_section    = false,
+               .continuity_counter = args->continuity_counter,
+               .is_crc             = false,
+               .dest_buf_sz        = args->dest_buf_sz,
+               .crc                = args->crc,
+               .len                = sizeof_field(struct vidtv_psi_desc, type) +
+                                     sizeof_field(struct vidtv_psi_desc, length),
+       };
+       struct vidtv_psi_desc_service_list_entry *serv_list_entry = NULL;
        u32 nbytes = 0;
-       struct psi_write_args psi_args = {};
-
-       psi_args.dest_buf = args.dest_buf;
-       psi_args.from     = &args.desc->type;
 
-       psi_args.len = sizeof_field(struct vidtv_psi_desc, type) +
-                      sizeof_field(struct vidtv_psi_desc, length);
+       psi_args.dest_offset        = args->dest_offset + nbytes;
 
-       psi_args.dest_offset        = args.dest_offset + nbytes;
-       psi_args.pid                = args.pid;
-       psi_args.new_psi_section    = false;
-       psi_args.continuity_counter = args.continuity_counter;
-       psi_args.is_crc             = false;
-       psi_args.dest_buf_sz        = args.dest_buf_sz;
-       psi_args.crc                = args.crc;
+       nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
 
-       nbytes += vidtv_psi_ts_psi_write_into(psi_args);
-
-       switch (args.desc->type) {
+       switch (args->desc->type) {
        case SERVICE_DESCRIPTOR:
-               psi_args.dest_offset = args.dest_offset + nbytes;
+               psi_args.dest_offset = args->dest_offset + nbytes;
                psi_args.len = sizeof_field(struct vidtv_psi_desc_service, service_type) +
                               sizeof_field(struct vidtv_psi_desc_service, provider_name_len);
-               psi_args.from = &((struct vidtv_psi_desc_service *)args.desc)->service_type;
+               psi_args.from = &((struct vidtv_psi_desc_service *)args->desc)->service_type;
 
-               nbytes += vidtv_psi_ts_psi_write_into(psi_args);
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
 
-               psi_args.dest_offset = args.dest_offset + nbytes;
-               psi_args.len = ((struct vidtv_psi_desc_service *)args.desc)->provider_name_len;
-               psi_args.from = ((struct vidtv_psi_desc_service *)args.desc)->provider_name;
+               psi_args.dest_offset = args->dest_offset + nbytes;
+               psi_args.len = ((struct vidtv_psi_desc_service *)args->desc)->provider_name_len;
+               psi_args.from = ((struct vidtv_psi_desc_service *)args->desc)->provider_name;
 
-               nbytes += vidtv_psi_ts_psi_write_into(psi_args);
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
 
-               psi_args.dest_offset = args.dest_offset + nbytes;
+               psi_args.dest_offset = args->dest_offset + nbytes;
                psi_args.len = sizeof_field(struct vidtv_psi_desc_service, service_name_len);
-               psi_args.from = &((struct vidtv_psi_desc_service *)args.desc)->service_name_len;
+               psi_args.from = &((struct vidtv_psi_desc_service *)args->desc)->service_name_len;
 
-               nbytes += vidtv_psi_ts_psi_write_into(psi_args);
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
 
-               psi_args.dest_offset = args.dest_offset + nbytes;
-               psi_args.len = ((struct vidtv_psi_desc_service *)args.desc)->service_name_len;
-               psi_args.from = ((struct vidtv_psi_desc_service *)args.desc)->service_name;
+               psi_args.dest_offset = args->dest_offset + nbytes;
+               psi_args.len = ((struct vidtv_psi_desc_service *)args->desc)->service_name_len;
+               psi_args.from = ((struct vidtv_psi_desc_service *)args->desc)->service_name;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+               break;
+
+       case NETWORK_NAME_DESCRIPTOR:
+               psi_args.dest_offset = args->dest_offset + nbytes;
+               psi_args.len = args->desc->length;
+               psi_args.from = ((struct vidtv_psi_desc_network_name *)args->desc)->network_name;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+               break;
+
+       case SERVICE_LIST_DESCRIPTOR:
+               serv_list_entry = ((struct vidtv_psi_desc_service_list *)args->desc)->service_list;
+               while (serv_list_entry) {
+                       psi_args.dest_offset = args->dest_offset + nbytes;
+                       psi_args.len = sizeof(struct vidtv_psi_desc_service_list_entry) -
+                                      sizeof(struct vidtv_psi_desc_service_list_entry *);
+                       psi_args.from = serv_list_entry;
+
+                       nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+                       serv_list_entry = serv_list_entry->next;
+               }
+               break;
+
+       case SHORT_EVENT_DESCRIPTOR:
+               psi_args.dest_offset = args->dest_offset + nbytes;
+               psi_args.len = ISO_LANGUAGE_CODE_LEN;
+               psi_args.from = ((struct vidtv_psi_desc_short_event *)
+                                 args->desc)->iso_language_code;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+               psi_args.dest_offset = args->dest_offset + nbytes;
+               psi_args.len = sizeof_field(struct vidtv_psi_desc_short_event, event_name_len);
+               psi_args.from = &((struct vidtv_psi_desc_short_event *)
+                                 args->desc)->event_name_len;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+               psi_args.dest_offset = args->dest_offset + nbytes;
+               psi_args.len = ((struct vidtv_psi_desc_short_event *)args->desc)->event_name_len;
+               psi_args.from = ((struct vidtv_psi_desc_short_event *)args->desc)->event_name;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+               psi_args.dest_offset = args->dest_offset + nbytes;
+               psi_args.len = sizeof_field(struct vidtv_psi_desc_short_event, text_len);
+               psi_args.from = &((struct vidtv_psi_desc_short_event *)args->desc)->text_len;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+               psi_args.dest_offset = args->dest_offset + nbytes;
+               psi_args.len = ((struct vidtv_psi_desc_short_event *)args->desc)->text_len;
+               psi_args.from = ((struct vidtv_psi_desc_short_event *)args->desc)->text;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
 
-               nbytes += vidtv_psi_ts_psi_write_into(psi_args);
                break;
 
        case REGISTRATION_DESCRIPTOR:
        default:
-               psi_args.dest_offset = args.dest_offset + nbytes;
-               psi_args.len = args.desc->length;
-               psi_args.from = &args.desc->data;
+               psi_args.dest_offset = args->dest_offset + nbytes;
+               psi_args.len = args->desc->length;
+               psi_args.from = &args->desc->data;
 
-               nbytes += vidtv_psi_ts_psi_write_into(psi_args);
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
                break;
        }
 
@@ -577,40 +754,37 @@ static u32 vidtv_psi_desc_write_into(struct desc_write_args args)
 }
 
 static u32
-vidtv_psi_table_header_write_into(struct header_write_args args)
+vidtv_psi_table_header_write_into(struct header_write_args *args)
 {
-       /* the number of bytes written by this function */
-       u32 nbytes = 0;
-       struct psi_write_args psi_args = {};
-
-       psi_args.dest_buf           = args.dest_buf;
-       psi_args.from               = args.h;
-       psi_args.len                = sizeof(struct vidtv_psi_table_header);
-       psi_args.dest_offset        = args.dest_offset;
-       psi_args.pid                = args.pid;
-       psi_args.new_psi_section    = true;
-       psi_args.continuity_counter = args.continuity_counter;
-       psi_args.is_crc             = false;
-       psi_args.dest_buf_sz        = args.dest_buf_sz;
-       psi_args.crc                = args.crc;
-
-       nbytes += vidtv_psi_ts_psi_write_into(psi_args);
-
-       return nbytes;
+       struct psi_write_args psi_args = {
+               .dest_buf           = args->dest_buf,
+               .from               = args->h,
+               .len                = sizeof(struct vidtv_psi_table_header),
+               .dest_offset        = args->dest_offset,
+               .pid                = args->pid,
+               .new_psi_section    = true,
+               .continuity_counter = args->continuity_counter,
+               .is_crc             = false,
+               .dest_buf_sz        = args->dest_buf_sz,
+               .crc                = args->crc,
+       };
+
+       return vidtv_psi_ts_psi_write_into(&psi_args);
 }
 
 void
 vidtv_psi_pat_table_update_sec_len(struct vidtv_psi_table_pat *pat)
 {
-       /* see ISO/IEC 13818-1 : 2000 p.43 */
        u16 length = 0;
        u32 i;
 
+       /* see ISO/IEC 13818-1 : 2000 p.43 */
+
        /* from immediately after 'section_length' until 'last_section_number'*/
        length += PAT_LEN_UNTIL_LAST_SECTION_NUMBER;
 
        /* do not count the pointer */
-       for (i = 0; i < pat->programs; ++i)
+       for (i = 0; i < pat->num_pat; ++i)
                length += sizeof(struct vidtv_psi_table_pat_program) -
                          sizeof(struct vidtv_psi_table_pat_program *);
 
@@ -621,10 +795,11 @@ vidtv_psi_pat_table_update_sec_len(struct vidtv_psi_table_pat *pat)
 
 void vidtv_psi_pmt_table_update_sec_len(struct vidtv_psi_table_pmt *pmt)
 {
-       /* see ISO/IEC 13818-1 : 2000 p.46 */
-       u16 length = 0;
        struct vidtv_psi_table_pmt_stream *s = pmt->stream;
        u16 desc_loop_len;
+       u16 length = 0;
+
+       /* see ISO/IEC 13818-1 : 2000 p.46 */
 
        /* from immediately after 'section_length' until 'program_info_length'*/
        length += PMT_LEN_UNTIL_PROGRAM_INFO_LENGTH;
@@ -655,10 +830,11 @@ void vidtv_psi_pmt_table_update_sec_len(struct vidtv_psi_table_pmt *pmt)
 
 void vidtv_psi_sdt_table_update_sec_len(struct vidtv_psi_table_sdt *sdt)
 {
-       /* see ETSI EN 300 468 V 1.10.1 p.24 */
-       u16 length = 0;
        struct vidtv_psi_table_sdt_service *s = sdt->service;
        u16 desc_loop_len;
+       u16 length = 0;
+
+       /* see ETSI EN 300 468 V 1.10.1 p.24 */
 
        /*
         * from immediately after 'section_length' until
@@ -681,7 +857,6 @@ void vidtv_psi_sdt_table_update_sec_len(struct vidtv_psi_table_sdt *sdt)
        }
 
        length += CRC_SIZE_IN_BYTES;
-
        vidtv_psi_set_sec_len(&sdt->header, length);
 }
 
@@ -694,6 +869,8 @@ vidtv_psi_pat_program_init(struct vidtv_psi_table_pat_program *head,
        const u16 RESERVED = 0x07;
 
        program = kzalloc(sizeof(*program), GFP_KERNEL);
+       if (!program)
+               return NULL;
 
        program->service_id = cpu_to_be16(service_id);
 
@@ -714,8 +891,8 @@ vidtv_psi_pat_program_init(struct vidtv_psi_table_pat_program *head,
 void
 vidtv_psi_pat_program_destroy(struct vidtv_psi_table_pat_program *p)
 {
-       struct vidtv_psi_table_pat_program *curr = p;
        struct vidtv_psi_table_pat_program *tmp  = NULL;
+       struct vidtv_psi_table_pat_program *curr = p;
 
        while (curr) {
                tmp  = curr;
@@ -724,42 +901,49 @@ vidtv_psi_pat_program_destroy(struct vidtv_psi_table_pat_program *p)
        }
 }
 
+/* This function transfers ownership of p to the table */
 void
 vidtv_psi_pat_program_assign(struct vidtv_psi_table_pat *pat,
                             struct vidtv_psi_table_pat_program *p)
 {
-       /* This function transfers ownership of p to the table */
+       struct vidtv_psi_table_pat_program *program;
+       u16 program_count;
 
-       u16 program_count = 0;
-       struct vidtv_psi_table_pat_program *program = p;
+       do {
+               program_count = 0;
+               program = p;
 
-       if (p == pat->program)
-               return;
+               if (p == pat->program)
+                       return;
 
-       while (program) {
-               ++program_count;
-               program = program->next;
-       }
+               while (program) {
+                       ++program_count;
+                       program = program->next;
+               }
 
-       pat->programs = program_count;
-       pat->program  = p;
+               pat->num_pat = program_count;
+               pat->program  = p;
 
-       /* Recompute section length */
-       vidtv_psi_pat_table_update_sec_len(pat);
+               /* Recompute section length */
+               vidtv_psi_pat_table_update_sec_len(pat);
 
-       if (vidtv_psi_get_sec_len(&pat->header) > MAX_SECTION_LEN)
-               vidtv_psi_pat_program_assign(pat, NULL);
+               p = NULL;
+       } while (vidtv_psi_get_sec_len(&pat->header) > MAX_SECTION_LEN);
 
        vidtv_psi_update_version_num(&pat->header);
 }
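The assign step now trims an oversized program list iteratively rather than by tail recursion. A minimal caller-side sketch (IDs are illustrative, error handling is elided, and the third init argument is assumed to be the program-map PID):

struct vidtv_psi_table_pat *pat;
struct vidtv_psi_table_pat_program *p;

pat = vidtv_psi_pat_table_init(0x744);               /* transport_stream_id */
p = vidtv_psi_pat_program_init(NULL, 0x880, 0x100);  /* head of the list */
vidtv_psi_pat_program_init(p, 0x881, 0x200);         /* appended after head */

/* ownership of the whole list transfers to the table */
vidtv_psi_pat_program_assign(pat, p);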
 
 struct vidtv_psi_table_pat *vidtv_psi_pat_table_init(u16 transport_stream_id)
 {
-       struct vidtv_psi_table_pat *pat = kzalloc(sizeof(*pat), GFP_KERNEL);
+       struct vidtv_psi_table_pat *pat;
        const u16 SYNTAX = 0x1;
        const u16 ZERO = 0x0;
        const u16 ONES = 0x03;
 
+       pat = kzalloc(sizeof(*pat), GFP_KERNEL);
+       if (!pat)
+               return NULL;
+
        pat->header.table_id = 0x0;
 
        pat->header.bitfield = cpu_to_be16((SYNTAX << 15) | (ZERO << 14) | (ONES << 12));
@@ -772,70 +956,68 @@ struct vidtv_psi_table_pat *vidtv_psi_pat_table_init(u16 transport_stream_id)
        pat->header.section_id   = 0x0;
        pat->header.last_section = 0x0;
 
-       pat->programs = 0;
-
        vidtv_psi_pat_table_update_sec_len(pat);
 
        return pat;
 }
 
-u32 vidtv_psi_pat_write_into(struct vidtv_psi_pat_write_args args)
+u32 vidtv_psi_pat_write_into(struct vidtv_psi_pat_write_args *args)
 {
-       /* the number of bytes written by this function */
+       struct vidtv_psi_table_pat_program *p = args->pat->program;
+       struct header_write_args h_args       = {
+               .dest_buf           = args->buf,
+               .dest_offset        = args->offset,
+               .pid                = VIDTV_PAT_PID,
+               .h                  = &args->pat->header,
+               .continuity_counter = args->continuity_counter,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct psi_write_args psi_args        = {
+               .dest_buf           = args->buf,
+               .pid                = VIDTV_PAT_PID,
+               .new_psi_section    = false,
+               .continuity_counter = args->continuity_counter,
+               .is_crc             = false,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct crc32_write_args c_args        = {
+               .dest_buf           = args->buf,
+               .pid                = VIDTV_PAT_PID,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       u32 crc = INITIAL_CRC;
        u32 nbytes = 0;
-       const u16 pat_pid = VIDTV_PAT_PID;
-       u32 crc = 0xffffffff;
-
-       struct vidtv_psi_table_pat_program *p = args.pat->program;
 
-       struct header_write_args h_args       = {};
-       struct psi_write_args psi_args            = {};
-       struct crc32_write_args c_args        = {};
+       vidtv_psi_pat_table_update_sec_len(args->pat);
 
-       vidtv_psi_pat_table_update_sec_len(args.pat);
-
-       h_args.dest_buf           = args.buf;
-       h_args.dest_offset        = args.offset;
-       h_args.h                  = &args.pat->header;
-       h_args.pid                = pat_pid;
-       h_args.continuity_counter = args.continuity_counter;
-       h_args.dest_buf_sz        = args.buf_sz;
        h_args.crc = &crc;
 
-       nbytes += vidtv_psi_table_header_write_into(h_args);
+       nbytes += vidtv_psi_table_header_write_into(&h_args);
 
        /* note that the fields 'num_pat' and 'num_pmt' are not really part of the PAT */
 
-       psi_args.dest_buf           = args.buf;
-       psi_args.pid                = pat_pid;
-       psi_args.new_psi_section    = false;
-       psi_args.continuity_counter = args.continuity_counter;
-       psi_args.is_crc             = false;
-       psi_args.dest_buf_sz        = args.buf_sz;
-       psi_args.crc                = &crc;
+       psi_args.crc = &crc;
 
        while (p) {
                /* copy the PAT programs */
                psi_args.from = p;
                /* skip the pointer */
                psi_args.len = sizeof(*p) -
-                          sizeof(struct vidtv_psi_table_pat_program *);
-               psi_args.dest_offset = args.offset + nbytes;
+                              sizeof(struct vidtv_psi_table_pat_program *);
+               psi_args.dest_offset = args->offset + nbytes;
+               psi_args.continuity_counter = args->continuity_counter;
 
-               nbytes += vidtv_psi_ts_psi_write_into(psi_args);
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
 
                p = p->next;
        }
 
-       c_args.dest_buf           = args.buf;
-       c_args.dest_offset        = args.offset + nbytes;
+       c_args.dest_offset        = args->offset + nbytes;
+       c_args.continuity_counter = args->continuity_counter;
        c_args.crc                = cpu_to_be32(crc);
-       c_args.pid                = pat_pid;
-       c_args.continuity_counter = args.continuity_counter;
-       c_args.dest_buf_sz        = args.buf_sz;
 
        /* Write the CRC32 at the end */
-       nbytes += table_section_crc32_write_into(c_args);
+       nbytes += table_section_crc32_write_into(&c_args);
 
        return nbytes;
 }
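With the write helpers now taking their argument blocks by reference, callers fill the struct once and pass its address. A sketch with hypothetical buffer and counter names:

u8 cc = 0;
struct vidtv_psi_pat_write_args args = {
        .buf                = ts_buf,    /* hypothetical destination buffer */
        .offset             = 0,
        .pat                = pat,
        .buf_sz             = ts_buf_sz,
        .continuity_counter = &cc,
};
u32 nbytes = vidtv_psi_pat_write_into(&args);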
@@ -859,6 +1041,8 @@ vidtv_psi_pmt_stream_init(struct vidtv_psi_table_pmt_stream *head,
        u16 desc_loop_len;
 
        stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+       if (!stream)
+               return NULL;
 
        stream->type = stream_type;
 
@@ -883,8 +1067,8 @@ vidtv_psi_pmt_stream_init(struct vidtv_psi_table_pmt_stream *head,
 
 void vidtv_psi_pmt_stream_destroy(struct vidtv_psi_table_pmt_stream *s)
 {
-       struct vidtv_psi_table_pmt_stream *curr_stream = s;
        struct vidtv_psi_table_pmt_stream *tmp_stream  = NULL;
+       struct vidtv_psi_table_pmt_stream *curr_stream = s;
 
        while (curr_stream) {
                tmp_stream  = curr_stream;
@@ -897,15 +1081,16 @@ void vidtv_psi_pmt_stream_destroy(struct vidtv_psi_table_pmt_stream *s)
 void vidtv_psi_pmt_stream_assign(struct vidtv_psi_table_pmt *pmt,
                                 struct vidtv_psi_table_pmt_stream *s)
 {
-       /* This function transfers ownership of s to the table */
-       if (s == pmt->stream)
-               return;
+       do {
+               /* This function transfers ownership of s to the table */
+               if (s == pmt->stream)
+                       return;
 
-       pmt->stream = s;
-       vidtv_psi_pmt_table_update_sec_len(pmt);
+               pmt->stream = s;
+               vidtv_psi_pmt_table_update_sec_len(pmt);
 
-       if (vidtv_psi_get_sec_len(&pmt->header) > MAX_SECTION_LEN)
-               vidtv_psi_pmt_stream_assign(pmt, NULL);
+               s = NULL;
+       } while (vidtv_psi_get_sec_len(&pmt->header) > MAX_SECTION_LEN);
 
        vidtv_psi_update_version_num(&pmt->header);
 }
@@ -933,14 +1118,18 @@ u16 vidtv_psi_pmt_get_pid(struct vidtv_psi_table_pmt *section,
 struct vidtv_psi_table_pmt *vidtv_psi_pmt_table_init(u16 program_number,
                                                     u16 pcr_pid)
 {
-       struct vidtv_psi_table_pmt *pmt = kzalloc(sizeof(*pmt), GFP_KERNEL);
-       const u16 SYNTAX = 0x1;
-       const u16 ZERO = 0x0;
-       const u16 ONES = 0x03;
+       struct vidtv_psi_table_pmt *pmt;
        const u16 RESERVED1 = 0x07;
        const u16 RESERVED2 = 0x0f;
+       const u16 SYNTAX = 0x1;
+       const u16 ONES = 0x03;
+       const u16 ZERO = 0x0;
        u16 desc_loop_len;
 
+       pmt = kzalloc(sizeof(*pmt), GFP_KERNEL);
+       if (!pmt)
+               return NULL;
+
        if (!pcr_pid)
                pcr_pid = 0x1fff;
 
@@ -970,87 +1159,84 @@ struct vidtv_psi_table_pmt *vidtv_psi_pmt_table_init(u16 program_number,
        return pmt;
 }
 
-u32 vidtv_psi_pmt_write_into(struct vidtv_psi_pmt_write_args args)
+u32 vidtv_psi_pmt_write_into(struct vidtv_psi_pmt_write_args *args)
 {
-       /* the number of bytes written by this function */
+       struct vidtv_psi_desc *table_descriptor   = args->pmt->descriptor;
+       struct vidtv_psi_table_pmt_stream *stream = args->pmt->stream;
+       struct vidtv_psi_desc *stream_descriptor;
+       struct header_write_args h_args = {
+               .dest_buf           = args->buf,
+               .dest_offset        = args->offset,
+               .h                  = &args->pmt->header,
+               .pid                = args->pid,
+               .continuity_counter = args->continuity_counter,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct psi_write_args psi_args  = {
+               .dest_buf = args->buf,
+               .from     = &args->pmt->bitfield,
+               .len      = sizeof_field(struct vidtv_psi_table_pmt, bitfield) +
+                           sizeof_field(struct vidtv_psi_table_pmt, bitfield2),
+               .pid                = args->pid,
+               .new_psi_section    = false,
+               .is_crc             = false,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct desc_write_args d_args   = {
+               .dest_buf           = args->buf,
+               .desc               = table_descriptor,
+               .pid                = args->pid,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct crc32_write_args c_args  = {
+               .dest_buf           = args->buf,
+               .pid                = args->pid,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       u32 crc = INITIAL_CRC;
        u32 nbytes = 0;
-       u32 crc = 0xffffffff;
-
-       struct vidtv_psi_desc *table_descriptor   = args.pmt->descriptor;
-       struct vidtv_psi_table_pmt_stream *stream = args.pmt->stream;
-       struct vidtv_psi_desc *stream_descriptor  = (stream) ?
-                                                   args.pmt->stream->descriptor :
-                                                   NULL;
-
-       struct header_write_args h_args = {};
-       struct psi_write_args psi_args  = {};
-       struct desc_write_args d_args   = {};
-       struct crc32_write_args c_args  = {};
-
-       vidtv_psi_pmt_table_update_sec_len(args.pmt);
-
-       h_args.dest_buf           = args.buf;
-       h_args.dest_offset        = args.offset;
-       h_args.h                  = &args.pmt->header;
-       h_args.pid                = args.pid;
-       h_args.continuity_counter = args.continuity_counter;
-       h_args.dest_buf_sz        = args.buf_sz;
+
+       vidtv_psi_pmt_table_update_sec_len(args->pmt);
+
        h_args.crc                = &crc;
 
-       nbytes += vidtv_psi_table_header_write_into(h_args);
+       nbytes += vidtv_psi_table_header_write_into(&h_args);
 
        /* write the two bitfields */
-       psi_args.dest_buf = args.buf;
-       psi_args.from     = &args.pmt->bitfield;
-       psi_args.len      = sizeof_field(struct vidtv_psi_table_pmt, bitfield) +
-                           sizeof_field(struct vidtv_psi_table_pmt, bitfield2);
-
-       psi_args.dest_offset        = args.offset + nbytes;
-       psi_args.pid                = args.pid;
-       psi_args.new_psi_section    = false;
-       psi_args.continuity_counter = args.continuity_counter;
-       psi_args.is_crc             = false;
-       psi_args.dest_buf_sz        = args.buf_sz;
-       psi_args.crc                = &crc;
-
-       nbytes += vidtv_psi_ts_psi_write_into(psi_args);
+       psi_args.dest_offset        = args->offset + nbytes;
+       psi_args.continuity_counter = args->continuity_counter;
+       nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
 
        while (table_descriptor) {
                /* write the descriptors, if any */
-               d_args.dest_buf           = args.buf;
-               d_args.dest_offset        = args.offset + nbytes;
-               d_args.desc               = table_descriptor;
-               d_args.pid                = args.pid;
-               d_args.continuity_counter = args.continuity_counter;
-               d_args.dest_buf_sz        = args.buf_sz;
+               d_args.dest_offset        = args->offset + nbytes;
+               d_args.continuity_counter = args->continuity_counter;
                d_args.crc                = &crc;
 
-               nbytes += vidtv_psi_desc_write_into(d_args);
+               nbytes += vidtv_psi_desc_write_into(&d_args);
 
                table_descriptor = table_descriptor->next;
        }
 
+       psi_args.len += sizeof_field(struct vidtv_psi_table_pmt_stream, type);
        while (stream) {
                /* write the streams, if any */
                psi_args.from = stream;
-               psi_args.len  = sizeof_field(struct vidtv_psi_table_pmt_stream, type) +
-                               sizeof_field(struct vidtv_psi_table_pmt_stream, bitfield) +
-                               sizeof_field(struct vidtv_psi_table_pmt_stream, bitfield2);
-               psi_args.dest_offset = args.offset + nbytes;
+               psi_args.dest_offset = args->offset + nbytes;
+               psi_args.continuity_counter = args->continuity_counter;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
 
-               nbytes += vidtv_psi_ts_psi_write_into(psi_args);
+               stream_descriptor = stream->descriptor;
 
                while (stream_descriptor) {
                        /* write the stream descriptors, if any */
-                       d_args.dest_buf           = args.buf;
-                       d_args.dest_offset        = args.offset + nbytes;
+                       d_args.dest_offset        = args->offset + nbytes;
                        d_args.desc               = stream_descriptor;
-                       d_args.pid                = args.pid;
-                       d_args.continuity_counter = args.continuity_counter;
-                       d_args.dest_buf_sz        = args.buf_sz;
+                       d_args.continuity_counter = args->continuity_counter;
                        d_args.crc                = &crc;
 
-                       nbytes += vidtv_psi_desc_write_into(d_args);
+                       nbytes += vidtv_psi_desc_write_into(&d_args);
 
                        stream_descriptor = stream_descriptor->next;
                }
@@ -1058,15 +1244,12 @@ u32 vidtv_psi_pmt_write_into(struct vidtv_psi_pmt_write_args args)
                stream = stream->next;
        }
 
-       c_args.dest_buf           = args.buf;
-       c_args.dest_offset        = args.offset + nbytes;
+       c_args.dest_offset        = args->offset + nbytes;
        c_args.crc                = cpu_to_be32(crc);
-       c_args.pid                = args.pid;
-       c_args.continuity_counter = args.continuity_counter;
-       c_args.dest_buf_sz        = args.buf_sz;
+       c_args.continuity_counter = args->continuity_counter;
 
        /* Write the CRC32 at the end */
-       nbytes += table_section_crc32_write_into(c_args);
+       nbytes += table_section_crc32_write_into(&c_args);
 
        return nbytes;
 }
@@ -1078,16 +1261,20 @@ void vidtv_psi_pmt_table_destroy(struct vidtv_psi_table_pmt *pmt)
        kfree(pmt);
 }
 
-struct vidtv_psi_table_sdt *vidtv_psi_sdt_table_init(u16 transport_stream_id)
+struct vidtv_psi_table_sdt *vidtv_psi_sdt_table_init(u16 network_id,
+                                                    u16 transport_stream_id)
 {
-       struct vidtv_psi_table_sdt *sdt = kzalloc(sizeof(*sdt), GFP_KERNEL);
+       struct vidtv_psi_table_sdt *sdt;
+       const u16 RESERVED = 0xff;
        const u16 SYNTAX = 0x1;
-       const u16 ONE = 0x1;
        const u16 ONES = 0x03;
-       const u16 RESERVED = 0xff;
+       const u16 ONE = 0x1;
 
-       sdt->header.table_id = 0x42;
+       sdt = kzalloc(sizeof(*sdt), GFP_KERNEL);
+       if (!sdt)
+               return NULL;
 
+       sdt->header.table_id = 0x42;
        sdt->header.bitfield = cpu_to_be16((SYNTAX << 15) | (ONE << 14) | (ONES << 12));
 
        /*
@@ -1111,7 +1298,7 @@ struct vidtv_psi_table_sdt *vidtv_psi_sdt_table_init(u16 transport_stream_id)
         * This can be changed to something more useful, when support for
         * NIT gets added
         */
-       sdt->network_id = cpu_to_be16(0xff01);
+       sdt->network_id = cpu_to_be16(network_id);
        sdt->reserved = RESERVED;
 
        vidtv_psi_sdt_table_update_sec_len(sdt);
@@ -1119,74 +1306,79 @@ struct vidtv_psi_table_sdt *vidtv_psi_sdt_table_init(u16 transport_stream_id)
        return sdt;
 }
 
-u32 vidtv_psi_sdt_write_into(struct vidtv_psi_sdt_write_args args)
+u32 vidtv_psi_sdt_write_into(struct vidtv_psi_sdt_write_args *args)
 {
+       struct header_write_args h_args = {
+               .dest_buf           = args->buf,
+               .dest_offset        = args->offset,
+               .h                  = &args->sdt->header,
+               .pid                = VIDTV_SDT_PID,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct psi_write_args psi_args  = {
+               .dest_buf = args->buf,
+               .len = sizeof_field(struct vidtv_psi_table_sdt, network_id) +
+                      sizeof_field(struct vidtv_psi_table_sdt, reserved),
+               .pid                = VIDTV_SDT_PID,
+               .new_psi_section    = false,
+               .is_crc             = false,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct desc_write_args d_args   = {
+               .dest_buf           = args->buf,
+               .pid                = VIDTV_SDT_PID,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct crc32_write_args c_args  = {
+               .dest_buf           = args->buf,
+               .pid                = VIDTV_SDT_PID,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct vidtv_psi_table_sdt_service *service = args->sdt->service;
+       struct vidtv_psi_desc *service_desc;
        u32 nbytes  = 0;
-       u16 sdt_pid = VIDTV_SDT_PID;  /* see ETSI EN 300 468 v1.15.1 p. 11 */
+       u32 crc = INITIAL_CRC;
 
-       u32 crc = 0xffffffff;
+       /* see ETSI EN 300 468 v1.15.1 p. 11 */
 
-       struct vidtv_psi_table_sdt_service *service = args.sdt->service;
-       struct vidtv_psi_desc *service_desc = (args.sdt->service) ?
-                                             args.sdt->service->descriptor :
-                                             NULL;
+       vidtv_psi_sdt_table_update_sec_len(args->sdt);
 
-       struct header_write_args h_args = {};
-       struct psi_write_args psi_args  = {};
-       struct desc_write_args d_args   = {};
-       struct crc32_write_args c_args  = {};
-
-       vidtv_psi_sdt_table_update_sec_len(args.sdt);
-
-       h_args.dest_buf           = args.buf;
-       h_args.dest_offset        = args.offset;
-       h_args.h                  = &args.sdt->header;
-       h_args.pid                = sdt_pid;
-       h_args.continuity_counter = args.continuity_counter;
-       h_args.dest_buf_sz        = args.buf_sz;
+       h_args.continuity_counter = args->continuity_counter;
        h_args.crc                = &crc;
 
-       nbytes += vidtv_psi_table_header_write_into(h_args);
-
-       psi_args.dest_buf = args.buf;
-       psi_args.from     = &args.sdt->network_id;
+       nbytes += vidtv_psi_table_header_write_into(&h_args);
 
-       psi_args.len = sizeof_field(struct vidtv_psi_table_sdt, network_id) +
-                      sizeof_field(struct vidtv_psi_table_sdt, reserved);
-
-       psi_args.dest_offset        = args.offset + nbytes;
-       psi_args.pid                = sdt_pid;
-       psi_args.new_psi_section    = false;
-       psi_args.continuity_counter = args.continuity_counter;
-       psi_args.is_crc             = false;
-       psi_args.dest_buf_sz        = args.buf_sz;
+       psi_args.from               = &args->sdt->network_id;
+       psi_args.dest_offset        = args->offset + nbytes;
+       psi_args.continuity_counter = args->continuity_counter;
        psi_args.crc                = &crc;
 
        /* copy u16 network_id + u8 reserved */
-       nbytes += vidtv_psi_ts_psi_write_into(psi_args);
+       nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+       /* skip both pointers at the end */
+       psi_args.len = sizeof(struct vidtv_psi_table_sdt_service) -
+                      sizeof(struct vidtv_psi_desc *) -
+                      sizeof(struct vidtv_psi_table_sdt_service *);
 
        while (service) {
                /* copy the services, if any */
                psi_args.from = service;
-               /* skip both pointers at the end */
-               psi_args.len = sizeof(struct vidtv_psi_table_sdt_service) -
-                              sizeof(struct vidtv_psi_desc *) -
-                              sizeof(struct vidtv_psi_table_sdt_service *);
-               psi_args.dest_offset = args.offset + nbytes;
+               psi_args.dest_offset = args->offset + nbytes;
+               psi_args.continuity_counter = args->continuity_counter;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
 
-               nbytes += vidtv_psi_ts_psi_write_into(psi_args);
+               service_desc = service->descriptor;
 
                while (service_desc) {
                        /* copy the service descriptors, if any */
-                       d_args.dest_buf           = args.buf;
-                       d_args.dest_offset        = args.offset + nbytes;
+                       d_args.dest_offset        = args->offset + nbytes;
                        d_args.desc               = service_desc;
-                       d_args.pid                = sdt_pid;
-                       d_args.continuity_counter = args.continuity_counter;
-                       d_args.dest_buf_sz        = args.buf_sz;
+                       d_args.continuity_counter = args->continuity_counter;
                        d_args.crc                = &crc;
 
-                       nbytes += vidtv_psi_desc_write_into(d_args);
+                       nbytes += vidtv_psi_desc_write_into(&d_args);
 
                        service_desc = service_desc->next;
                }
@@ -1194,15 +1386,12 @@ u32 vidtv_psi_sdt_write_into(struct vidtv_psi_sdt_write_args args)
                service = service->next;
        }
 
-       c_args.dest_buf           = args.buf;
-       c_args.dest_offset        = args.offset + nbytes;
+       c_args.dest_offset        = args->offset + nbytes;
        c_args.crc                = cpu_to_be32(crc);
-       c_args.pid                = sdt_pid;
-       c_args.continuity_counter = args.continuity_counter;
-       c_args.dest_buf_sz        = args.buf_sz;
+       c_args.continuity_counter = args->continuity_counter;
 
        /* Write the CRC at the end */
-       nbytes += table_section_crc32_write_into(c_args);
+       nbytes += table_section_crc32_write_into(&c_args);
 
        return nbytes;
 }
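With the new signatures, the network_id moves into the SDT itself and each service carries its EIT flags. A minimal sketch, with illustrative IDs:

struct vidtv_psi_table_sdt_service *service;
struct vidtv_psi_table_sdt *sdt;

sdt = vidtv_psi_sdt_table_init(0xff44, 0x744); /* network_id, transport_stream_id */
service = vidtv_psi_sdt_service_init(NULL, 0x880,
                                     false,    /* eit_schedule */
                                     true);    /* eit_present_following */
vidtv_psi_sdt_service_assign(sdt, service);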
@@ -1215,11 +1404,15 @@ void vidtv_psi_sdt_table_destroy(struct vidtv_psi_table_sdt *sdt)
 
 struct vidtv_psi_table_sdt_service
 *vidtv_psi_sdt_service_init(struct vidtv_psi_table_sdt_service *head,
-                           u16 service_id)
+                           u16 service_id,
+                           bool eit_schedule,
+                           bool eit_present_following)
 {
        struct vidtv_psi_table_sdt_service *service;
 
        service = kzalloc(sizeof(*service), GFP_KERNEL);
+       if (!service)
+               return NULL;
 
        /*
         * ETSI 300 468: this is a 16bit field which serves as a label to
@@ -1228,8 +1421,8 @@ struct vidtv_psi_table_sdt_service
         * corresponding program_map_section
         */
        service->service_id            = cpu_to_be16(service_id);
-       service->EIT_schedule          = 0x0;
-       service->EIT_present_following = 0x0;
+       service->EIT_schedule          = eit_schedule;
+       service->EIT_present_following = eit_present_following;
        service->reserved              = 0x3f;
 
        service->bitfield = cpu_to_be16(RUNNING << 13);
@@ -1262,53 +1455,78 @@ void
 vidtv_psi_sdt_service_assign(struct vidtv_psi_table_sdt *sdt,
                             struct vidtv_psi_table_sdt_service *service)
 {
-       if (service == sdt->service)
-               return;
+       do {
+               if (service == sdt->service)
+                       return;
 
-       sdt->service = service;
+               sdt->service = service;
 
-       /* recompute section length */
-       vidtv_psi_sdt_table_update_sec_len(sdt);
+               /* recompute section length */
+               vidtv_psi_sdt_table_update_sec_len(sdt);
 
-       if (vidtv_psi_get_sec_len(&sdt->header) > MAX_SECTION_LEN)
-               vidtv_psi_sdt_service_assign(sdt, NULL);
+               service = NULL;
+       } while (vidtv_psi_get_sec_len(&sdt->header) > MAX_SECTION_LEN);
 
        vidtv_psi_update_version_num(&sdt->header);
 }
 
+/*
+ * PMTs contain information about programs. For each program,
+ * there is one PMT section. This function will create a section
+ * for each program found in the PAT
+ */
 struct vidtv_psi_table_pmt**
-vidtv_psi_pmt_create_sec_for_each_pat_entry(struct vidtv_psi_table_pat *pat, u16 pcr_pid)
+vidtv_psi_pmt_create_sec_for_each_pat_entry(struct vidtv_psi_table_pat *pat,
+                                           u16 pcr_pid)
 
 {
+       struct vidtv_psi_table_pat_program *program;
+       struct vidtv_psi_table_pmt **pmt_secs;
+       u32 i = 0, num_pmt = 0;
+
        /*
-        * PMTs contain information about programs. For each program,
-        * there is one PMT section. This function will create a section
-        * for each program found in the PAT
+        * The number of PMT entries is the number of PAT entries
+        * that contain a service_id. That excludes special tables, like the NIT
         */
-       struct vidtv_psi_table_pat_program *program = pat->program;
-       struct vidtv_psi_table_pmt **pmt_secs;
-       u32 i = 0;
+       program = pat->program;
+       while (program) {
+               if (program->service_id)
+                       num_pmt++;
+               program = program->next;
+       }
 
-       /* a section for each program_id */
-       pmt_secs = kcalloc(pat->programs,
+       pmt_secs = kcalloc(num_pmt,
                           sizeof(struct vidtv_psi_table_pmt *),
                           GFP_KERNEL);
-
-       while (program) {
-               pmt_secs[i] = vidtv_psi_pmt_table_init(be16_to_cpu(program->service_id), pcr_pid);
-               ++i;
-               program = program->next;
+       if (!pmt_secs)
+               return NULL;
+
+       for (program = pat->program; program; program = program->next) {
+               if (!program->service_id)
+                       continue;
+               pmt_secs[i] = vidtv_psi_pmt_table_init(be16_to_cpu(program->service_id),
+                                                      pcr_pid);
+
+               if (!pmt_secs[i]) {
+                       while (i > 0) {
+                               i--;
+                               vidtv_psi_pmt_table_destroy(pmt_secs[i]);
+                       }
+                       return NULL;
+               }
+               i++;
        }
+       pat->num_pmt = num_pmt;
 
        return pmt_secs;
 }
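On the caller side, the returned array and pat->num_pmt travel together; a sketch of the expected lifecycle (use of the sections elided):

struct vidtv_psi_table_pmt **pmt_secs;
u32 i;

pmt_secs = vidtv_psi_pmt_create_sec_for_each_pat_entry(pat, pcr_pid);
if (!pmt_secs)
        return -ENOMEM;

/* ... write the sections ... */

for (i = 0; i < pat->num_pmt; i++)
        vidtv_psi_pmt_table_destroy(pmt_secs[i]);
kfree(pmt_secs);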
 
+/* find the PMT section associated with 'program_num' */
 struct vidtv_psi_table_pmt
 *vidtv_psi_find_pmt_sec(struct vidtv_psi_table_pmt **pmt_sections,
                        u16 nsections,
                        u16 program_num)
 {
-       /* find the PMT section associated with 'program_num' */
        struct vidtv_psi_table_pmt *sec = NULL;
        u32 i;
 
@@ -1320,3 +1538,488 @@ struct vidtv_psi_table_pmt
 
        return NULL; /* not found */
 }
+
+static void vidtv_psi_nit_table_update_sec_len(struct vidtv_psi_table_nit *nit)
+{
+       u16 length = 0;
+       struct vidtv_psi_table_transport *t = nit->transport;
+       u16 desc_loop_len;
+       u16 transport_loop_len = 0;
+
+       /*
+        * from immediately after 'section_length' until
+        * 'network_descriptor_length'
+        */
+       length += NIT_LEN_UNTIL_NETWORK_DESCRIPTOR_LEN;
+
+       desc_loop_len = vidtv_psi_desc_comp_loop_len(nit->descriptor);
+       vidtv_psi_set_desc_loop_len(&nit->bitfield, desc_loop_len, 12);
+
+       length += desc_loop_len;
+
+       length += sizeof_field(struct vidtv_psi_table_nit, bitfield2);
+
+       while (t) {
+               /* skip both pointers at the end */
+               transport_loop_len += sizeof(struct vidtv_psi_table_transport) -
+                                     sizeof(struct vidtv_psi_desc *) -
+                                     sizeof(struct vidtv_psi_table_transport *);
+
+               desc_loop_len = vidtv_psi_desc_comp_loop_len(t->descriptor);
+               vidtv_psi_set_desc_loop_len(&t->bitfield, desc_loop_len, 12);
+
+               length += desc_loop_len;
+
+               t = t->next;
+       }
+
+       /* add the accumulated transport loop length only once */
+       length += transport_loop_len;
+
+       // Actually sets the transport stream loop len, maybe rename this function later
+       vidtv_psi_set_desc_loop_len(&nit->bitfield2, transport_loop_len, 12);
+       length += CRC_SIZE_IN_BYTES;
+
+       vidtv_psi_set_sec_len(&nit->header, length);
+}
+
+struct vidtv_psi_table_nit
+*vidtv_psi_nit_table_init(u16 network_id,
+                         u16 transport_stream_id,
+                         char *network_name,
+                         struct vidtv_psi_desc_service_list_entry *service_list)
+{
+       struct vidtv_psi_table_transport *transport;
+       struct vidtv_psi_table_nit *nit;
+       const u16 SYNTAX = 0x1;
+       const u16 ONES = 0x03;
+       const u16 ONE = 0x1;
+
+       nit = kzalloc(sizeof(*nit), GFP_KERNEL);
+       if (!nit)
+               return NULL;
+
+       transport = kzalloc(sizeof(*transport), GFP_KERNEL);
+       if (!transport)
+               goto free_nit;
+
+       nit->header.table_id = 0x40; // ACTUAL_NETWORK
+
+       nit->header.bitfield = cpu_to_be16((SYNTAX << 15) | (ONE << 14) | (ONES << 12));
+
+       nit->header.id = cpu_to_be16(network_id);
+       nit->header.current_next = ONE;
+
+       nit->header.version = 0x1f;
+
+       nit->header.one2  = ONES;
+       nit->header.section_id   = 0;
+       nit->header.last_section = 0;
+
+       nit->bitfield = cpu_to_be16(0xf);
+       nit->bitfield2 = cpu_to_be16(0xf);
+
+       nit->descriptor = (struct vidtv_psi_desc *)
+                         vidtv_psi_network_name_desc_init(NULL, network_name);
+       if (!nit->descriptor)
+               goto free_transport;
+
+       transport->transport_id = cpu_to_be16(transport_stream_id);
+       transport->network_id = cpu_to_be16(network_id);
+       transport->bitfield = cpu_to_be16(0xf);
+       transport->descriptor = (struct vidtv_psi_desc *)
+                               vidtv_psi_service_list_desc_init(NULL, service_list);
+       if (!transport->descriptor)
+               goto free_nit_desc;
+
+       nit->transport = transport;
+
+       vidtv_psi_nit_table_update_sec_len(nit);
+
+       return nit;
+
+free_nit_desc:
+       vidtv_psi_desc_destroy((struct vidtv_psi_desc *)nit->descriptor);
+
+free_transport:
+       kfree(transport);
+free_nit:
+       kfree(nit);
+       return NULL;
+}
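A sketch of bringing up a NIT with a one-entry service list (the IDs and network name are illustrative; whether the entry may live on the stack depends on how vidtv_psi_service_list_desc_init copies it):

struct vidtv_psi_desc_service_list_entry entry = {
        .service_id   = cpu_to_be16(0x880),
        .service_type = DIGITAL_TELEVISION_SERVICE,
        .next         = NULL,
};
struct vidtv_psi_table_nit *nit;

nit = vidtv_psi_nit_table_init(0xff44, 0x744, "vidtv network", &entry);
if (!nit)
        return -ENOMEM;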
+
+u32 vidtv_psi_nit_write_into(struct vidtv_psi_nit_write_args *args)
+{
+       struct header_write_args h_args = {
+               .dest_buf           = args->buf,
+               .dest_offset        = args->offset,
+               .h                  = &args->nit->header,
+               .pid                = VIDTV_NIT_PID,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct psi_write_args psi_args  = {
+               .dest_buf           = args->buf,
+               .from               = &args->nit->bitfield,
+               .len                = sizeof_field(struct vidtv_psi_table_nit, bitfield),
+               .pid                = VIDTV_NIT_PID,
+               .new_psi_section    = false,
+               .is_crc             = false,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct desc_write_args d_args   = {
+               .dest_buf           = args->buf,
+               .pid                = VIDTV_NIT_PID,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct crc32_write_args c_args  = {
+               .dest_buf           = args->buf,
+               .pid                = VIDTV_NIT_PID,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct vidtv_psi_desc *table_descriptor     = args->nit->descriptor;
+       struct vidtv_psi_table_transport *transport = args->nit->transport;
+       struct vidtv_psi_desc *transport_descriptor;
+       u32 crc = INITIAL_CRC;
+       u32 nbytes = 0;
+
+       vidtv_psi_nit_table_update_sec_len(args->nit);
+
+       h_args.continuity_counter = args->continuity_counter;
+       h_args.crc                = &crc;
+
+       nbytes += vidtv_psi_table_header_write_into(&h_args);
+
+       /* write the bitfield */
+
+       psi_args.dest_offset        = args->offset + nbytes;
+       psi_args.continuity_counter = args->continuity_counter;
+       psi_args.crc                = &crc;
+
+       nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+       while (table_descriptor) {
+               /* write the descriptors, if any */
+               d_args.dest_offset        = args->offset + nbytes;
+               d_args.desc               = table_descriptor;
+               d_args.continuity_counter = args->continuity_counter;
+               d_args.crc                = &crc;
+
+               nbytes += vidtv_psi_desc_write_into(&d_args);
+
+               table_descriptor = table_descriptor->next;
+       }
+
+       /* write the second bitfield */
+       psi_args.from = &args->nit->bitfield2;
+       psi_args.len = sizeof_field(struct vidtv_psi_table_nit, bitfield2);
+       psi_args.dest_offset = args->offset + nbytes;
+
+       nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+       psi_args.len  = sizeof_field(struct vidtv_psi_table_transport, transport_id) +
+                       sizeof_field(struct vidtv_psi_table_transport, network_id)   +
+                       sizeof_field(struct vidtv_psi_table_transport, bitfield);
+       while (transport) {
+               /* write the transport sections, if any */
+               psi_args.from = transport;
+               psi_args.dest_offset = args->offset + nbytes;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+               transport_descriptor = transport->descriptor;
+
+               while (transport_descriptor) {
+                       /* write the transport descriptors, if any */
+                       d_args.dest_offset        = args->offset + nbytes;
+                       d_args.desc               = transport_descriptor;
+                       d_args.continuity_counter = args->continuity_counter;
+                       d_args.crc                = &crc;
+
+                       nbytes += vidtv_psi_desc_write_into(&d_args);
+
+                       transport_descriptor = transport_descriptor->next;
+               }
+
+               transport = transport->next;
+       }
+
+       c_args.dest_offset        = args->offset + nbytes;
+       c_args.crc                = cpu_to_be32(crc);
+       c_args.continuity_counter = args->continuity_counter;
+
+       /* Write the CRC32 at the end */
+       nbytes += table_section_crc32_write_into(&c_args);
+
+       return nbytes;
+}
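Serializing the NIT follows the same args-by-pointer pattern as the other tables; buffer and counter names are hypothetical:

struct vidtv_psi_nit_write_args nit_args = {
        .buf                = ts_buf,
        .offset             = nbytes,
        .nit                = nit,
        .buf_sz             = ts_buf_sz,
        .continuity_counter = &cc,
};
nbytes += vidtv_psi_nit_write_into(&nit_args);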
+
+static void vidtv_psi_transport_destroy(struct vidtv_psi_table_transport *t)
+{
+       struct vidtv_psi_table_transport *tmp_t  = NULL;
+       struct vidtv_psi_table_transport *curr_t = t;
+
+       while (curr_t) {
+               tmp_t  = curr_t;
+               curr_t = curr_t->next;
+               vidtv_psi_desc_destroy(tmp_t->descriptor);
+               kfree(tmp_t);
+       }
+}
+
+void vidtv_psi_nit_table_destroy(struct vidtv_psi_table_nit *nit)
+{
+       vidtv_psi_desc_destroy(nit->descriptor);
+       vidtv_psi_transport_destroy(nit->transport);
+       kfree(nit);
+}
+
+void vidtv_psi_eit_table_update_sec_len(struct vidtv_psi_table_eit *eit)
+{
+       struct vidtv_psi_table_eit_event *e = eit->event;
+       u16 desc_loop_len;
+       u16 length = 0;
+
+       /*
+        * from immediately after 'section_length' until
+        * 'last_table_id'
+        */
+       length += EIT_LEN_UNTIL_LAST_TABLE_ID;
+
+       while (e) {
+               /* skip both pointers at the end */
+               length += sizeof(struct vidtv_psi_table_eit_event) -
+                         sizeof(struct vidtv_psi_desc *) -
+                         sizeof(struct vidtv_psi_table_eit_event *);
+
+               desc_loop_len = vidtv_psi_desc_comp_loop_len(e->descriptor);
+               vidtv_psi_set_desc_loop_len(&e->bitfield, desc_loop_len, 12);
+
+               length += desc_loop_len;
+
+               e = e->next;
+       }
+
+       length += CRC_SIZE_IN_BYTES;
+
+       vidtv_psi_set_sec_len(&eit->header, length);
+}
+
+void vidtv_psi_eit_event_assign(struct vidtv_psi_table_eit *eit,
+                               struct vidtv_psi_table_eit_event *e)
+{
+       do {
+               if (e == eit->event)
+                       return;
+
+               eit->event = e;
+               vidtv_psi_eit_table_update_sec_len(eit);
+
+               e = NULL;
+       } while (vidtv_psi_get_sec_len(&eit->header) > EIT_MAX_SECTION_LEN);
+
+       vidtv_psi_update_version_num(&eit->header);
+}
+
+struct vidtv_psi_table_eit
+*vidtv_psi_eit_table_init(u16 network_id,
+                         u16 transport_stream_id,
+                         __be16 service_id)
+{
+       struct vidtv_psi_table_eit *eit;
+       const u16 SYNTAX = 0x1;
+       const u16 ONE = 0x1;
+       const u16 ONES = 0x03;
+
+       eit = kzalloc(sizeof(*eit), GFP_KERNEL);
+       if (!eit)
+               return NULL;
+
+       eit->header.table_id = 0x4e; // actual_transport_stream: present/following
+
+       eit->header.bitfield = cpu_to_be16((SYNTAX << 15) | (ONE << 14) | (ONES << 12));
+
+       eit->header.id = service_id;
+       eit->header.current_next = ONE;
+
+       eit->header.version = 0x1f;
+
+       eit->header.one2  = ONES;
+       eit->header.section_id   = 0;
+       eit->header.last_section = 0;
+
+       eit->transport_id = cpu_to_be16(transport_stream_id);
+       eit->network_id = cpu_to_be16(network_id);
+
+       eit->last_segment = eit->header.last_section; /* not implemented */
+       eit->last_table_id = eit->header.table_id; /* not implemented */
+
+       vidtv_psi_eit_table_update_sec_len(eit);
+
+       return eit;
+}
+
+u32 vidtv_psi_eit_write_into(struct vidtv_psi_eit_write_args *args)
+{
+       struct header_write_args h_args = {
+               .dest_buf        = args->buf,
+               .dest_offset     = args->offset,
+               .h               = &args->eit->header,
+               .pid             = VIDTV_EIT_PID,
+               .dest_buf_sz     = args->buf_sz,
+       };
+       struct psi_write_args psi_args  = {
+               .dest_buf        = args->buf,
+               .len             = sizeof_field(struct vidtv_psi_table_eit, transport_id) +
+                                  sizeof_field(struct vidtv_psi_table_eit, network_id)   +
+                                  sizeof_field(struct vidtv_psi_table_eit, last_segment) +
+                                  sizeof_field(struct vidtv_psi_table_eit, last_table_id),
+               .pid             = VIDTV_EIT_PID,
+               .new_psi_section = false,
+               .is_crc          = false,
+               .dest_buf_sz     = args->buf_sz,
+       };
+       struct desc_write_args d_args   = {
+               .dest_buf           = args->buf,
+               .pid                = VIDTV_EIT_PID,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct crc32_write_args c_args  = {
+               .dest_buf           = args->buf,
+               .pid                = VIDTV_EIT_PID,
+               .dest_buf_sz        = args->buf_sz,
+       };
+       struct vidtv_psi_table_eit_event *event = args->eit->event;
+       struct vidtv_psi_desc *event_descriptor;
+       u32 crc = INITIAL_CRC;
+       u32 nbytes  = 0;
+
+       vidtv_psi_eit_table_update_sec_len(args->eit);
+
+       h_args.continuity_counter = args->continuity_counter;
+       h_args.crc                = &crc;
+
+       nbytes += vidtv_psi_table_header_write_into(&h_args);
+
+       psi_args.from               = &args->eit->transport_id;
+       psi_args.dest_offset        = args->offset + nbytes;
+       psi_args.continuity_counter = args->continuity_counter;
+       psi_args.crc                = &crc;
+
+       nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+       /* skip both pointers at the end */
+       psi_args.len = sizeof(struct vidtv_psi_table_eit_event) -
+                      sizeof(struct vidtv_psi_desc *) -
+                      sizeof(struct vidtv_psi_table_eit_event *);
+       while (event) {
+               /* copy the events, if any */
+               psi_args.from = event;
+               psi_args.dest_offset = args->offset + nbytes;
+
+               nbytes += vidtv_psi_ts_psi_write_into(&psi_args);
+
+               event_descriptor = event->descriptor;
+
+               while (event_descriptor) {
+                       /* copy the event descriptors, if any */
+                       d_args.dest_offset        = args->offset + nbytes;
+                       d_args.desc               = event_descriptor;
+                       d_args.continuity_counter = args->continuity_counter;
+                       d_args.crc                = &crc;
+
+                       nbytes += vidtv_psi_desc_write_into(&d_args);
+
+                       event_descriptor = event_descriptor->next;
+               }
+
+               event = event->next;
+       }
+
+       c_args.dest_offset        = args->offset + nbytes;
+       c_args.crc                = cpu_to_be32(crc);
+       c_args.continuity_counter = args->continuity_counter;
+
+       /* Write the CRC at the end */
+       nbytes += table_section_crc32_write_into(&c_args);
+
+       return nbytes;
+}
+
+struct vidtv_psi_table_eit_event
+*vidtv_psi_eit_event_init(struct vidtv_psi_table_eit_event *head, u16 event_id)
+{
+       const u8 DURATION[] = {0x23, 0x59, 0x59}; /* BCD encoded */
+       struct vidtv_psi_table_eit_event *e;
+       struct timespec64 ts;
+       struct tm time;
+       int mjd, l;
+       __be16 mjd_be;
+
+       e = kzalloc(sizeof(*e), GFP_KERNEL);
+       if (!e)
+               return NULL;
+
+       e->event_id = cpu_to_be16(event_id);
+
+       ts = ktime_to_timespec64(ktime_get_real());
+       time64_to_tm(ts.tv_sec, 0, &time);
+
+       /* Convert date to Modified Julian Date - per EN 300 468 Annex C */
+       if (time.tm_mon < 2)
+               l = 1;
+       else
+               l = 0;
+
+       mjd = 14956 + time.tm_mday;
+       mjd += (time.tm_year - l) * 36525 / 100;
+       mjd += (time.tm_mon + 2 + l * 12) * 306001 / 10000;
+       mjd_be = cpu_to_be16(mjd);
+
+       /*
+        * Store the MJD and hour/min/sec in the event.
+        *
+        * Let's make the event start on a full hour
+        */
+       memcpy(e->start_time, &mjd_be, sizeof(mjd_be));
+       e->start_time[2] = bin2bcd(time.tm_hour);
+       e->start_time[3] = 0;
+       e->start_time[4] = 0;
+
+       /*
+        * TODO: for now, the event will last for a day. Should be
+        * enough for testing purposes, but if one runs the driver
+        * for more than that, the current event will become invalid.
+        * So, we need better code here to change the start
+        * time once the event expires.
+        */
+       memcpy(e->duration, DURATION, sizeof(e->duration));
+
+       e->bitfield = cpu_to_be16(RUNNING << 13);
+
+       if (head) {
+               while (head->next)
+                       head = head->next;
+
+               head->next = e;
+       }
+
+       return e;
+}
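The MJD arithmetic above reproduces the worked example in EN 300 468 Annex C. For 13 October 1993 (tm_year = 93, tm_mon = 9, tm_mday = 13):

l = 0                                      (month is neither January nor February)
mjd = 14956 + 13                = 14969
mjd += 93 * 36525 / 100         = 14969 + 33968 = 48937
mjd += (9 + 2) * 306001 / 10000 = 48937 + 336   = 49273

which is exactly the MJD the standard gives for that date.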
+
+void vidtv_psi_eit_event_destroy(struct vidtv_psi_table_eit_event *e)
+{
+       struct vidtv_psi_table_eit_event *tmp_e  = NULL;
+       struct vidtv_psi_table_eit_event *curr_e = e;
+
+       while (curr_e) {
+               tmp_e  = curr_e;
+               curr_e = curr_e->next;
+               vidtv_psi_desc_destroy(tmp_e->descriptor);
+               kfree(tmp_e);
+       }
+}
+
+void vidtv_psi_eit_table_destroy(struct vidtv_psi_table_eit *eit)
+{
+       vidtv_psi_eit_event_destroy(eit->event);
+       kfree(eit);
+}
index 3f962cc..fdc825e 100644 (file)
@@ -6,10 +6,6 @@
  * technically be broken into one or more sections, we do not do this here,
  * hence 'table' and 'section' are interchangeable for vidtv.
  *
- * This code currently supports three tables: PAT, PMT and SDT. These are the
- * bare minimum to get userspace to recognize our MPEG transport stream. It can
- * be extended to support more PSI tables in the future.
- *
  * Copyright (C) 2020 Daniel W. S. Almeida
  */
 
@@ -17,7 +13,6 @@
 #define VIDTV_PSI_H
 
 #include <linux/types.h>
-#include <asm/byteorder.h>
 
 /*
  * all section lengths start immediately after the 'section_length' field
 #define PAT_LEN_UNTIL_LAST_SECTION_NUMBER 5
 #define PMT_LEN_UNTIL_PROGRAM_INFO_LENGTH 9
 #define SDT_LEN_UNTIL_RESERVED_FOR_FUTURE_USE 8
+#define NIT_LEN_UNTIL_NETWORK_DESCRIPTOR_LEN 7
+#define EIT_LEN_UNTIL_LAST_TABLE_ID 11
 #define MAX_SECTION_LEN 1021
+#define EIT_MAX_SECTION_LEN 4093 /* see ETSI 300 468 v.1.10.1 p. 26 */
 #define VIDTV_PAT_PID 0 /* mandated by the specs */
 #define VIDTV_SDT_PID 0x0011 /* mandated by the specs */
+#define VIDTV_NIT_PID 0x0010 /* mandated by the specs */
+#define VIDTV_EIT_PID 0x0012 /* mandated by the specs */
 
 enum vidtv_psi_descriptors {
        REGISTRATION_DESCRIPTOR = 0x05, /* See ISO/IEC 13818-1 section 2.6.8 */
+       NETWORK_NAME_DESCRIPTOR = 0x40, /* See ETSI EN 300 468 section 6.2.27 */
+       SERVICE_LIST_DESCRIPTOR = 0x41, /* See ETSI EN 300 468 section 6.2.35 */
        SERVICE_DESCRIPTOR = 0x48, /* See ETSI EN 300 468 section 6.2.33 */
+       SHORT_EVENT_DESCRIPTOR = 0x4d, /* See ETSI EN 300 468 section 6.2.37 */
 };
 
 enum vidtv_psi_stream_types {
        STREAM_PRIVATE_DATA = 0x06, /* see ISO/IEC 13818-1 2000 p. 48 */
 };
 
-/**
+/*
  * struct vidtv_psi_desc - A generic PSI descriptor type.
  * The descriptor length is an 8-bit field specifying the total number of bytes of the data portion
  * of the descriptor following the byte defining the value of this field.
@@ -52,7 +55,7 @@ struct vidtv_psi_desc {
        u8 data[];
 } __packed;
 
-/**
+/*
  * struct vidtv_psi_desc_service - Service descriptor.
  * See ETSI EN 300 468 section 6.2.33.
  */
@@ -68,7 +71,7 @@ struct vidtv_psi_desc_service {
        char *service_name;
 } __packed;
 
-/**
+/*
  * struct vidtv_psi_desc_registration - A registration descriptor.
  * See ISO/IEC 13818-1 section 2.6.8
  */
@@ -90,7 +93,56 @@ struct vidtv_psi_desc_registration {
        u8 additional_identification_info[];
 } __packed;
 
-/**
+/*
+ * struct vidtv_psi_desc_network_name - A network name descriptor
+ * see ETSI EN 300 468 v1.15.1 section 6.2.27
+ */
+struct vidtv_psi_desc_network_name {
+       struct vidtv_psi_desc *next;
+       u8 type;
+       u8 length;
+       char *network_name;
+} __packed;
+
+struct vidtv_psi_desc_service_list_entry {
+       __be16 service_id;
+       u8 service_type;
+       struct vidtv_psi_desc_service_list_entry *next;
+} __packed;
+
+/*
+ * struct vidtv_psi_desc_service_list - A service list descriptor
+ * see ETSI EN 300 468 v1.15.1 section 6.2.35
+ */
+struct vidtv_psi_desc_service_list {
+       struct vidtv_psi_desc *next;
+       u8 type;
+       u8 length;
+       struct vidtv_psi_desc_service_list_entry *service_list;
+} __packed;
+
+/*
+ * struct vidtv_psi_desc_short_event - A short event descriptor
+ * see ETSI EN 300 468 v1.15.1 section 6.2.37
+ */
+struct vidtv_psi_desc_short_event {
+       struct vidtv_psi_desc *next;
+       u8 type;
+       u8 length;
+       char *iso_language_code;
+       u8 event_name_len;
+       char *event_name;
+       u8 text_len;
+       char *text;
+} __packed;
+
+struct vidtv_psi_desc_short_event
+*vidtv_psi_short_event_desc_init(struct vidtv_psi_desc *head,
+                                char *iso_language_code,
+                                char *event_name,
+                                char *text);
+
+/*
  * struct vidtv_psi_table_header - A header that is present for all PSI tables.
  */
 struct vidtv_psi_table_header {
@@ -106,7 +158,7 @@ struct vidtv_psi_table_header {
        u8  last_section; /* last_section_number */
 } __packed;
 
-/**
+/*
  * struct vidtv_psi_table_pat_program - A single program in the PAT
  * See ISO/IEC 13818-1 : 2000 p.43
  */
@@ -116,17 +168,18 @@ struct vidtv_psi_table_pat_program {
        struct vidtv_psi_table_pat_program *next;
 } __packed;
 
-/**
+/*
  * struct vidtv_psi_table_pat - The Program Allocation Table (PAT)
  * See ISO/IEC 13818-1 : 2000 p.43
  */
 struct vidtv_psi_table_pat {
        struct vidtv_psi_table_header header;
-       u16 programs; /* Included by libdvbv5, not part of the table and not actually serialized */
+       u16 num_pat; /* number of PAT entries; not part of the serialized table */
+       u16 num_pmt; /* number of PMT sections; not part of the serialized table */
        struct vidtv_psi_table_pat_program *program;
 } __packed;
 
-/**
+/*
  * struct vidtv_psi_table_sdt_service - Represents a service in the SDT.
  * see ETSI EN 300 468 v1.15.1 section 5.2.3.
  */
@@ -140,7 +193,7 @@ struct vidtv_psi_table_sdt_service {
        struct vidtv_psi_table_sdt_service *next;
 } __packed;
 
-/**
+/*
  * struct vidtv_psi_table_sdt - Represents the Service Description Table
  * see ETSI EN 300 468 v1.15.1 section 5.2.3.
  */
@@ -152,7 +205,7 @@ struct vidtv_psi_table_sdt {
        struct vidtv_psi_table_sdt_service *service;
 } __packed;
 
-/**
+/*
  * enum service_running_status - Status of a SDT service.
  * see ETSI EN 300 468 v1.15.1 section 5.2.3 table 6.
  */
@@ -160,16 +213,17 @@ enum service_running_status {
        RUNNING = 0x4,
 };
 
-/**
+/*
  * enum service_type - The type of a SDT service.
  * see ETSI EN 300 468 v1.15.1 section 6.2.33, table 81.
  */
 enum service_type {
        /* see ETSI EN 300 468 v1.15.1 p. 77 */
        DIGITAL_TELEVISION_SERVICE = 0x1,
+       DIGITAL_RADIO_SOUND_SERVICE = 0x2,
 };
 
-/**
+/*
  * struct vidtv_psi_table_pmt_stream - A single stream in the PMT.
  * See ISO/IEC 13818-1 : 2000 p.46.
  */
@@ -181,7 +235,7 @@ struct vidtv_psi_table_pmt_stream {
        struct vidtv_psi_table_pmt_stream *next;
 } __packed;
 
-/**
+/*
  * struct vidtv_psi_table_pmt - The Program Map Table (PMT).
  * See ISO/IEC 13818-1 : 2000 p.46.
  */
@@ -290,6 +344,13 @@ struct vidtv_psi_desc_registration
                                  u8 *additional_ident_info,
                                  u32 additional_info_len);
 
+struct vidtv_psi_desc_network_name
+*vidtv_psi_network_name_desc_init(struct vidtv_psi_desc *head, char *network_name);
+
+struct vidtv_psi_desc_service_list
+*vidtv_psi_service_list_desc_init(struct vidtv_psi_desc *head,
+                                 struct vidtv_psi_desc_service_list_entry *entry);
+
 struct vidtv_psi_table_pat_program
 *vidtv_psi_pat_program_init(struct vidtv_psi_table_pat_program *head,
                            u16 service_id,
@@ -305,11 +366,14 @@ struct vidtv_psi_table_pat *vidtv_psi_pat_table_init(u16 transport_stream_id);
 struct vidtv_psi_table_pmt *vidtv_psi_pmt_table_init(u16 program_number,
                                                     u16 pcr_pid);
 
-struct vidtv_psi_table_sdt *vidtv_psi_sdt_table_init(u16 transport_stream_id);
+struct vidtv_psi_table_sdt *vidtv_psi_sdt_table_init(u16 network_id,
+                                                    u16 transport_stream_id);
 
 struct vidtv_psi_table_sdt_service*
 vidtv_psi_sdt_service_init(struct vidtv_psi_table_sdt_service *head,
-                          u16 service_id);
+                          u16 service_id,
+                          bool eit_schedule,
+                          bool eit_present_following);
 
 void
 vidtv_psi_desc_destroy(struct vidtv_psi_desc *desc);
@@ -356,7 +420,7 @@ void vidtv_psi_desc_assign(struct vidtv_psi_desc **to,
                           struct vidtv_psi_desc *desc);
 
 /**
- * vidtv_psi_pmt_desc_assign - Assigns a descriptor loop at some point in a PMT section.
+ * vidtv_pmt_desc_assign - Assigns a descriptor loop at some point in a PMT section.
  * @pmt: The PMT section that will contain the descriptor loop
  * @to: Where in the PMT to assign this descriptor loop to
  * @desc: The descriptor loop that will be assigned.
@@ -370,7 +434,7 @@ void vidtv_pmt_desc_assign(struct vidtv_psi_table_pmt *pmt,
                           struct vidtv_psi_desc *desc);
 
 /**
- * vidtv_psi_sdt_desc_assign - Assigns a descriptor loop at some point in a SDT.
+ * vidtv_sdt_desc_assign - Assigns a descriptor loop at some point in a SDT.
  * @sdt: The SDT that will contain the descriptor loop
  * @to: Where in the SDT to assign this descriptor loop to
  * @desc: The descriptor loop that will be assigned.
@@ -410,10 +474,9 @@ void vidtv_psi_pmt_stream_assign(struct vidtv_psi_table_pmt *pmt,
 struct vidtv_psi_desc *vidtv_psi_desc_clone(struct vidtv_psi_desc *desc);
 
 /**
- * vidtv_psi_create_sec_for_each_pat_entry - Create a PMT section for each
+ * vidtv_psi_pmt_create_sec_for_each_pat_entry - Create a PMT section for each
  * program found in the PAT
  * @pat: The PAT to look for programs.
- * @s: The stream loop (one or more streams)
  * @pcr_pid: packet ID for the PCR to be used for the program described in this
  * PMT section
  */
@@ -492,7 +555,7 @@ struct vidtv_psi_pat_write_args {
  * equal to the size of the PAT, since more space is needed for TS headers during TS
  * encapsulation.
  */
-u32 vidtv_psi_pat_write_into(struct vidtv_psi_pat_write_args args);
+u32 vidtv_psi_pat_write_into(struct vidtv_psi_pat_write_args *args);
 
 /**
  * struct vidtv_psi_sdt_write_args - Arguments for writing a SDT table
@@ -524,16 +587,18 @@ struct vidtv_psi_sdt_write_args {
  * equal to the size of the SDT, since more space is needed for TS headers during TS
  * encapsulation.
  */
-u32 vidtv_psi_sdt_write_into(struct vidtv_psi_sdt_write_args args);
+u32 vidtv_psi_sdt_write_into(struct vidtv_psi_sdt_write_args *args);
 
 /**
  * struct vidtv_psi_pmt_write_args - Arguments for writing a PMT section
  * @buf: The destination buffer.
  * @offset: The offset into the destination buffer.
  * @pmt: A pointer to the PMT.
+ * @pid: Program ID
  * @buf_sz: The size of the destination buffer.
  * @continuity_counter: A pointer to the CC. Incremented on every new packet.
- *
+ * @pcr_pid: The TS PID used for the PSI packets. All channels will share the
+ * same PCR.
  */
 struct vidtv_psi_pmt_write_args {
        char *buf;
@@ -557,7 +622,7 @@ struct vidtv_psi_pmt_write_args {
  * equal to the size of the PMT section, since more space is needed for TS headers
  * during TS encapsulation.
  */
-u32 vidtv_psi_pmt_write_into(struct vidtv_psi_pmt_write_args args);
+u32 vidtv_psi_pmt_write_into(struct vidtv_psi_pmt_write_args *args);
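All three PSI write helpers (PAT, SDT, PMT) now take their argument struct by pointer, avoiding a by-value copy of the whole struct on every call. A minimal calling sketch, assuming a PAT built earlier; the field names mirror the NIT/EIT argument structs declared later in this header:

	char buf[4096];   /* destination for the encapsulated TS packets */
	u8 cc = 0;        /* continuity counter, shared across writes */
	struct vidtv_psi_pat_write_args args = {
		.buf                = buf,
		.offset             = 0,
		.pat                = pat,          /* assumed built earlier */
		.buf_sz             = sizeof(buf),
		.continuity_counter = &cc,
	};
	u32 nbytes;

	nbytes = vidtv_psi_pat_write_into(&args);   /* now passed by pointer */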
 
 /**
  * vidtv_psi_find_pmt_sec - Finds the PMT section for 'program_num'
@@ -574,4 +639,171 @@ struct vidtv_psi_table_pmt *vidtv_psi_find_pmt_sec(struct vidtv_psi_table_pmt **
 u16 vidtv_psi_get_pat_program_pid(struct vidtv_psi_table_pat_program *p);
 u16 vidtv_psi_pmt_stream_get_elem_pid(struct vidtv_psi_table_pmt_stream *s);
 
+/**
+ * struct vidtv_psi_table_transport - An entry in the TS loop for the NIT and/or other tables.
+ * See ETSI 300 468 section 5.2.1
+ * @transport_id: The TS ID being described
+ * @network_id: The network_id that contains the TS ID
+ * @bitfield: Contains the descriptor loop length
+ * @descriptor: A descriptor loop
+ * @next: Pointer to the next entry
+ *
+ */
+struct vidtv_psi_table_transport {
+       __be16 transport_id;
+       __be16 network_id;
+       __be16 bitfield; /* desc_len: 12, reserved: 4 */
+       struct vidtv_psi_desc *descriptor;
+       struct vidtv_psi_table_transport *next;
+} __packed;
+
+/**
+ * struct vidtv_psi_table_nit - A Network Information Table (NIT). See ETSI 300
+ * 468 section 5.2.1
+ * @header: A PSI table header
+ * @bitfield: Contains the network descriptor length
+ * @descriptor: A descriptor loop describing the network
+ * @bitfield2: Contains the transport stream loop length
+ * @transport: The transport stream loop
+ *
+ */
+struct vidtv_psi_table_nit {
+       struct vidtv_psi_table_header header;
+       __be16 bitfield; /* network_desc_len: 12, reserved: 4 */
+       struct vidtv_psi_desc *descriptor;
+       __be16 bitfield2; /* ts_loop_len: 12, reserved: 4 */
+       struct vidtv_psi_table_transport *transport;
+} __packed;
+
+struct vidtv_psi_table_nit
+*vidtv_psi_nit_table_init(u16 network_id,
+                         u16 transport_stream_id,
+                         char *network_name,
+                         struct vidtv_psi_desc_service_list_entry *service_list);
+
+/**
+ * struct vidtv_psi_nit_write_args - Arguments for writing a NIT section
+ * @buf: The destination buffer.
+ * @offset: The offset into the destination buffer.
+ * @nit: A pointer to the NIT
+ * @buf_sz: The size of the destination buffer.
+ * @continuity_counter: A pointer to the CC. Incremented on every new packet.
+ *
+ */
+struct vidtv_psi_nit_write_args {
+       char *buf;
+       u32 offset;
+       struct vidtv_psi_table_nit *nit;
+       u32 buf_sz;
+       u8 *continuity_counter;
+};
+
+/**
+ * vidtv_psi_nit_write_into - Write NIT as MPEG-TS packets into a buffer.
+ * @args: an instance of struct vidtv_psi_nit_write_args
+ *
+ * This function writes the MPEG TS packets for a NIT table into a buffer.
+ * Calling code will usually generate the NIT via a call to its init function
+ * and thus is responsible for freeing it.
+ *
+ * Return: The number of bytes written into the buffer. This is NOT
+ * equal to the size of the NIT, since more space is needed for TS headers during TS
+ * encapsulation.
+ */
+u32 vidtv_psi_nit_write_into(struct vidtv_psi_nit_write_args *args);
+
+void vidtv_psi_nit_table_destroy(struct vidtv_psi_table_nit *nit);
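Taken together, the declarations above suggest the following NIT lifecycle. A hedged sketch; the IDs, network name, and service list are placeholders:

	struct vidtv_psi_nit_write_args args = {};
	struct vidtv_psi_table_nit *nit;
	char buf[4096];
	u8 cc = 0;

	nit = vidtv_psi_nit_table_init(0x0744, 0x0744, "vidtv",
				       service_list /* assumed built earlier */);
	if (!nit)
		return -ENOMEM;

	args.buf = buf;
	args.nit = nit;
	args.buf_sz = sizeof(buf);
	args.continuity_counter = &cc;
	vidtv_psi_nit_write_into(&args);

	/* The caller created the table, so the caller frees it. */
	vidtv_psi_nit_table_destroy(nit);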
+
+/*
+ * struct vidtv_psi_table_eit_event - An entry in the EIT event loop.
+ * Its descriptor loop typically carries a short event descriptor,
+ * see ETSI EN 300 468 v1.15.1 section 6.2.37
+ */
+struct vidtv_psi_table_eit_event {
+       __be16 event_id;
+       u8 start_time[5];
+       u8 duration[3];
+       __be16 bitfield; /* desc_length: 12, free_CA_mode: 1, running_status: 3 */
+       struct vidtv_psi_desc *descriptor;
+       struct vidtv_psi_table_eit_event *next;
+} __packed;
+
+/*
+ * struct vidtv_psi_table_eit - An Event Information Table (EIT)
+ * See ETSI 300 468 section 5.2.4
+ */
+struct vidtv_psi_table_eit {
+       struct vidtv_psi_table_header header;
+       __be16 transport_id;
+       __be16 network_id;
+       u8 last_segment;
+       u8 last_table_id;
+       struct vidtv_psi_table_eit_event *event;
+} __packed;
+
+struct vidtv_psi_table_eit
+*vidtv_psi_eit_table_init(u16 network_id,
+                         u16 transport_stream_id,
+                         __be16 service_id);
+
+/**
+ * struct vidtv_psi_eit_write_args - Arguments for writing an EIT section
+ * @buf: The destination buffer.
+ * @offset: The offset into the destination buffer.
+ * @eit: A pointer to the EIT
+ * @buf_sz: The size of the destination buffer.
+ * @continuity_counter: A pointer to the CC. Incremented on every new packet.
+ *
+ */
+struct vidtv_psi_eit_write_args {
+       char *buf;
+       u32 offset;
+       struct vidtv_psi_table_eit *eit;
+       u32 buf_sz;
+       u8 *continuity_counter;
+};
+
+/**
+ * vidtv_psi_eit_write_into - Write EIT as MPEG-TS packets into a buffer.
+ * @args: an instance of struct vidtv_psi_eit_write_args
+ *
+ * This function writes the MPEG TS packets for an EIT table into a buffer.
+ * Calling code will usually generate the EIT via a call to its init function
+ * and thus is responsible for freeing it.
+ *
+ * Return: The number of bytes written into the buffer. This is NOT
+ * equal to the size of the EIT, since more space is needed for TS headers during TS
+ * encapsulation.
+ */
+u32 vidtv_psi_eit_write_into(struct vidtv_psi_eit_write_args *args);
+
+void vidtv_psi_eit_table_destroy(struct vidtv_psi_table_eit *eit);
+
+/**
+ * vidtv_psi_eit_table_update_sec_len - Recompute and update the EIT section length.
+ * @eit: The EIT whose length is to be updated.
+ *
+ * This will traverse the table and accumulate the length of its components,
+ * which is then used to replace the 'section_length' field.
+ *
+ * If section_length > EIT_MAX_SECTION_LEN, the operation fails.
+ */
+void vidtv_psi_eit_table_update_sec_len(struct vidtv_psi_table_eit *eit);
+
+/**
+ * vidtv_psi_eit_event_assign - Assigns the event loop to the EIT.
+ * @eit: The EIT to assign to.
+ * @e: The event loop
+ *
+ * This will free the previous event loop in the table.
+ * This will assign ownership of the event loop to the table, i.e. the table
+ * will free this event loop when a call to its destroy function is made.
+ */
+void vidtv_psi_eit_event_assign(struct vidtv_psi_table_eit *eit,
+                               struct vidtv_psi_table_eit_event *e);
+
+struct vidtv_psi_table_eit_event
+*vidtv_psi_eit_event_init(struct vidtv_psi_table_eit_event *head, u16 event_id);
+
+void vidtv_psi_eit_event_destroy(struct vidtv_psi_table_eit_event *e);
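The EIT follows the same pattern; note that vidtv_psi_eit_table_init() takes the service_id already in big-endian form. A hedged end-to-end sketch with illustrative IDs:

	struct vidtv_psi_eit_write_args args = {};
	struct vidtv_psi_table_eit_event *event;
	struct vidtv_psi_table_eit *eit;
	char buf[4096];
	u8 cc = 0;

	eit = vidtv_psi_eit_table_init(0x0744, 0x0744, cpu_to_be16(0x0880));
	if (!eit)
		return -ENOMEM;

	/* One-entry event loop; assignment hands ownership to the table. */
	event = vidtv_psi_eit_event_init(NULL, 1 /* event_id */);
	vidtv_psi_eit_event_assign(eit, event);

	args.buf = buf;
	args.eit = eit;
	args.buf_sz = sizeof(buf);
	args.continuity_counter = &cc;
	vidtv_psi_eit_write_into(&args);

	/* Destroys the table and the event loop it now owns. */
	vidtv_psi_eit_table_destroy(eit);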
+
 #endif // VIDTV_PSI_H
index a447ccb..d79b658 100644 (file)
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s, %d: " fmt, __func__, __LINE__
 
-#include <linux/types.h>
-#include <linux/slab.h>
+#include <linux/bug.h>
 #include <linux/crc32.h>
-#include <linux/vmalloc.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
+#include <linux/fixp-arith.h>
 #include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
 #include <linux/printk.h>
 #include <linux/ratelimit.h>
-#include <linux/fixp-arith.h>
-
-#include <linux/math64.h>
-#include <asm/byteorder.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
 
-#include "vidtv_s302m.h"
-#include "vidtv_encoder.h"
 #include "vidtv_common.h"
+#include "vidtv_encoder.h"
+#include "vidtv_s302m.h"
 
 #define S302M_SAMPLING_RATE_HZ 48000
 #define PES_PRIVATE_STREAM_1 0xbd  /* PES: private_stream_1 */
@@ -79,8 +78,9 @@ struct tone_duration {
        int duration;
 };
 
-#define COMPASS 120            /* beats per minute (Allegro) */
-static const struct tone_duration beethoven_5th_symphony[] = {
+#define COMPASS 100 /* beats per minute */
+static const struct tone_duration beethoven_fur_elise[] = {
+       { NOTE_SILENT, 512},
        { NOTE_E_6, 128},  { NOTE_DS_6, 128}, { NOTE_E_6, 128},
        { NOTE_DS_6, 128}, { NOTE_E_6, 128},  { NOTE_B_5, 128},
        { NOTE_D_6, 128},  { NOTE_C_6, 128},  { NOTE_A_3, 128},
@@ -121,31 +121,36 @@ static const struct tone_duration beethoven_5th_symphony[] = {
        { NOTE_E_5, 128},  { NOTE_D_5, 128},  { NOTE_A_3, 128},
        { NOTE_E_4, 128},  { NOTE_A_4, 128},  { NOTE_E_4, 128},
        { NOTE_D_5, 128},  { NOTE_C_5, 128},  { NOTE_E_3, 128},
-       { NOTE_E_4, 128},  { NOTE_E_5, 255},  { NOTE_E_6, 128},
-       { NOTE_E_5, 128},  { NOTE_E_6, 128},  { NOTE_E_5, 255},
+       { NOTE_E_4, 128},  { NOTE_E_5, 128},  { NOTE_E_5, 128},
+       { NOTE_E_6, 128},  { NOTE_E_5, 128},  { NOTE_E_6, 128},
+       { NOTE_E_5, 128},  { NOTE_E_5, 128},  { NOTE_DS_5, 128},
+       { NOTE_E_5, 128},  { NOTE_DS_6, 128}, { NOTE_E_6, 128},
        { NOTE_DS_5, 128}, { NOTE_E_5, 128},  { NOTE_DS_6, 128},
-       { NOTE_E_6, 128},  { NOTE_DS_5, 128}, { NOTE_E_5, 128},
-       { NOTE_DS_6, 128}, { NOTE_E_6, 128},  { NOTE_DS_6, 128},
        { NOTE_E_6, 128},  { NOTE_DS_6, 128}, { NOTE_E_6, 128},
-       { NOTE_B_5, 128},  { NOTE_D_6, 128},  { NOTE_C_6, 128},
-       { NOTE_A_3, 128},  { NOTE_E_4, 128},  { NOTE_A_4, 128},
-       { NOTE_C_5, 128},  { NOTE_E_5, 128},  { NOTE_A_5, 128},
-       { NOTE_E_3, 128},  { NOTE_E_4, 128},  { NOTE_GS_4, 128},
-       { NOTE_E_5, 128},  { NOTE_GS_5, 128}, { NOTE_B_5, 128},
-       { NOTE_A_3, 128},  { NOTE_E_4, 128},  { NOTE_A_4, 128},
-       { NOTE_E_5, 128},  { NOTE_E_6, 128},  { NOTE_DS_6, 128},
+       { NOTE_DS_6, 128}, { NOTE_E_6, 128},  { NOTE_B_5, 128},
+       { NOTE_D_6, 128},  { NOTE_C_6, 128},  { NOTE_A_3, 128},
+       { NOTE_E_4, 128},  { NOTE_A_4, 128},  { NOTE_C_5, 128},
+       { NOTE_E_5, 128},  { NOTE_A_5, 128},  { NOTE_E_3, 128},
+       { NOTE_E_4, 128},  { NOTE_GS_4, 128}, { NOTE_E_5, 128},
+       { NOTE_GS_5, 128}, { NOTE_B_5, 128},  { NOTE_A_3, 128},
+       { NOTE_E_4, 128},  { NOTE_A_4, 128},  { NOTE_E_5, 128},
        { NOTE_E_6, 128},  { NOTE_DS_6, 128}, { NOTE_E_6, 128},
-       { NOTE_B_5, 128},  { NOTE_D_6, 128},  { NOTE_C_6, 128},
-       { NOTE_A_3, 128},  { NOTE_E_4, 128},  { NOTE_A_4, 128},
-       { NOTE_C_5, 128},  { NOTE_E_5, 128},  { NOTE_A_5, 128},
-       { NOTE_E_3, 128},  { NOTE_E_4, 128},  { NOTE_GS_4, 128},
-       { NOTE_E_5, 128},  { NOTE_C_6, 128},  { NOTE_B_5, 128},
-       { NOTE_C_5, 255},  { NOTE_C_5, 255},  { NOTE_SILENT, 512},
+       { NOTE_DS_6, 128}, { NOTE_E_6, 128},  { NOTE_B_5, 128},
+       { NOTE_D_6, 128},  { NOTE_C_6, 128},  { NOTE_A_3, 128},
+       { NOTE_E_4, 128},  { NOTE_A_4, 128},  { NOTE_C_5, 128},
+       { NOTE_E_5, 128},  { NOTE_A_5, 128},  { NOTE_E_3, 128},
+       { NOTE_E_4, 128},  { NOTE_GS_4, 128}, { NOTE_E_5, 128},
+       { NOTE_C_6, 128},  { NOTE_B_5, 128},  { NOTE_A_5, 512},
+       { NOTE_SILENT, 256},
 };
 
 static struct vidtv_access_unit *vidtv_s302m_access_unit_init(struct vidtv_access_unit *head)
 {
-       struct vidtv_access_unit *au = kzalloc(sizeof(*au), GFP_KERNEL);
+       struct vidtv_access_unit *au;
+
+       au = kzalloc(sizeof(*au), GFP_KERNEL);
+       if (!au)
+               return NULL;
 
        if (head) {
                while (head->next)
@@ -196,10 +201,10 @@ static void vidtv_s302m_alloc_au(struct vidtv_encoder *e)
 static void
 vidtv_s302m_compute_sample_count_from_video(struct vidtv_encoder *e)
 {
-       struct vidtv_access_unit *au = e->access_units;
        struct vidtv_access_unit *sync_au = e->sync->access_units;
-       u32 vau_duration_usecs;
+       struct vidtv_access_unit *au = e->access_units;
        u32 sample_duration_usecs;
+       u32 vau_duration_usecs;
        u32 s;
 
        vau_duration_usecs    = USEC_PER_SEC / e->sync->sampling_rate_hz;
@@ -230,36 +235,32 @@ static u16 vidtv_s302m_get_sample(struct vidtv_encoder *e)
 {
        u16 sample;
        int pos;
+       struct vidtv_s302m_ctx *ctx = e->ctx;
 
        if (!e->src_buf) {
                /*
                 * Simple tone generator: play the tones at the
-                * beethoven_5th_symphony array.
+                * beethoven_fur_elise array.
                 */
-               if (e->last_duration <= 0) {
-                       if (e->src_buf_offset >= ARRAY_SIZE(beethoven_5th_symphony))
+               if (ctx->last_duration <= 0) {
+                       if (e->src_buf_offset >= ARRAY_SIZE(beethoven_fur_elise))
                                e->src_buf_offset = 0;
 
-                       e->last_tone = beethoven_5th_symphony[e->src_buf_offset].note;
-                       e->last_duration = beethoven_5th_symphony[e->src_buf_offset].duration * S302M_SAMPLING_RATE_HZ / COMPASS / 5;
+                       ctx->last_tone = beethoven_fur_elise[e->src_buf_offset].note;
+                       ctx->last_duration = beethoven_fur_elise[e->src_buf_offset].duration *
+                                            S302M_SAMPLING_RATE_HZ / COMPASS / 5;
                        e->src_buf_offset++;
-                       e->note_offset = 0;
+                       ctx->note_offset = 0;
                } else {
-                       e->last_duration--;
+                       ctx->last_duration--;
                }
 
-               /* Handle silent */
-               if (!e->last_tone) {
-                       e->src_buf_offset = 0;
+               /* Handle pause notes */
+               if (!ctx->last_tone)
                        return 0x8000;
-               }
-
-               pos = (2 * PI * e->note_offset * e->last_tone / S302M_SAMPLING_RATE_HZ);
 
-               if (pos == 360)
-                       e->note_offset = 0;
-               else
-                       e->note_offset++;
+               pos = (2 * PI * ctx->note_offset * ctx->last_tone) / S302M_SAMPLING_RATE_HZ;
+               ctx->note_offset++;
 
                return (fixp_sin32(pos % (2 * PI)) >> 16) + 0x8000;
        }
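The generator above converts each note's frequency into a phase angle and looks the sample up with the kernel's fixed-point sine helper. A hedged restatement of the per-sample math, assuming the driver-local PI macro equals 180 (so the phase is in degrees) and the S302M_SAMPLING_RATE_HZ define above:

	static u16 tone_sample(unsigned int note_offset, unsigned int note_hz)
	{
		/* Phase in degrees: 2 * PI == 360 when PI is 180. */
		int pos = (2 * PI * note_offset * note_hz) / S302M_SAMPLING_RATE_HZ;

		/*
		 * fixp_sin32() returns a sine scaled to [-INT_MAX, INT_MAX];
		 * >> 16 narrows it to a signed 16-bit sample and + 0x8000
		 * biases it into the unsigned range, 0x8000 being silence.
		 */
		return (fixp_sin32(pos % (2 * PI)) >> 16) + 0x8000;
	}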
@@ -289,9 +290,9 @@ static u16 vidtv_s302m_get_sample(struct vidtv_encoder *e)
 static u32 vidtv_s302m_write_frame(struct vidtv_encoder *e,
                                   u16 sample)
 {
-       u32 nbytes = 0;
-       struct vidtv_s302m_frame_16 f = {};
        struct vidtv_s302m_ctx *ctx = e->ctx;
+       struct vidtv_s302m_frame_16 f = {};
+       u32 nbytes = 0;
 
        /* from ffmpeg: see s302enc.c */
 
@@ -388,6 +389,8 @@ static void vidtv_s302m_write_frames(struct vidtv_encoder *e)
 
 static void *vidtv_s302m_encode(struct vidtv_encoder *e)
 {
+       struct vidtv_s302m_ctx *ctx = e->ctx;
+
        /*
         * According to SMPTE 302M, an audio access unit is specified as those
         * AES3 words that are associated with a corresponding video frame.
@@ -401,8 +404,6 @@ static void *vidtv_s302m_encode(struct vidtv_encoder *e)
         * ffmpeg
         */
 
-       struct vidtv_s302m_ctx *ctx = e->ctx;
-
        vidtv_s302m_access_unit_destroy(e);
        vidtv_s302m_alloc_au(e);
 
@@ -440,8 +441,13 @@ static u32 vidtv_s302m_clear(struct vidtv_encoder *e)
 struct vidtv_encoder
 *vidtv_s302m_encoder_init(struct vidtv_s302m_encoder_init_args args)
 {
-       struct vidtv_encoder *e = kzalloc(sizeof(*e), GFP_KERNEL);
        u32 priv_sz = sizeof(struct vidtv_s302m_ctx);
+       struct vidtv_s302m_ctx *ctx;
+       struct vidtv_encoder *e;
+
+       e = kzalloc(sizeof(*e), GFP_KERNEL);
+       if (!e)
+               return NULL;
 
        e->id = S302M;
 
@@ -453,14 +459,21 @@ struct vidtv_encoder
        e->encoder_buf_offset = 0;
 
        e->sample_count = 0;
-       e->last_duration = 0;
 
        e->src_buf = (args.src_buf) ? args.src_buf : NULL;
        e->src_buf_sz = (args.src_buf) ? args.src_buf_sz : 0;
        e->src_buf_offset = 0;
 
        e->is_video_encoder = false;
-       e->ctx = kzalloc(priv_sz, GFP_KERNEL);
+
+       ctx = kzalloc(priv_sz, GFP_KERNEL);
+       if (!ctx) {
+               kfree(e);
+               return NULL;
+       }
+
+       e->ctx = ctx;
+       ctx->last_duration = 0;
 
        e->encode = vidtv_s302m_encode;
        e->clear = vidtv_s302m_clear;
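Both allocations in this function are now checked, with the second unwinding the first on failure. The general idiom, with illustrative names:

	struct foo *f;
	struct foo_ctx *ctx;

	f = kzalloc(sizeof(*f), GFP_KERNEL);
	if (!f)
		return NULL;            /* first allocation: nothing to undo */

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		kfree(f);               /* unwind the earlier allocation */
		return NULL;
	}
	f->ctx = ctx;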
index eca5e31..9cc94e4 100644 (file)
@@ -19,7 +19,6 @@
 #define VIDTV_S302M_H
 
 #include <linux/types.h>
-#include <asm/byteorder.h>
 
 #include "vidtv_encoder.h"
 
  * @enc: A pointer to the containing encoder structure.
  * @frame_index: The current frame in a block
  * @au_count: The total number of access units encoded up to now
+ * @last_duration: Remaining duration, in samples, of the tone being played
+ * @note_offset: Position in the musical tone array
+ * @last_tone: Tone currently being played
  */
 struct vidtv_s302m_ctx {
        struct vidtv_encoder *enc;
        u32 frame_index;
        u32 au_count;
+       int last_duration;
+       unsigned int note_offset;
+       enum musical_notes last_tone;
 };
 
-/**
+/*
  * struct vidtv_smpte_s302m_es - s302m MPEG Elementary Stream header.
  *
  * See SMPTE 302M 2007 table 1.
index 190b9e4..ca4bb9c 100644 (file)
@@ -9,14 +9,13 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ":%s, %d: " fmt, __func__, __LINE__
 
+#include <linux/math64.h>
 #include <linux/printk.h>
 #include <linux/ratelimit.h>
 #include <linux/types.h>
-#include <linux/math64.h>
-#include <asm/byteorder.h>
 
-#include "vidtv_ts.h"
 #include "vidtv_common.h"
+#include "vidtv_ts.h"
 
 static u32 vidtv_ts_write_pcr_bits(u8 *to, u32 to_offset, u64 pcr)
 {
index 83dcc91..f5e8e1f 100644 (file)
@@ -11,7 +11,6 @@
 #define VIDTV_TS_H
 
 #include <linux/types.h>
-#include <asm/byteorder.h>
 
 #define TS_SYNC_BYTE 0x47
 #define TS_PACKET_LEN 188
@@ -45,7 +44,7 @@ struct vidtv_mpeg_ts {
                u8 adaptation_field:1;
                u8 scrambling:2;
        } __packed;
-       struct vidtv_mpeg_ts_adaption adaption[];
+       struct vidtv_mpeg_ts_adaption *adaption;
 } __packed;
 
 /**
@@ -54,7 +53,7 @@ struct vidtv_mpeg_ts {
  * @dest_offset: The byte offset into the buffer.
  * @pid: The TS PID for the PCR packets.
  * @buf_sz: The size of the buffer in bytes.
- * @countinuity_counter: The TS continuity_counter.
+ * @continuity_counter: The TS continuity_counter.
  * @pcr: A sample from the system clock.
  */
 struct pcr_write_args {
@@ -71,7 +70,7 @@ struct pcr_write_args {
  * @dest_buf: The buffer to write into.
  * @dest_offset: The byte offset into the buffer.
  * @buf_sz: The size of the buffer in bytes.
- * @countinuity_counter: The TS continuity_counter.
+ * @continuity_counter: The TS continuity_counter.
  */
 struct null_packet_write_args {
        void *dest_buf;
index 9bc49e0..14b6bc9 100644 (file)
 #include <linux/errno.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+
 #include <media/dvb_frontend.h>
-#include <linux/printk.h>
-#include <linux/ratelimit.h>
 
 #include "vidtv_tuner.h"
 
index 8455b2d..fd55346 100644 (file)
@@ -11,6 +11,7 @@
 #define VIDTV_TUNER_H
 
 #include <linux/types.h>
+
 #include <media/dvb_frontend.h>
 
 #define NUM_VALID_TUNER_FREQS 8
index 1c0a418..926408b 100644 (file)
@@ -736,7 +736,6 @@ static int at24_probe(struct i2c_client *client)
 
        nvmem_config.type = NVMEM_TYPE_EEPROM;
        nvmem_config.dev = dev;
-       nvmem_config.id = NVMEM_DEVID_AUTO;
        nvmem_config.read_only = !writable;
        nvmem_config.root_only = !(flags & AT24_FLAG_IRUGO);
        nvmem_config.owner = THIS_MODULE;
index 2057222..783bbdc 100644 (file)
@@ -231,16 +231,16 @@ delete_cdev_device:
 
 static void device_cdev_sysfs_del(struct hl_device *hdev)
 {
-       /* device_release() won't be called so must free devices explicitly */
-       if (!hdev->cdev_sysfs_created) {
-               kfree(hdev->dev_ctrl);
-               kfree(hdev->dev);
-               return;
-       }
+       if (!hdev->cdev_sysfs_created)
+               goto put_devices;
 
        hl_sysfs_fini(hdev);
        cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
        cdev_device_del(&hdev->cdev, hdev->dev);
+
+put_devices:
+       put_device(hdev->dev);
+       put_device(hdev->dev_ctrl);
 }
 
 /*
@@ -1371,9 +1371,9 @@ sw_fini:
 early_fini:
        device_early_fini(hdev);
 free_dev_ctrl:
-       kfree(hdev->dev_ctrl);
+       put_device(hdev->dev_ctrl);
 free_dev:
-       kfree(hdev->dev);
+       put_device(hdev->dev);
 out_disabled:
        hdev->disabled = true;
        if (add_cdev_sysfs_on_err)
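Once a struct device has been initialized it is refcounted and must be released through put_device(), never kfree(); the release callback frees the memory when the last reference drops. A condensed sketch of the rule the error paths above now follow, with illustrative names:

	err = cdev_device_add(&my_cdev, dev);
	if (err) {
		put_device(dev);   /* drops the last ref; ->release() frees */
		return err;
	}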
index 8422781..bfe223a 100644 (file)
@@ -1626,6 +1626,7 @@ static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
                        goto host_hpage_range_err;
                }
        } else {
+               kfree(ctx->host_huge_va_range);
                ctx->host_huge_va_range = ctx->host_va_range;
        }
 
index 2519a34..7ea6b43 100644 (file)
@@ -5436,6 +5436,8 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
                params.num_memories = 33;
                params.derr = true;
                params.disable_clock_gating = true;
+               extract_info_from_fw = false;
+               break;
        default:
                return;
        }
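The fix adds the break the new case was missing; without it, control fell through into default: and returned before the error information was extracted. A minimal illustration of the pitfall, with a hypothetical case label:

	switch (event_type) {
	case EXAMPLE_ECC_EVENT:            /* hypothetical label */
		params.num_memories = 33;
		break;                     /* without this, falls into default */
	default:
		return;                    /* and the event is silently dropped */
	}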
index c06581f..f5fd5b7 100644 (file)
@@ -46,14 +46,4 @@ config INTEL_MEI_TXE
          Supported SoCs:
          Intel Bay Trail
 
-config INTEL_MEI_VIRTIO
-       tristate "Intel MEI interface emulation with virtio framework"
-       select INTEL_MEI
-       depends on X86 && PCI && VIRTIO_PCI
-       help
-         This module implements mei hw emulation over virtio transport.
-         The module will be called mei_virtio.
-         Enable this if your virtual machine supports virtual mei
-         device over virtio.
-
 source "drivers/misc/mei/hdcp/Kconfig"
index 52aefaa..f1c76f7 100644 (file)
@@ -22,9 +22,6 @@ obj-$(CONFIG_INTEL_MEI_TXE) += mei-txe.o
 mei-txe-objs := pci-txe.o
 mei-txe-objs += hw-txe.o
 
-obj-$(CONFIG_INTEL_MEI_VIRTIO) += mei-virtio.o
-mei-virtio-objs := hw-virtio.o
-
 mei-$(CONFIG_EVENT_TRACING) += mei-trace.o
 CFLAGS_mei-trace.o = -I$(src)
 
diff --git a/drivers/misc/mei/hw-virtio.c b/drivers/misc/mei/hw-virtio.c
deleted file mode 100644 (file)
index 899dc1c..0000000
+++ /dev/null
@@ -1,874 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Intel Management Engine Interface (Intel MEI) Linux driver
- * Copyright (c) 2018-2020, Intel Corporation.
- */
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/pm_runtime.h>
-#include <linux/scatterlist.h>
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/virtio.h>
-#include <linux/virtio_config.h>
-#include <linux/virtio_ids.h>
-#include <linux/atomic.h>
-
-#include "mei_dev.h"
-#include "hbm.h"
-#include "client.h"
-
-#define MEI_VIRTIO_RPM_TIMEOUT 500
-/* ACRN virtio device types */
-#ifndef VIRTIO_ID_MEI
-#define VIRTIO_ID_MEI 0xFFFE /* virtio mei */
-#endif
-
-/**
- * struct mei_virtio_cfg - settings passed from the virtio backend
- * @buf_depth: read buffer depth in slots (4bytes)
- * @hw_ready: hw is ready for operation
- * @host_reset: synchronize reset with virtio backend
- * @reserved: reserved for alignment
- * @fw_status: FW status
- */
-struct mei_virtio_cfg {
-       u32 buf_depth;
-       u8 hw_ready;
-       u8 host_reset;
-       u8 reserved[2];
-       u32 fw_status[MEI_FW_STATUS_MAX];
-} __packed;
-
-struct mei_virtio_hw {
-       struct mei_device mdev;
-       char name[32];
-
-       struct virtqueue *in;
-       struct virtqueue *out;
-
-       bool host_ready;
-       struct work_struct intr_handler;
-
-       u32 *recv_buf;
-       u8 recv_rdy;
-       size_t recv_sz;
-       u32 recv_idx;
-       u32 recv_len;
-
-       /* send buffer */
-       atomic_t hbuf_ready;
-       const void *send_hdr;
-       const void *send_buf;
-
-       struct mei_virtio_cfg cfg;
-};
-
-#define to_virtio_hw(_dev) container_of(_dev, struct mei_virtio_hw, mdev)
-
-/**
- * mei_virtio_fw_status() - read status register of mei
- * @dev: mei device
- * @fw_status: fw status register values
- *
- * Return: always 0
- */
-static int mei_virtio_fw_status(struct mei_device *dev,
-                               struct mei_fw_status *fw_status)
-{
-       struct virtio_device *vdev = dev_to_virtio(dev->dev);
-
-       fw_status->count = MEI_FW_STATUS_MAX;
-       virtio_cread_bytes(vdev, offsetof(struct mei_virtio_cfg, fw_status),
-                          fw_status->status, sizeof(fw_status->status));
-       return 0;
-}
-
-/**
- * mei_virtio_pg_state() - translate internal pg state
- *   to the mei power gating state
- *   There is no power management in ACRN mode always return OFF
- * @dev: mei device
- *
- * Return:
- * * MEI_PG_OFF - if aliveness is on (always)
- * * MEI_PG_ON  - (never)
- */
-static inline enum mei_pg_state mei_virtio_pg_state(struct mei_device *dev)
-{
-       return MEI_PG_OFF;
-}
-
-/**
- * mei_virtio_hw_config() - configure hw dependent settings
- *
- * @dev: mei device
- *
- * Return: always 0
- */
-static int mei_virtio_hw_config(struct mei_device *dev)
-{
-       return 0;
-}
-
-/**
- * mei_virtio_hbuf_empty_slots() - counts write empty slots.
- * @dev: the device structure
- *
- * Return: always return frontend buf size if buffer is ready, 0 otherwise
- */
-static int mei_virtio_hbuf_empty_slots(struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-
-       return (atomic_read(&hw->hbuf_ready) == 1) ? hw->cfg.buf_depth : 0;
-}
-
-/**
- * mei_virtio_hbuf_is_ready() - checks if write buffer is ready
- * @dev: the device structure
- *
- * Return: true if hbuf is ready
- */
-static bool mei_virtio_hbuf_is_ready(struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-
-       return atomic_read(&hw->hbuf_ready) == 1;
-}
-
-/**
- * mei_virtio_hbuf_max_depth() - returns depth of FE write buffer.
- * @dev: the device structure
- *
- * Return: size of frontend write buffer in bytes
- */
-static u32 mei_virtio_hbuf_depth(const struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-
-       return hw->cfg.buf_depth;
-}
-
-/**
- * mei_virtio_intr_clear() - clear and stop interrupts
- * @dev: the device structure
- */
-static void mei_virtio_intr_clear(struct mei_device *dev)
-{
-       /*
-        * In our virtio solution, there are two types of interrupts,
-        * vq interrupt and config change interrupt.
-        *   1) start/reset rely on virtio config changed interrupt;
-        *   2) send/recv rely on virtio virtqueue interrupts.
-        * They are all virtual interrupts. So, we don't have corresponding
-        * operation to do here.
-        */
-}
-
-/**
- * mei_virtio_intr_enable() - enables mei BE virtqueues callbacks
- * @dev: the device structure
- */
-static void mei_virtio_intr_enable(struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-       struct virtio_device *vdev = dev_to_virtio(dev->dev);
-
-       virtio_config_enable(vdev);
-
-       virtqueue_enable_cb(hw->in);
-       virtqueue_enable_cb(hw->out);
-}
-
-/**
- * mei_virtio_intr_disable() - disables mei BE virtqueues callbacks
- *
- * @dev: the device structure
- */
-static void mei_virtio_intr_disable(struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-       struct virtio_device *vdev = dev_to_virtio(dev->dev);
-
-       virtio_config_disable(vdev);
-
-       virtqueue_disable_cb(hw->in);
-       virtqueue_disable_cb(hw->out);
-}
-
-/**
- * mei_virtio_synchronize_irq() - wait for pending IRQ handlers for all
- *     virtqueue
- * @dev: the device structure
- */
-static void mei_virtio_synchronize_irq(struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-
-       /*
-        * Now, all IRQ handlers are converted to workqueue.
-        * Change synchronize irq to flush this work.
-        */
-       flush_work(&hw->intr_handler);
-}
-
-static void mei_virtio_free_outbufs(struct mei_virtio_hw *hw)
-{
-       kfree(hw->send_hdr);
-       kfree(hw->send_buf);
-       hw->send_hdr = NULL;
-       hw->send_buf = NULL;
-}
-
-/**
- * mei_virtio_write_message() - writes a message to mei virtio back-end service.
- * @dev: the device structure
- * @hdr: mei header of message
- * @hdr_len: header length
- * @data: message payload will be written
- * @data_len: message payload length
- *
- * Return:
- * *  0: on success
- * * -EIO: if write has failed
- * * -ENOMEM: on memory allocation failure
- */
-static int mei_virtio_write_message(struct mei_device *dev,
-                                   const void *hdr, size_t hdr_len,
-                                   const void *data, size_t data_len)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-       struct scatterlist sg[2];
-       const void *hbuf, *dbuf;
-       int ret;
-
-       if (WARN_ON(!atomic_add_unless(&hw->hbuf_ready, -1, 0)))
-               return -EIO;
-
-       hbuf = kmemdup(hdr, hdr_len, GFP_KERNEL);
-       hw->send_hdr = hbuf;
-
-       dbuf = kmemdup(data, data_len, GFP_KERNEL);
-       hw->send_buf = dbuf;
-
-       if (!hbuf || !dbuf) {
-               ret = -ENOMEM;
-               goto fail;
-       }
-
-       sg_init_table(sg, 2);
-       sg_set_buf(&sg[0], hbuf, hdr_len);
-       sg_set_buf(&sg[1], dbuf, data_len);
-
-       ret = virtqueue_add_outbuf(hw->out, sg, 2, hw, GFP_KERNEL);
-       if (ret) {
-               dev_err(dev->dev, "failed to add outbuf\n");
-               goto fail;
-       }
-
-       virtqueue_kick(hw->out);
-       return 0;
-fail:
-
-       mei_virtio_free_outbufs(hw);
-
-       return ret;
-}
-
-/**
- * mei_virtio_count_full_read_slots() - counts read full slots.
- * @dev: the device structure
- *
- * Return: -EOVERFLOW if overflow, otherwise filled slots count
- */
-static int mei_virtio_count_full_read_slots(struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-
-       if (hw->recv_idx > hw->recv_len)
-               return -EOVERFLOW;
-
-       return hw->recv_len - hw->recv_idx;
-}
-
-/**
- * mei_virtio_read_hdr() - Reads 32bit dword from mei virtio receive buffer
- *
- * @dev: the device structure
- *
- * Return: 32bit dword of receive buffer (u32)
- */
-static inline u32 mei_virtio_read_hdr(const struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-
-       WARN_ON(hw->cfg.buf_depth < hw->recv_idx + 1);
-
-       return hw->recv_buf[hw->recv_idx++];
-}
-
-static int mei_virtio_read(struct mei_device *dev, unsigned char *buffer,
-                          unsigned long len)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-       u32 slots = mei_data2slots(len);
-
-       if (WARN_ON(hw->cfg.buf_depth < hw->recv_idx + slots))
-               return -EOVERFLOW;
-
-       /*
-        * Assumption: There is only one MEI message in recv_buf each time.
-        * Backend service need follow this rule too.
-        */
-       memcpy(buffer, hw->recv_buf + hw->recv_idx, len);
-       hw->recv_idx += slots;
-
-       return 0;
-}
-
-static bool mei_virtio_pg_is_enabled(struct mei_device *dev)
-{
-       return false;
-}
-
-static bool mei_virtio_pg_in_transition(struct mei_device *dev)
-{
-       return false;
-}
-
-static void mei_virtio_add_recv_buf(struct mei_virtio_hw *hw)
-{
-       struct scatterlist sg;
-
-       if (hw->recv_rdy) /* not needed */
-               return;
-
-       /* refill the recv_buf to IN virtqueue to get next message */
-       sg_init_one(&sg, hw->recv_buf, mei_slots2data(hw->cfg.buf_depth));
-       hw->recv_len = 0;
-       hw->recv_idx = 0;
-       hw->recv_rdy = 1;
-       virtqueue_add_inbuf(hw->in, &sg, 1, hw->recv_buf, GFP_KERNEL);
-       virtqueue_kick(hw->in);
-}
-
-/**
- * mei_virtio_hw_is_ready() - check whether the BE(hw) has turned ready
- * @dev: mei device
- * Return: bool
- */
-static bool mei_virtio_hw_is_ready(struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-       struct virtio_device *vdev = dev_to_virtio(dev->dev);
-
-       virtio_cread(vdev, struct mei_virtio_cfg,
-                    hw_ready, &hw->cfg.hw_ready);
-
-       dev_dbg(dev->dev, "hw ready %d\n", hw->cfg.hw_ready);
-
-       return hw->cfg.hw_ready;
-}
-
-/**
- * mei_virtio_hw_reset - resets virtio hw.
- *
- * @dev: the device structure
- * @intr_enable: virtio use data/config callbacks
- *
- * Return: 0 on success an error code otherwise
- */
-static int mei_virtio_hw_reset(struct mei_device *dev, bool intr_enable)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-       struct virtio_device *vdev = dev_to_virtio(dev->dev);
-
-       dev_dbg(dev->dev, "hw reset\n");
-
-       dev->recvd_hw_ready = false;
-       hw->host_ready = false;
-       atomic_set(&hw->hbuf_ready, 0);
-       hw->recv_len = 0;
-       hw->recv_idx = 0;
-
-       hw->cfg.host_reset = 1;
-       virtio_cwrite(vdev, struct mei_virtio_cfg,
-                     host_reset, &hw->cfg.host_reset);
-
-       mei_virtio_hw_is_ready(dev);
-
-       if (intr_enable)
-               mei_virtio_intr_enable(dev);
-
-       return 0;
-}
-
-/**
- * mei_virtio_hw_reset_release() - release device from the reset
- * @dev: the device structure
- */
-static void mei_virtio_hw_reset_release(struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-       struct virtio_device *vdev = dev_to_virtio(dev->dev);
-
-       dev_dbg(dev->dev, "hw reset release\n");
-       hw->cfg.host_reset = 0;
-       virtio_cwrite(vdev, struct mei_virtio_cfg,
-                     host_reset, &hw->cfg.host_reset);
-}
-
-/**
- * mei_virtio_hw_ready_wait() - wait until the virtio(hw) has turned ready
- *  or timeout is reached
- * @dev: mei device
- *
- * Return: 0 on success, error otherwise
- */
-static int mei_virtio_hw_ready_wait(struct mei_device *dev)
-{
-       mutex_unlock(&dev->device_lock);
-       wait_event_timeout(dev->wait_hw_ready,
-                          dev->recvd_hw_ready,
-                          mei_secs_to_jiffies(MEI_HW_READY_TIMEOUT));
-       mutex_lock(&dev->device_lock);
-       if (!dev->recvd_hw_ready) {
-               dev_err(dev->dev, "wait hw ready failed\n");
-               return -ETIMEDOUT;
-       }
-
-       dev->recvd_hw_ready = false;
-       return 0;
-}
-
-/**
- * mei_virtio_hw_start() - hw start routine
- * @dev: mei device
- *
- * Return: 0 on success, error otherwise
- */
-static int mei_virtio_hw_start(struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-       int ret;
-
-       dev_dbg(dev->dev, "hw start\n");
-       mei_virtio_hw_reset_release(dev);
-
-       ret = mei_virtio_hw_ready_wait(dev);
-       if (ret)
-               return ret;
-
-       mei_virtio_add_recv_buf(hw);
-       atomic_set(&hw->hbuf_ready, 1);
-       dev_dbg(dev->dev, "hw is ready\n");
-       hw->host_ready = true;
-
-       return 0;
-}
-
-/**
- * mei_virtio_host_is_ready() - check whether the FE has turned ready
- * @dev: mei device
- *
- * Return: bool
- */
-static bool mei_virtio_host_is_ready(struct mei_device *dev)
-{
-       struct mei_virtio_hw *hw = to_virtio_hw(dev);
-
-       dev_dbg(dev->dev, "host ready %d\n", hw->host_ready);
-
-       return hw->host_ready;
-}
-
-/**
- * mei_virtio_data_in() - The callback of recv virtqueue of virtio mei
- * @vq: receiving virtqueue
- */
-static void mei_virtio_data_in(struct virtqueue *vq)
-{
-       struct mei_virtio_hw *hw = vq->vdev->priv;
-
-       /* disable interrupts (enabled again from in the interrupt worker) */
-       virtqueue_disable_cb(hw->in);
-
-       schedule_work(&hw->intr_handler);
-}
-
-/**
- * mei_virtio_data_out() - The callback of send virtqueue of virtio mei
- * @vq: transmitting virtqueue
- */
-static void mei_virtio_data_out(struct virtqueue *vq)
-{
-       struct mei_virtio_hw *hw = vq->vdev->priv;
-
-       schedule_work(&hw->intr_handler);
-}
-
-static void mei_virtio_intr_handler(struct work_struct *work)
-{
-       struct mei_virtio_hw *hw =
-               container_of(work, struct mei_virtio_hw, intr_handler);
-       struct mei_device *dev = &hw->mdev;
-       LIST_HEAD(complete_list);
-       s32 slots;
-       int rets = 0;
-       void *data;
-       unsigned int len;
-
-       mutex_lock(&dev->device_lock);
-
-       if (dev->dev_state == MEI_DEV_DISABLED) {
-               dev_warn(dev->dev, "Interrupt in disabled state.\n");
-               mei_virtio_intr_disable(dev);
-               goto end;
-       }
-
-       /* check if ME wants a reset */
-       if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) {
-               dev_warn(dev->dev, "BE service not ready: resetting.\n");
-               schedule_work(&dev->reset_work);
-               goto end;
-       }
-
-       /* check if we need to start the dev */
-       if (!mei_host_is_ready(dev)) {
-               if (mei_hw_is_ready(dev)) {
-                       dev_dbg(dev->dev, "we need to start the dev.\n");
-                       dev->recvd_hw_ready = true;
-                       wake_up(&dev->wait_hw_ready);
-               } else {
-                       dev_warn(dev->dev, "Spurious Interrupt\n");
-               }
-               goto end;
-       }
-
-       /* read */
-       if (hw->recv_rdy) {
-               data = virtqueue_get_buf(hw->in, &len);
-               if (!data || !len) {
-                       dev_dbg(dev->dev, "No data %d", len);
-               } else {
-                       dev_dbg(dev->dev, "data_in %d\n", len);
-                       WARN_ON(data != hw->recv_buf);
-                       hw->recv_len = mei_data2slots(len);
-                       hw->recv_rdy = 0;
-               }
-       }
-
-       /* write */
-       if (!atomic_read(&hw->hbuf_ready)) {
-               if (!virtqueue_get_buf(hw->out, &len)) {
-                       dev_warn(dev->dev, "Failed to getbuf\n");
-               } else {
-                       mei_virtio_free_outbufs(hw);
-                       atomic_inc(&hw->hbuf_ready);
-               }
-       }
-
-       /* check slots available for reading */
-       slots = mei_count_full_read_slots(dev);
-       while (slots > 0) {
-               dev_dbg(dev->dev, "slots to read = %08x\n", slots);
-               rets = mei_irq_read_handler(dev, &complete_list, &slots);
-
-               if (rets &&
-                   (dev->dev_state != MEI_DEV_RESETTING &&
-                    dev->dev_state != MEI_DEV_POWER_DOWN)) {
-                       dev_err(dev->dev, "mei_irq_read_handler ret = %d.\n",
-                               rets);
-                       schedule_work(&dev->reset_work);
-                       goto end;
-               }
-       }
-
-       dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
-
-       mei_irq_write_handler(dev, &complete_list);
-
-       dev->hbuf_is_ready = mei_hbuf_is_ready(dev);
-
-       mei_irq_compl_handler(dev, &complete_list);
-
-       mei_virtio_add_recv_buf(hw);
-
-end:
-       if (dev->dev_state != MEI_DEV_DISABLED) {
-               if (!virtqueue_enable_cb(hw->in))
-                       schedule_work(&hw->intr_handler);
-       }
-
-       mutex_unlock(&dev->device_lock);
-}
-
-static void mei_virtio_config_changed(struct virtio_device *vdev)
-{
-       struct mei_virtio_hw *hw = vdev->priv;
-       struct mei_device *dev = &hw->mdev;
-
-       virtio_cread(vdev, struct mei_virtio_cfg,
-                    hw_ready, &hw->cfg.hw_ready);
-
-       if (dev->dev_state == MEI_DEV_DISABLED) {
-               dev_dbg(dev->dev, "disabled state don't start\n");
-               return;
-       }
-
-       /* Run intr handler once to handle reset notify */
-       schedule_work(&hw->intr_handler);
-}
-
-static void mei_virtio_remove_vqs(struct virtio_device *vdev)
-{
-       struct mei_virtio_hw *hw = vdev->priv;
-
-       virtqueue_detach_unused_buf(hw->in);
-       hw->recv_len = 0;
-       hw->recv_idx = 0;
-       hw->recv_rdy = 0;
-
-       virtqueue_detach_unused_buf(hw->out);
-
-       mei_virtio_free_outbufs(hw);
-
-       vdev->config->del_vqs(vdev);
-}
-
-/*
- * There are two virtqueues, one is for send and another is for recv.
- */
-static int mei_virtio_init_vqs(struct mei_virtio_hw *hw,
-                              struct virtio_device *vdev)
-{
-       struct virtqueue *vqs[2];
-
-       vq_callback_t *cbs[] = {
-               mei_virtio_data_in,
-               mei_virtio_data_out,
-       };
-       static const char * const names[] = {
-               "in",
-               "out",
-       };
-       int ret;
-
-       ret = virtio_find_vqs(vdev, 2, vqs, cbs, names, NULL);
-       if (ret)
-               return ret;
-
-       hw->in = vqs[0];
-       hw->out = vqs[1];
-
-       return 0;
-}
-
-static const struct mei_hw_ops mei_virtio_ops = {
-       .fw_status = mei_virtio_fw_status,
-       .pg_state  = mei_virtio_pg_state,
-
-       .host_is_ready = mei_virtio_host_is_ready,
-
-       .hw_is_ready = mei_virtio_hw_is_ready,
-       .hw_reset = mei_virtio_hw_reset,
-       .hw_config = mei_virtio_hw_config,
-       .hw_start = mei_virtio_hw_start,
-
-       .pg_in_transition = mei_virtio_pg_in_transition,
-       .pg_is_enabled = mei_virtio_pg_is_enabled,
-
-       .intr_clear = mei_virtio_intr_clear,
-       .intr_enable = mei_virtio_intr_enable,
-       .intr_disable = mei_virtio_intr_disable,
-       .synchronize_irq = mei_virtio_synchronize_irq,
-
-       .hbuf_free_slots = mei_virtio_hbuf_empty_slots,
-       .hbuf_is_ready = mei_virtio_hbuf_is_ready,
-       .hbuf_depth = mei_virtio_hbuf_depth,
-
-       .write = mei_virtio_write_message,
-
-       .rdbuf_full_slots = mei_virtio_count_full_read_slots,
-       .read_hdr = mei_virtio_read_hdr,
-       .read = mei_virtio_read,
-};
-
-static int mei_virtio_probe(struct virtio_device *vdev)
-{
-       struct mei_virtio_hw *hw;
-       int ret;
-
-       hw = devm_kzalloc(&vdev->dev, sizeof(*hw), GFP_KERNEL);
-       if (!hw)
-               return -ENOMEM;
-
-       vdev->priv = hw;
-
-       INIT_WORK(&hw->intr_handler, mei_virtio_intr_handler);
-
-       ret = mei_virtio_init_vqs(hw, vdev);
-       if (ret)
-               goto vqs_failed;
-
-       virtio_cread(vdev, struct mei_virtio_cfg,
-                    buf_depth, &hw->cfg.buf_depth);
-
-       hw->recv_buf = kzalloc(mei_slots2data(hw->cfg.buf_depth), GFP_KERNEL);
-       if (!hw->recv_buf) {
-               ret = -ENOMEM;
-               goto hbuf_failed;
-       }
-       atomic_set(&hw->hbuf_ready, 0);
-
-       virtio_device_ready(vdev);
-
-       mei_device_init(&hw->mdev, &vdev->dev, &mei_virtio_ops);
-
-       pm_runtime_get_noresume(&vdev->dev);
-       pm_runtime_set_active(&vdev->dev);
-       pm_runtime_enable(&vdev->dev);
-
-       ret = mei_start(&hw->mdev);
-       if (ret)
-               goto mei_start_failed;
-
-       pm_runtime_set_autosuspend_delay(&vdev->dev, MEI_VIRTIO_RPM_TIMEOUT);
-       pm_runtime_use_autosuspend(&vdev->dev);
-
-       ret = mei_register(&hw->mdev, &vdev->dev);
-       if (ret)
-               goto mei_failed;
-
-       pm_runtime_put(&vdev->dev);
-
-       return 0;
-
-mei_failed:
-       mei_stop(&hw->mdev);
-mei_start_failed:
-       mei_cancel_work(&hw->mdev);
-       mei_disable_interrupts(&hw->mdev);
-       kfree(hw->recv_buf);
-hbuf_failed:
-       vdev->config->del_vqs(vdev);
-vqs_failed:
-       return ret;
-}
-
-static int __maybe_unused mei_virtio_pm_runtime_idle(struct device *device)
-{
-       struct virtio_device *vdev = dev_to_virtio(device);
-       struct mei_virtio_hw *hw = vdev->priv;
-
-       dev_dbg(&vdev->dev, "rpm: mei_virtio : runtime_idle\n");
-
-       if (!hw)
-               return -ENODEV;
-
-       if (mei_write_is_idle(&hw->mdev))
-               pm_runtime_autosuspend(device);
-
-       return -EBUSY;
-}
-
-static int __maybe_unused mei_virtio_pm_runtime_suspend(struct device *device)
-{
-       return 0;
-}
-
-static int __maybe_unused mei_virtio_pm_runtime_resume(struct device *device)
-{
-       return 0;
-}
-
-static int __maybe_unused mei_virtio_freeze(struct virtio_device *vdev)
-{
-       struct mei_virtio_hw *hw = vdev->priv;
-
-       dev_dbg(&vdev->dev, "freeze\n");
-
-       if (!hw)
-               return -ENODEV;
-
-       mei_stop(&hw->mdev);
-       mei_disable_interrupts(&hw->mdev);
-       cancel_work_sync(&hw->intr_handler);
-       vdev->config->reset(vdev);
-       mei_virtio_remove_vqs(vdev);
-
-       return 0;
-}
-
-static int __maybe_unused mei_virtio_restore(struct virtio_device *vdev)
-{
-       struct mei_virtio_hw *hw = vdev->priv;
-       int ret;
-
-       dev_dbg(&vdev->dev, "restore\n");
-
-       if (!hw)
-               return -ENODEV;
-
-       ret = mei_virtio_init_vqs(hw, vdev);
-       if (ret)
-               return ret;
-
-       virtio_device_ready(vdev);
-
-       ret = mei_restart(&hw->mdev);
-       if (ret)
-               return ret;
-
-       /* Start timer if stopped in suspend */
-       schedule_delayed_work(&hw->mdev.timer_work, HZ);
-
-       return 0;
-}
-
-static const struct dev_pm_ops mei_virtio_pm_ops = {
-       SET_RUNTIME_PM_OPS(mei_virtio_pm_runtime_suspend,
-                          mei_virtio_pm_runtime_resume,
-                          mei_virtio_pm_runtime_idle)
-};
-
-static void mei_virtio_remove(struct virtio_device *vdev)
-{
-       struct mei_virtio_hw *hw = vdev->priv;
-
-       mei_stop(&hw->mdev);
-       mei_disable_interrupts(&hw->mdev);
-       cancel_work_sync(&hw->intr_handler);
-       mei_deregister(&hw->mdev);
-       vdev->config->reset(vdev);
-       mei_virtio_remove_vqs(vdev);
-       kfree(hw->recv_buf);
-       pm_runtime_disable(&vdev->dev);
-}
-
-static struct virtio_device_id id_table[] = {
-       { VIRTIO_ID_MEI, VIRTIO_DEV_ANY_ID },
-       { }
-};
-
-static struct virtio_driver mei_virtio_driver = {
-       .id_table = id_table,
-       .probe = mei_virtio_probe,
-       .remove = mei_virtio_remove,
-       .config_changed = mei_virtio_config_changed,
-       .driver = {
-               .name = KBUILD_MODNAME,
-               .owner = THIS_MODULE,
-               .pm = &mei_virtio_pm_ops,
-       },
-#ifdef CONFIG_PM_SLEEP
-       .freeze = mei_virtio_freeze,
-       .restore = mei_virtio_restore,
-#endif
-};
-
-module_virtio_driver(mei_virtio_driver);
-MODULE_DEVICE_TABLE(virtio, id_table);
-MODULE_DESCRIPTION("Virtio MEI frontend driver");
-MODULE_LICENSE("GPL v2");
index 8d3df0b..42e27a2 100644 (file)
@@ -580,7 +580,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
 
        memcpy(&(idata->ic.response), cmd.resp, sizeof(cmd.resp));
 
-       if (idata->rpmb || (cmd.flags & MMC_RSP_R1B)) {
+       if (idata->rpmb || (cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) {
                /*
                 * Ensure RPMB/R1B command has completed by polling CMD13
                 * "Send Status".
index a704745..004fbfc 100644 (file)
@@ -446,7 +446,7 @@ struct msdc_host {
 
 static const struct mtk_mmc_compatible mt8135_compat = {
        .clk_div_bits = 8,
-       .recheck_sdio_irq = false,
+       .recheck_sdio_irq = true,
        .hs400_tune = false,
        .pad_tune_reg = MSDC_PAD_TUNE,
        .async_fifo = false,
@@ -485,7 +485,7 @@ static const struct mtk_mmc_compatible mt8183_compat = {
 
 static const struct mtk_mmc_compatible mt2701_compat = {
        .clk_div_bits = 12,
-       .recheck_sdio_irq = false,
+       .recheck_sdio_irq = true,
        .hs400_tune = false,
        .pad_tune_reg = MSDC_PAD_TUNE0,
        .async_fifo = true,
@@ -511,7 +511,7 @@ static const struct mtk_mmc_compatible mt2712_compat = {
 
 static const struct mtk_mmc_compatible mt7622_compat = {
        .clk_div_bits = 12,
-       .recheck_sdio_irq = false,
+       .recheck_sdio_irq = true,
        .hs400_tune = false,
        .pad_tune_reg = MSDC_PAD_TUNE0,
        .async_fifo = true,
@@ -524,7 +524,7 @@ static const struct mtk_mmc_compatible mt7622_compat = {
 
 static const struct mtk_mmc_compatible mt8516_compat = {
        .clk_div_bits = 12,
-       .recheck_sdio_irq = false,
+       .recheck_sdio_irq = true,
        .hs400_tune = false,
        .pad_tune_reg = MSDC_PAD_TUNE0,
        .async_fifo = true,
@@ -535,7 +535,7 @@ static const struct mtk_mmc_compatible mt8516_compat = {
 
 static const struct mtk_mmc_compatible mt7620_compat = {
        .clk_div_bits = 8,
-       .recheck_sdio_irq = false,
+       .recheck_sdio_irq = true,
        .hs400_tune = false,
        .pad_tune_reg = MSDC_PAD_TUNE,
        .async_fifo = false,
@@ -548,6 +548,7 @@ static const struct mtk_mmc_compatible mt7620_compat = {
 
 static const struct mtk_mmc_compatible mt6779_compat = {
        .clk_div_bits = 12,
+       .recheck_sdio_irq = false,
        .hs400_tune = false,
        .pad_tune_reg = MSDC_PAD_TUNE0,
        .async_fifo = true,
@@ -2603,7 +2604,6 @@ static int msdc_drv_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM
 static void msdc_save_reg(struct msdc_host *host)
 {
        u32 tune_reg = host->dev_comp->pad_tune_reg;
@@ -2662,7 +2662,7 @@ static void msdc_restore_reg(struct msdc_host *host)
                __msdc_enable_sdio_irq(host, 1);
 }
 
-static int msdc_runtime_suspend(struct device *dev)
+static int __maybe_unused msdc_runtime_suspend(struct device *dev)
 {
        struct mmc_host *mmc = dev_get_drvdata(dev);
        struct msdc_host *host = mmc_priv(mmc);
@@ -2672,7 +2672,7 @@ static int msdc_runtime_suspend(struct device *dev)
        return 0;
 }
 
-static int msdc_runtime_resume(struct device *dev)
+static int __maybe_unused msdc_runtime_resume(struct device *dev)
 {
        struct mmc_host *mmc = dev_get_drvdata(dev);
        struct msdc_host *host = mmc_priv(mmc);
@@ -2681,11 +2681,28 @@ static int msdc_runtime_resume(struct device *dev)
        msdc_restore_reg(host);
        return 0;
 }
-#endif
+
+static int __maybe_unused msdc_suspend(struct device *dev)
+{
+       struct mmc_host *mmc = dev_get_drvdata(dev);
+       int ret;
+
+       if (mmc->caps2 & MMC_CAP2_CQE) {
+               ret = cqhci_suspend(mmc);
+               if (ret)
+                       return ret;
+       }
+
+       return pm_runtime_force_suspend(dev);
+}
+
+static int __maybe_unused msdc_resume(struct device *dev)
+{
+       return pm_runtime_force_resume(dev);
+}
 
 static const struct dev_pm_ops msdc_dev_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
-                               pm_runtime_force_resume)
+       SET_SYSTEM_SLEEP_PM_OPS(msdc_suspend, msdc_resume)
        SET_RUNTIME_PM_OPS(msdc_runtime_suspend, msdc_runtime_resume, NULL)
 };
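Replacing the #ifdef CONFIG_PM block with __maybe_unused keeps the callbacks visible to the compiler in every configuration; when power management is disabled, the SET_*_PM_OPS() macros expand to nothing and the unreferenced functions are discarded without -Wunused-function warnings. A minimal sketch of the pattern, with illustrative names:

	static int __maybe_unused foo_suspend(struct device *dev)
	{
		return 0;   /* always compiled; kept only when referenced */
	}

	static const struct dev_pm_ops foo_pm_ops = {
		/* Expands to empty initializers when CONFIG_PM_SLEEP is off. */
		SET_SYSTEM_SLEEP_PM_OPS(foo_suspend, NULL)
	};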
 
index 829ccef..3b8d456 100644 (file)
 #define SDHCI_ARASAN_VENDOR_REGISTER   0x78
 
 #define SDHCI_ARASAN_ITAPDLY_REGISTER  0xF0F8
+#define SDHCI_ARASAN_ITAPDLY_SEL_MASK  0xFF
+
 #define SDHCI_ARASAN_OTAPDLY_REGISTER  0xF0FC
+#define SDHCI_ARASAN_OTAPDLY_SEL_MASK  0x3F
 
 #define SDHCI_ARASAN_CQE_BASE_ADDR     0x200
 #define VENDOR_ENHANCED_STROBE         BIT(0)
@@ -600,14 +603,8 @@ static int sdhci_zynqmp_sdcardclk_set_phase(struct clk_hw *hw, int degrees)
        u8 tap_delay, tap_max = 0;
        int ret;
 
-       /*
-        * This is applicable for SDHCI_SPEC_300 and above
-        * ZynqMP does not set phase for <=25MHz clock.
-        * If degrees is zero, no need to do anything.
-        */
-       if (host->version < SDHCI_SPEC_300 ||
-           host->timing == MMC_TIMING_LEGACY ||
-           host->timing == MMC_TIMING_UHS_SDR12 || !degrees)
+       /* This is applicable for SDHCI_SPEC_300 and above */
+       if (host->version < SDHCI_SPEC_300)
                return 0;
 
        switch (host->timing) {
@@ -638,6 +635,9 @@ static int sdhci_zynqmp_sdcardclk_set_phase(struct clk_hw *hw, int degrees)
        if (ret)
                pr_err("Error setting Output Tap Delay\n");
 
+       /* Release DLL Reset */
+       zynqmp_pm_sd_dll_reset(node_id, PM_DLL_RESET_RELEASE);
+
        return ret;
 }
 
@@ -668,16 +668,13 @@ static int sdhci_zynqmp_sampleclk_set_phase(struct clk_hw *hw, int degrees)
        u8 tap_delay, tap_max = 0;
        int ret;
 
-       /*
-        * This is applicable for SDHCI_SPEC_300 and above
-        * ZynqMP does not set phase for <=25MHz clock.
-        * If degrees is zero, no need to do anything.
-        */
-       if (host->version < SDHCI_SPEC_300 ||
-           host->timing == MMC_TIMING_LEGACY ||
-           host->timing == MMC_TIMING_UHS_SDR12 || !degrees)
+       /* This is applicable for SDHCI_SPEC_300 and above */
+       if (host->version < SDHCI_SPEC_300)
                return 0;
 
+       /* Assert DLL Reset */
+       zynqmp_pm_sd_dll_reset(node_id, PM_DLL_RESET_ASSERT);
+
        switch (host->timing) {
        case MMC_TIMING_MMC_HS:
        case MMC_TIMING_SD_HS:
@@ -733,14 +730,8 @@ static int sdhci_versal_sdcardclk_set_phase(struct clk_hw *hw, int degrees)
        struct sdhci_host *host = sdhci_arasan->host;
        u8 tap_delay, tap_max = 0;
 
-       /*
-        * This is applicable for SDHCI_SPEC_300 and above
-        * Versal does not set phase for <=25MHz clock.
-        * If degrees is zero, no need to do anything.
-        */
-       if (host->version < SDHCI_SPEC_300 ||
-           host->timing == MMC_TIMING_LEGACY ||
-           host->timing == MMC_TIMING_UHS_SDR12 || !degrees)
+       /* This is applicable for SDHCI_SPEC_300 and above */
+       if (host->version < SDHCI_SPEC_300)
                return 0;
 
        switch (host->timing) {
@@ -773,6 +764,7 @@ static int sdhci_versal_sdcardclk_set_phase(struct clk_hw *hw, int degrees)
                regval = sdhci_readl(host, SDHCI_ARASAN_OTAPDLY_REGISTER);
                regval |= SDHCI_OTAPDLY_ENABLE;
                sdhci_writel(host, regval, SDHCI_ARASAN_OTAPDLY_REGISTER);
+               regval &= ~SDHCI_ARASAN_OTAPDLY_SEL_MASK;
                regval |= tap_delay;
                sdhci_writel(host, regval, SDHCI_ARASAN_OTAPDLY_REGISTER);
        }
@@ -804,14 +796,8 @@ static int sdhci_versal_sampleclk_set_phase(struct clk_hw *hw, int degrees)
        struct sdhci_host *host = sdhci_arasan->host;
        u8 tap_delay, tap_max = 0;
 
-       /*
-        * This is applicable for SDHCI_SPEC_300 and above
-        * Versal does not set phase for <=25MHz clock.
-        * If degrees is zero, no need to do anything.
-        */
-       if (host->version < SDHCI_SPEC_300 ||
-           host->timing == MMC_TIMING_LEGACY ||
-           host->timing == MMC_TIMING_UHS_SDR12 || !degrees)
+       /* This is applicable for SDHCI_SPEC_300 and above */
+       if (host->version < SDHCI_SPEC_300)
                return 0;
 
        switch (host->timing) {
@@ -846,6 +832,7 @@ static int sdhci_versal_sampleclk_set_phase(struct clk_hw *hw, int degrees)
                sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER);
                regval |= SDHCI_ITAPDLY_ENABLE;
                sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER);
+               regval &= ~SDHCI_ARASAN_ITAPDLY_SEL_MASK;
                regval |= tap_delay;
                sdhci_writel(host, regval, SDHCI_ARASAN_ITAPDLY_REGISTER);
                regval &= ~SDHCI_ITAPDLY_CHGWIN;
@@ -1199,16 +1186,19 @@ static struct sdhci_arasan_of_data sdhci_arasan_versal_data = {
 static struct sdhci_arasan_of_data intel_keembay_emmc_data = {
        .soc_ctl_map = &intel_keembay_soc_ctl_map,
        .pdata = &sdhci_keembay_emmc_pdata,
+       .clk_ops = &arasan_clk_ops,
 };
 
 static struct sdhci_arasan_of_data intel_keembay_sd_data = {
        .soc_ctl_map = &intel_keembay_soc_ctl_map,
        .pdata = &sdhci_keembay_sd_pdata,
+       .clk_ops = &arasan_clk_ops,
 };
 
 static struct sdhci_arasan_of_data intel_keembay_sdio_data = {
        .soc_ctl_map = &intel_keembay_soc_ctl_map,
        .pdata = &sdhci_keembay_sdio_pdata,
+       .clk_ops = &arasan_clk_ops,
 };
 
 static const struct of_device_id sdhci_arasan_of_match[] = {
index 23da7f7..9552708 100644 (file)
@@ -665,6 +665,15 @@ static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode,
        }
 }
 
+static void sdhci_intel_set_uhs_signaling(struct sdhci_host *host,
+                                         unsigned int timing)
+{
+       /* Set UHS timing to SDR25 for High Speed mode */
+       if (timing == MMC_TIMING_MMC_HS || timing == MMC_TIMING_SD_HS)
+               timing = MMC_TIMING_UHS_SDR25;
+       sdhci_set_uhs_signaling(host, timing);
+}
+
 #define INTEL_HS400_ES_REG 0x78
 #define INTEL_HS400_ES_BIT BIT(0)
 
@@ -721,7 +730,7 @@ static const struct sdhci_ops sdhci_intel_byt_ops = {
        .enable_dma             = sdhci_pci_enable_dma,
        .set_bus_width          = sdhci_set_bus_width,
        .reset                  = sdhci_reset,
-       .set_uhs_signaling      = sdhci_set_uhs_signaling,
+       .set_uhs_signaling      = sdhci_intel_set_uhs_signaling,
        .hw_reset               = sdhci_pci_hw_reset,
 };
 
@@ -731,7 +740,7 @@ static const struct sdhci_ops sdhci_intel_glk_ops = {
        .enable_dma             = sdhci_pci_enable_dma,
        .set_bus_width          = sdhci_set_bus_width,
        .reset                  = sdhci_cqhci_reset,
-       .set_uhs_signaling      = sdhci_set_uhs_signaling,
+       .set_uhs_signaling      = sdhci_intel_set_uhs_signaling,
        .hw_reset               = sdhci_pci_hw_reset,
        .irq                    = sdhci_cqhci_irq,
 };
index cb4149f..ac4e787 100644
@@ -927,9 +927,9 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
        switch (ios->power_mode) {
        case MMC_POWER_OFF:
                tmio_mmc_power_off(host);
-               /* Downgrade ensures a sane state for tuning HW (e.g. SCC) */
-               if (host->mmc->ops->hs400_downgrade)
-                       host->mmc->ops->hs400_downgrade(host->mmc);
+               /* For R-Car Gen2+, we need to reset SDHI specific SCC */
+               if (host->pdata->flags & TMIO_MMC_MIN_RCAR2)
+                       host->reset(host);
                host->set_clock(host, 0);
                break;
        case MMC_POWER_UP:
index d3c5cc5..ff1697f 100644
@@ -215,8 +215,19 @@ static int gpio_nand_setup_interface(struct nand_chip *this, int csline,
        return 0;
 }
 
+static int gpio_nand_attach_chip(struct nand_chip *chip)
+{
+       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
+       if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+               chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+       return 0;
+}
+
 static const struct nand_controller_ops gpio_nand_ops = {
        .exec_op = gpio_nand_exec_op,
+       .attach_chip = gpio_nand_attach_chip,
        .setup_interface = gpio_nand_setup_interface,
 };
 
@@ -260,9 +271,6 @@ static int gpio_nand_probe(struct platform_device *pdev)
                return err;
        }
 
-       this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-       this->ecc.algo = NAND_ECC_ALGO_HAMMING;
-
        platform_set_drvdata(pdev, priv);
 
        /* Set chip enabled but write protected */
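
This AMS-Delta hunk is the first of many below that apply one refactor: ECC defaults move out of probe() into a controller ->attach_chip() hook, which nand_scan() runs only after nand_scan_ident() has parsed the device tree, so a DT-specified ECC algorithm is no longer clobbered (hence the NAND_ECC_ALGO_UNKNOWN guard). A condensed sketch of the whole pattern, using placeholder "example_" names:

#include <linux/mtd/rawnand.h>
#include <linux/platform_device.h>

struct example_host {
	struct nand_controller controller;
	struct nand_chip chip;
};

static int example_attach_chip(struct nand_chip *chip)
{
	chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;

	/* Respect an algorithm the device tree already chose. */
	if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
		chip->ecc.algo = NAND_ECC_ALGO_HAMMING;

	return 0;
}

static const struct nand_controller_ops example_ops = {
	.attach_chip = example_attach_chip,
};

static int example_probe(struct platform_device *pdev)
{
	struct example_host *host;

	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
	if (!host)
		return -ENOMEM;

	/* Drivers below that had no nand_controller gain a minimal one
	 * so the hook has somewhere to live (mpc5121, orion, pasemi,
	 * plat_nand, r852, sharpsl, socrates, tmio, xway). */
	nand_controller_init(&host->controller);
	host->controller.ops = &example_ops;
	host->chip.controller = &host->controller;

	return nand_scan(&host->chip, 1);	/* invokes ->attach_chip() */
}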
index 79b0574..7b6b354 100644
@@ -236,8 +236,19 @@ static int au1550nd_exec_op(struct nand_chip *this,
        return ret;
 }
 
+static int au1550nd_attach_chip(struct nand_chip *chip)
+{
+       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
+       if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+               chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+       return 0;
+}
+
 static const struct nand_controller_ops au1550nd_ops = {
        .exec_op = au1550nd_exec_op,
+       .attach_chip = au1550nd_attach_chip,
 };
 
 static int au1550nd_probe(struct platform_device *pdev)
@@ -294,8 +305,6 @@ static int au1550nd_probe(struct platform_device *pdev)
        nand_controller_init(&ctx->controller);
        ctx->controller.ops = &au1550nd_ops;
        this->controller = &ctx->controller;
-       this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-       this->ecc.algo = NAND_ECC_ALGO_HAMMING;
 
        if (pd->devwidth)
                this->options |= NAND_BUSWIDTH_16;
index b7f3f63..282203d 100644
@@ -243,8 +243,24 @@ static int cs_calculate_ecc(struct nand_chip *this, const u_char *dat,
 
 static struct cs553x_nand_controller *controllers[4];
 
+static int cs553x_attach_chip(struct nand_chip *chip)
+{
+       if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
+               return 0;
+
+       chip->ecc.size = 256;
+       chip->ecc.bytes = 3;
+       chip->ecc.hwctl  = cs_enable_hwecc;
+       chip->ecc.calculate = cs_calculate_ecc;
+       chip->ecc.correct  = nand_correct_data;
+       chip->ecc.strength = 1;
+
+       return 0;
+}
+
 static const struct nand_controller_ops cs553x_nand_controller_ops = {
        .exec_op = cs553x_exec_op,
+       .attach_chip = cs553x_attach_chip,
 };
 
 static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
@@ -286,14 +302,6 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
                goto out_mtd;
        }
 
-       this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
-       this->ecc.size = 256;
-       this->ecc.bytes = 3;
-       this->ecc.hwctl  = cs_enable_hwecc;
-       this->ecc.calculate = cs_calculate_ecc;
-       this->ecc.correct  = nand_correct_data;
-       this->ecc.strength = 1;
-
        /* Enable the following for a flash based bad block table */
        this->bbt_options = NAND_BBT_USE_FLASH;
 
index 427f320..f8c36d1 100644
@@ -585,6 +585,10 @@ static int davinci_nand_attach_chip(struct nand_chip *chip)
        if (IS_ERR(pdata))
                return PTR_ERR(pdata);
 
+       /* Use board-specific ECC config */
+       info->chip.ecc.engine_type = pdata->engine_type;
+       info->chip.ecc.placement = pdata->ecc_placement;
+
        switch (info->chip.ecc.engine_type) {
        case NAND_ECC_ENGINE_TYPE_NONE:
                pdata->ecc_bits = 0;
@@ -850,10 +854,6 @@ static int nand_davinci_probe(struct platform_device *pdev)
        info->mask_ale          = pdata->mask_ale ? : MASK_ALE;
        info->mask_cle          = pdata->mask_cle ? : MASK_CLE;
 
-       /* Use board-specific ECC config */
-       info->chip.ecc.engine_type = pdata->engine_type;
-       info->chip.ecc.placement = pdata->ecc_placement;
-
        spin_lock_irq(&davinci_nand_lock);
 
        /* put CSxNAND into NAND mode */
index 94432a4..26b265e 100644
@@ -1269,12 +1269,31 @@ static inline int __init doc2001plus_init(struct mtd_info *mtd)
        return 1;
 }
 
+static int doc200x_attach_chip(struct nand_chip *chip)
+{
+       if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
+               return 0;
+
+       chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED;
+       chip->ecc.size = 512;
+       chip->ecc.bytes = 6;
+       chip->ecc.strength = 2;
+       chip->ecc.options = NAND_ECC_GENERIC_ERASED_CHECK;
+       chip->ecc.hwctl = doc200x_enable_hwecc;
+       chip->ecc.calculate = doc200x_calculate_ecc;
+       chip->ecc.correct = doc200x_correct_data;
+
+       return 0;
+}
+
 static const struct nand_controller_ops doc200x_ops = {
        .exec_op = doc200x_exec_op,
+       .attach_chip = doc200x_attach_chip,
 };
 
 static const struct nand_controller_ops doc2001plus_ops = {
        .exec_op = doc2001plus_exec_op,
+       .attach_chip = doc200x_attach_chip,
 };
 
 static int __init doc_probe(unsigned long physadr)
@@ -1452,16 +1471,6 @@ static int __init doc_probe(unsigned long physadr)
 
        nand->controller        = &doc->base;
        nand_set_controller_data(nand, doc);
-       nand->ecc.hwctl         = doc200x_enable_hwecc;
-       nand->ecc.calculate     = doc200x_calculate_ecc;
-       nand->ecc.correct       = doc200x_correct_data;
-
-       nand->ecc.engine_type   = NAND_ECC_ENGINE_TYPE_ON_HOST;
-       nand->ecc.placement     = NAND_ECC_PLACEMENT_INTERLEAVED;
-       nand->ecc.size          = 512;
-       nand->ecc.bytes         = 6;
-       nand->ecc.strength      = 2;
-       nand->ecc.options       = NAND_ECC_GENERIC_ERASED_CHECK;
        nand->bbt_options       = NAND_BBT_USE_FLASH;
        /* Skip the automatic BBT scan so we can run it manually */
        nand->options           |= NAND_SKIP_BBTSCAN | NAND_NO_BBM_QUIRK;
index 4191831..c88421a 100644
@@ -880,6 +880,20 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand)
        struct mtd_info *mtd = nand_to_mtd(nand);
        struct fsmc_nand_data *host = nand_to_fsmc(nand);
 
+       if (nand->ecc.engine_type == NAND_ECC_ENGINE_TYPE_INVALID)
+               nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
+
+       if (!nand->ecc.size)
+               nand->ecc.size = 512;
+
+       if (AMBA_REV_BITS(host->pid) >= 8) {
+               nand->ecc.read_page = fsmc_read_page_hwecc;
+               nand->ecc.calculate = fsmc_read_hwecc_ecc4;
+               nand->ecc.correct = fsmc_bch8_correct_data;
+               nand->ecc.bytes = 13;
+               nand->ecc.strength = 8;
+       }
+
        if (AMBA_REV_BITS(host->pid) >= 8) {
                switch (mtd->oobsize) {
                case 16:
@@ -905,6 +919,7 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand)
                dev_info(host->dev, "Using 1-bit HW ECC scheme\n");
                nand->ecc.calculate = fsmc_read_hwecc_ecc1;
                nand->ecc.correct = nand_correct_data;
+               nand->ecc.hwctl = fsmc_enable_hwecc;
                nand->ecc.bytes = 3;
                nand->ecc.strength = 1;
                nand->ecc.options |= NAND_ECC_SOFT_HAMMING_SM_ORDER;
@@ -1055,13 +1070,6 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 
        mtd->dev.parent = &pdev->dev;
 
-       /*
-        * Setup default ECC mode. nand_dt_init() called from nand_scan_ident()
-        * can overwrite this value if the DT provides a different value.
-        */
-       nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
-       nand->ecc.hwctl = fsmc_enable_hwecc;
-       nand->ecc.size = 512;
        nand->badblockbits = 7;
 
        if (host->mode == USE_DMA_ACCESS) {
@@ -1084,14 +1092,6 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
                nand->options |= NAND_KEEP_TIMINGS;
        }
 
-       if (AMBA_REV_BITS(host->pid) >= 8) {
-               nand->ecc.read_page = fsmc_read_page_hwecc;
-               nand->ecc.calculate = fsmc_read_hwecc_ecc4;
-               nand->ecc.correct = fsmc_bch8_correct_data;
-               nand->ecc.bytes = 13;
-               nand->ecc.strength = 8;
-       }
-
        nand_controller_init(&host->base);
        host->base.ops = &fsmc_nand_controller_ops;
        nand->controller = &host->base;
index 4ec0a1e..fb7a086 100644
@@ -161,8 +161,19 @@ static int gpio_nand_exec_op(struct nand_chip *chip,
        return ret;
 }
 
+static int gpio_nand_attach_chip(struct nand_chip *chip)
+{
+       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
+       if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+               chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+       return 0;
+}
+
 static const struct nand_controller_ops gpio_nand_ops = {
        .exec_op = gpio_nand_exec_op,
+       .attach_chip = gpio_nand_attach_chip,
 };
 
 #ifdef CONFIG_OF
@@ -342,8 +353,6 @@ static int gpio_nand_probe(struct platform_device *pdev)
        gpiomtd->base.ops = &gpio_nand_ops;
 
        nand_set_flash_node(chip, pdev->dev.of_node);
-       chip->ecc.engine_type   = NAND_ECC_ENGINE_TYPE_SOFT;
-       chip->ecc.algo          = NAND_ECC_ALGO_HAMMING;
        chip->options           = gpiomtd->plat.options;
        chip->controller        = &gpiomtd->base;
 
index 4940bb2..9e728c7 100644
@@ -648,6 +648,9 @@ static int lpc32xx_nand_attach_chip(struct nand_chip *chip)
        struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
        struct device *dev = &host->pdev->dev;
 
+       if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
+               return 0;
+
        host->dma_buf = devm_kzalloc(dev, mtd->writesize, GFP_KERNEL);
        if (!host->dma_buf)
                return -ENOMEM;
@@ -656,8 +659,17 @@ static int lpc32xx_nand_attach_chip(struct nand_chip *chip)
        if (!host->dummy_buf)
                return -ENOMEM;
 
-       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
        chip->ecc.size = 512;
+       chip->ecc.hwctl = lpc32xx_ecc_enable;
+       chip->ecc.read_page_raw = lpc32xx_read_page;
+       chip->ecc.read_page = lpc32xx_read_page;
+       chip->ecc.write_page_raw = lpc32xx_write_page_lowlevel;
+       chip->ecc.write_page = lpc32xx_write_page_lowlevel;
+       chip->ecc.write_oob = lpc32xx_write_oob;
+       chip->ecc.read_oob = lpc32xx_read_oob;
+       chip->ecc.strength = 4;
+       chip->ecc.bytes = 10;
+
        mtd_set_ooblayout(mtd, &lpc32xx_ooblayout_ops);
        host->mlcsubpages = mtd->writesize / 512;
 
@@ -741,15 +753,6 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, host);
 
        /* Initialize function pointers */
-       nand_chip->ecc.hwctl = lpc32xx_ecc_enable;
-       nand_chip->ecc.read_page_raw = lpc32xx_read_page;
-       nand_chip->ecc.read_page = lpc32xx_read_page;
-       nand_chip->ecc.write_page_raw = lpc32xx_write_page_lowlevel;
-       nand_chip->ecc.write_page = lpc32xx_write_page_lowlevel;
-       nand_chip->ecc.write_oob = lpc32xx_write_oob;
-       nand_chip->ecc.read_oob = lpc32xx_read_oob;
-       nand_chip->ecc.strength = 4;
-       nand_chip->ecc.bytes = 10;
        nand_chip->legacy.waitfunc = lpc32xx_waitfunc;
 
        nand_chip->options = NAND_NO_SUBPAGE_WRITE;
index 6db9d2e..dc7785e 100644
@@ -775,6 +775,9 @@ static int lpc32xx_nand_attach_chip(struct nand_chip *chip)
        struct mtd_info *mtd = nand_to_mtd(chip);
        struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 
+       if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
+               return 0;
+
        /* OOB and ECC CPU and DMA work areas */
        host->ecc_buf = (uint32_t *)(host->data_buf + LPC32XX_DMA_DATA_SIZE);
 
@@ -786,11 +789,22 @@ static int lpc32xx_nand_attach_chip(struct nand_chip *chip)
        if (mtd->writesize <= 512)
                mtd_set_ooblayout(mtd, &lpc32xx_ooblayout_ops);
 
+       chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED;
        /* These sizes remain the same regardless of page size */
        chip->ecc.size = 256;
+       chip->ecc.strength = 1;
        chip->ecc.bytes = LPC32XX_SLC_DEV_ECC_BYTES;
        chip->ecc.prepad = 0;
        chip->ecc.postpad = 0;
+       chip->ecc.read_page_raw = lpc32xx_nand_read_page_raw_syndrome;
+       chip->ecc.read_page = lpc32xx_nand_read_page_syndrome;
+       chip->ecc.write_page_raw = lpc32xx_nand_write_page_raw_syndrome;
+       chip->ecc.write_page = lpc32xx_nand_write_page_syndrome;
+       chip->ecc.write_oob = lpc32xx_nand_write_oob_syndrome;
+       chip->ecc.read_oob = lpc32xx_nand_read_oob_syndrome;
+       chip->ecc.calculate = lpc32xx_nand_ecc_calculate;
+       chip->ecc.correct = nand_correct_data;
+       chip->ecc.hwctl = lpc32xx_nand_ecc_enable;
 
        /*
         * Use a custom BBT marker setup for small page FLASH that
@@ -881,21 +895,9 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, host);
 
        /* NAND callbacks for LPC32xx SLC hardware */
-       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
-       chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED;
        chip->legacy.read_byte = lpc32xx_nand_read_byte;
        chip->legacy.read_buf = lpc32xx_nand_read_buf;
        chip->legacy.write_buf = lpc32xx_nand_write_buf;
-       chip->ecc.read_page_raw = lpc32xx_nand_read_page_raw_syndrome;
-       chip->ecc.read_page = lpc32xx_nand_read_page_syndrome;
-       chip->ecc.write_page_raw = lpc32xx_nand_write_page_raw_syndrome;
-       chip->ecc.write_page = lpc32xx_nand_write_page_syndrome;
-       chip->ecc.write_oob = lpc32xx_nand_write_oob_syndrome;
-       chip->ecc.read_oob = lpc32xx_nand_read_oob_syndrome;
-       chip->ecc.calculate = lpc32xx_nand_ecc_calculate;
-       chip->ecc.correct = nand_correct_data;
-       chip->ecc.strength = 1;
-       chip->ecc.hwctl = lpc32xx_nand_ecc_enable;
 
        /*
         * Allocate a large enough buffer for a single huge page plus
index dfd0d3e..bcd4a55 100644
 #define NFC_TIMEOUT            (HZ / 10)       /* 1/10 s */
 
 struct mpc5121_nfc_prv {
+       struct nand_controller  controller;
        struct nand_chip        chip;
        int                     irq;
        void __iomem            *regs;
@@ -602,6 +603,20 @@ static void mpc5121_nfc_free(struct device *dev, struct mtd_info *mtd)
                iounmap(prv->csreg);
 }
 
+static int mpc5121_nfc_attach_chip(struct nand_chip *chip)
+{
+       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
+       if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+               chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+       return 0;
+}
+
+static const struct nand_controller_ops mpc5121_nfc_ops = {
+       .attach_chip = mpc5121_nfc_attach_chip,
+};
+
 static int mpc5121_nfc_probe(struct platform_device *op)
 {
        struct device_node *dn = op->dev.of_node;
@@ -634,6 +649,10 @@ static int mpc5121_nfc_probe(struct platform_device *op)
        chip = &prv->chip;
        mtd = nand_to_mtd(chip);
 
+       nand_controller_init(&prv->controller);
+       prv->controller.ops = &mpc5121_nfc_ops;
+       chip->controller = &prv->controller;
+
        mtd->dev.parent = dev;
        nand_set_controller_data(chip, prv);
        nand_set_flash_node(chip, dn);
@@ -688,8 +707,6 @@ static int mpc5121_nfc_probe(struct platform_device *op)
        chip->legacy.set_features = nand_get_set_features_notsupp;
        chip->legacy.get_features = nand_get_set_features_notsupp;
        chip->bbt_options = NAND_BBT_USE_FLASH;
-       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-       chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
 
        /* Support external chip-select logic on ADS5121 board */
        if (of_machine_is_compatible("fsl,mpc5121ads")) {
index df9c0f8..66211c9 100644
@@ -22,6 +22,7 @@
 #include <linux/platform_data/mtd-orion_nand.h>
 
 struct orion_nand_info {
+       struct nand_controller controller;
        struct nand_chip chip;
        struct clk *clk;
 };
@@ -82,6 +83,20 @@ static void orion_nand_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
                buf[i++] = readb(io_base);
 }
 
+static int orion_nand_attach_chip(struct nand_chip *chip)
+{
+       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
+       if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+               chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+       return 0;
+}
+
+static const struct nand_controller_ops orion_nand_ops = {
+       .attach_chip = orion_nand_attach_chip,
+};
+
 static int __init orion_nand_probe(struct platform_device *pdev)
 {
        struct orion_nand_info *info;
@@ -101,6 +116,10 @@ static int __init orion_nand_probe(struct platform_device *pdev)
        nc = &info->chip;
        mtd = nand_to_mtd(nc);
 
+       nand_controller_init(&info->controller);
+       info->controller.ops = &orion_nand_ops;
+       nc->controller = &info->controller;
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        io_base = devm_ioremap_resource(&pdev->dev, res);
 
@@ -139,8 +158,6 @@ static int __init orion_nand_probe(struct platform_device *pdev)
        nc->legacy.IO_ADDR_R = nc->legacy.IO_ADDR_W = io_base;
        nc->legacy.cmd_ctrl = orion_nand_cmd_ctrl;
        nc->legacy.read_buf = orion_nand_read_buf;
-       nc->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-       nc->ecc.algo = NAND_ECC_ALGO_HAMMING;
 
        if (board->chip_delay)
                nc->legacy.chip_delay = board->chip_delay;
index 2b8f155..68c0877 100644
@@ -29,6 +29,7 @@
 
 static unsigned int lpcctl;
 static struct mtd_info *pasemi_nand_mtd;
+static struct nand_controller controller;
 static const char driver_name[] = "pasemi-nand";
 
 static void pasemi_read_buf(struct nand_chip *chip, u_char *buf, int len)
@@ -73,6 +74,20 @@ static int pasemi_device_ready(struct nand_chip *chip)
        return !!(inl(lpcctl) & LBICTRL_LPCCTL_NR);
 }
 
+static int pasemi_attach_chip(struct nand_chip *chip)
+{
+       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
+       if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+               chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+       return 0;
+}
+
+static const struct nand_controller_ops pasemi_ops = {
+       .attach_chip = pasemi_attach_chip,
+};
+
 static int pasemi_nand_probe(struct platform_device *ofdev)
 {
        struct device *dev = &ofdev->dev;
@@ -100,6 +115,10 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
                goto out;
        }
 
+       controller.ops = &pasemi_ops;
+       nand_controller_init(&controller);
+       chip->controller = &controller;
+
        pasemi_nand_mtd = nand_to_mtd(chip);
 
        /* Link the private data with the MTD structure */
@@ -132,8 +151,6 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
        chip->legacy.read_buf = pasemi_read_buf;
        chip->legacy.write_buf = pasemi_write_buf;
        chip->legacy.chip_delay = 0;
-       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-       chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
 
        /* Enable the following for a flash based bad block table */
        chip->bbt_options = NAND_BBT_USE_FLASH;
index b98c0d5..7711e10 100644
 #include <linux/mtd/platnand.h>
 
 struct plat_nand_data {
+       struct nand_controller  controller;
        struct nand_chip        chip;
        void __iomem            *io_base;
 };
 
+static int plat_nand_attach_chip(struct nand_chip *chip)
+{
+       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
+       if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+               chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+       return 0;
+}
+
+static const struct nand_controller_ops plat_nand_ops = {
+       .attach_chip = plat_nand_attach_chip,
+};
+
 /*
  * Probe for the NAND device.
  */
@@ -46,6 +61,10 @@ static int plat_nand_probe(struct platform_device *pdev)
        if (!data)
                return -ENOMEM;
 
+       data->controller.ops = &plat_nand_ops;
+       nand_controller_init(&data->controller);
+       data->chip.controller = &data->controller;
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        data->io_base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(data->io_base))
@@ -66,9 +85,6 @@ static int plat_nand_probe(struct platform_device *pdev)
        data->chip.options |= pdata->chip.options;
        data->chip.bbt_options |= pdata->chip.bbt_options;
 
-       data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-       data->chip.ecc.algo = NAND_ECC_ALGO_HAMMING;
-
        platform_set_drvdata(pdev, data);
 
        /* Handle any platform specific setup */
index 6b7addd..c742354 100644
@@ -817,6 +817,29 @@ out:
        return ret;
 }
 
+static int r852_attach_chip(struct nand_chip *chip)
+{
+       if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
+               return 0;
+
+       chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED;
+       chip->ecc.size = R852_DMA_LEN;
+       chip->ecc.bytes = SM_OOB_SIZE;
+       chip->ecc.strength = 2;
+       chip->ecc.hwctl = r852_ecc_hwctl;
+       chip->ecc.calculate = r852_ecc_calculate;
+       chip->ecc.correct = r852_ecc_correct;
+
+       /* TODO: hack */
+       chip->ecc.read_oob = r852_read_oob;
+
+       return 0;
+}
+
+static const struct nand_controller_ops r852_ops = {
+       .attach_chip = r852_attach_chip,
+};
+
 static int  r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 {
        int error;
@@ -858,19 +881,6 @@ static int  r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
        chip->legacy.read_buf = r852_read_buf;
        chip->legacy.write_buf = r852_write_buf;
 
-       /* ecc */
-       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
-       chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED;
-       chip->ecc.size = R852_DMA_LEN;
-       chip->ecc.bytes = SM_OOB_SIZE;
-       chip->ecc.strength = 2;
-       chip->ecc.hwctl = r852_ecc_hwctl;
-       chip->ecc.calculate = r852_ecc_calculate;
-       chip->ecc.correct = r852_ecc_correct;
-
-       /* TODO: hack */
-       chip->ecc.read_oob = r852_read_oob;
-
        /* init our device structure */
        dev = kzalloc(sizeof(struct r852_device), GFP_KERNEL);
 
@@ -882,6 +892,10 @@ static int  r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
        dev->pci_dev = pci_dev;
        pci_set_drvdata(pci_dev, dev);
 
+       nand_controller_init(&dev->controller);
+       dev->controller.ops = &r852_ops;
+       chip->controller = &dev->controller;
+
        dev->bounce_buffer = dma_alloc_coherent(&pci_dev->dev, R852_DMA_LEN,
                &dev->phys_bounce_buffer, GFP_KERNEL);
 
index e9ce299..96fe301 100644
 #define DMA_MEMORY     1
 
 struct r852_device {
+       struct nand_controller          controller;
        void __iomem *mmio;             /* mmio */
        struct nand_chip *chip;         /* nand chip backpointer */
        struct pci_dev *pci_dev;        /* pci backpointer */
index 1327bfb..af98bcc 100644
@@ -20,6 +20,7 @@
 #include <linux/io.h>
 
 struct sharpsl_nand {
+       struct nand_controller  controller;
        struct nand_chip        chip;
 
        void __iomem            *io;
@@ -96,6 +97,25 @@ static int sharpsl_nand_calculate_ecc(struct nand_chip *chip,
        return readb(sharpsl->io + ECCCNTR) != 0;
 }
 
+static int sharpsl_attach_chip(struct nand_chip *chip)
+{
+       if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
+               return 0;
+
+       chip->ecc.size = 256;
+       chip->ecc.bytes = 3;
+       chip->ecc.strength = 1;
+       chip->ecc.hwctl = sharpsl_nand_enable_hwecc;
+       chip->ecc.calculate = sharpsl_nand_calculate_ecc;
+       chip->ecc.correct = nand_correct_data;
+
+       return 0;
+}
+
+static const struct nand_controller_ops sharpsl_ops = {
+       .attach_chip = sharpsl_attach_chip,
+};
+
 /*
  * Main initialization routine
  */
@@ -136,6 +156,10 @@ static int sharpsl_nand_probe(struct platform_device *pdev)
        /* Get pointer to private data */
        this = (struct nand_chip *)(&sharpsl->chip);
 
+       nand_controller_init(&sharpsl->controller);
+       sharpsl->controller.ops = &sharpsl_ops;
+       this->controller = &sharpsl->controller;
+
        /* Link the private data with the MTD structure */
        mtd = nand_to_mtd(this);
        mtd->dev.parent = &pdev->dev;
@@ -156,15 +180,7 @@ static int sharpsl_nand_probe(struct platform_device *pdev)
        this->legacy.dev_ready = sharpsl_nand_dev_ready;
        /* 15 us command delay time */
        this->legacy.chip_delay = 15;
-       /* set eccmode using hardware ECC */
-       this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
-       this->ecc.size = 256;
-       this->ecc.bytes = 3;
-       this->ecc.strength = 1;
        this->badblock_pattern = data->badblock_pattern;
-       this->ecc.hwctl = sharpsl_nand_enable_hwecc;
-       this->ecc.calculate = sharpsl_nand_calculate_ecc;
-       this->ecc.correct = nand_correct_data;
 
        /* Scan to find existence of the device */
        err = nand_scan(this, 1);
index 0f63ff6..70f8305 100644
@@ -22,6 +22,7 @@
 #define FPGA_NAND_DATA_SHIFT           16
 
 struct socrates_nand_host {
+       struct nand_controller  controller;
        struct nand_chip        nand_chip;
        void __iomem            *io_base;
        struct device           *dev;
@@ -116,6 +117,20 @@ static int socrates_nand_device_ready(struct nand_chip *nand_chip)
        return 1;
 }
 
+static int socrates_attach_chip(struct nand_chip *chip)
+{
+       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
+       if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+               chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+       return 0;
+}
+
+static const struct nand_controller_ops socrates_ops = {
+       .attach_chip = socrates_attach_chip,
+};
+
 /*
  * Probe for the NAND device.
  */
@@ -141,6 +156,10 @@ static int socrates_nand_probe(struct platform_device *ofdev)
        mtd = nand_to_mtd(nand_chip);
        host->dev = &ofdev->dev;
 
+       nand_controller_init(&host->controller);
+       host->controller.ops = &socrates_ops;
+       nand_chip->controller = &host->controller;
+
        /* link the private data structures */
        nand_set_controller_data(nand_chip, host);
        nand_set_flash_node(nand_chip, ofdev->dev.of_node);
@@ -153,10 +172,6 @@ static int socrates_nand_probe(struct platform_device *ofdev)
        nand_chip->legacy.read_buf = socrates_nand_read_buf;
        nand_chip->legacy.dev_ready = socrates_nand_device_ready;
 
-       /* enable ECC */
-       nand_chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-       nand_chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
-
        /* TODO: I have no idea what real delay is. */
        nand_chip->legacy.chip_delay = 20;      /* 20us command delay time */
 
index 235a2f7..aa6c7e7 100644
 /*--------------------------------------------------------------------------*/
 
 struct tmio_nand {
+       struct nand_controller controller;
        struct nand_chip chip;
        struct completion comp;
 
@@ -355,6 +356,25 @@ static void tmio_hw_stop(struct platform_device *dev, struct tmio_nand *tmio)
                cell->disable(dev);
 }
 
+static int tmio_attach_chip(struct nand_chip *chip)
+{
+       if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
+               return 0;
+
+       chip->ecc.size = 512;
+       chip->ecc.bytes = 6;
+       chip->ecc.strength = 2;
+       chip->ecc.hwctl = tmio_nand_enable_hwecc;
+       chip->ecc.calculate = tmio_nand_calculate_ecc;
+       chip->ecc.correct = tmio_nand_correct_data;
+
+       return 0;
+}
+
+static const struct nand_controller_ops tmio_ops = {
+       .attach_chip = tmio_attach_chip,
+};
+
 static int tmio_probe(struct platform_device *dev)
 {
        struct tmio_nand_data *data = dev_get_platdata(&dev->dev);
@@ -385,6 +405,10 @@ static int tmio_probe(struct platform_device *dev)
        mtd->name = "tmio-nand";
        mtd->dev.parent = &dev->dev;
 
+       nand_controller_init(&tmio->controller);
+       tmio->controller.ops = &tmio_ops;
+       nand_chip->controller = &tmio->controller;
+
        tmio->ccr = devm_ioremap(&dev->dev, ccr->start, resource_size(ccr));
        if (!tmio->ccr)
                return -EIO;
@@ -409,15 +433,6 @@ static int tmio_probe(struct platform_device *dev)
        nand_chip->legacy.write_buf = tmio_nand_write_buf;
        nand_chip->legacy.read_buf = tmio_nand_read_buf;
 
-       /* set eccmode using hardware ECC */
-       nand_chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
-       nand_chip->ecc.size = 512;
-       nand_chip->ecc.bytes = 6;
-       nand_chip->ecc.strength = 2;
-       nand_chip->ecc.hwctl = tmio_nand_enable_hwecc;
-       nand_chip->ecc.calculate = tmio_nand_calculate_ecc;
-       nand_chip->ecc.correct = tmio_nand_correct_data;
-
        if (data)
                nand_chip->badblock_pattern = data->badblock_pattern;
 
index ef81dce..fe8ed24 100644
@@ -253,6 +253,11 @@ static int txx9ndfmc_attach_chip(struct nand_chip *chip)
 {
        struct mtd_info *mtd = nand_to_mtd(chip);
 
+       if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
+               return 0;
+
+       chip->ecc.strength = 1;
+
        if (mtd->writesize >= 512) {
                chip->ecc.size = 512;
                chip->ecc.bytes = 6;
@@ -261,6 +266,10 @@ static int txx9ndfmc_attach_chip(struct nand_chip *chip)
                chip->ecc.bytes = 3;
        }
 
+       chip->ecc.calculate = txx9ndfmc_calculate_ecc;
+       chip->ecc.correct = txx9ndfmc_correct_data;
+       chip->ecc.hwctl = txx9ndfmc_enable_hwecc;
+
        return 0;
 }
 
@@ -326,11 +335,6 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
                chip->legacy.write_buf = txx9ndfmc_write_buf;
                chip->legacy.cmd_ctrl = txx9ndfmc_cmd_ctrl;
                chip->legacy.dev_ready = txx9ndfmc_dev_ready;
-               chip->ecc.calculate = txx9ndfmc_calculate_ecc;
-               chip->ecc.correct = txx9ndfmc_correct_data;
-               chip->ecc.hwctl = txx9ndfmc_enable_hwecc;
-               chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
-               chip->ecc.strength = 1;
                chip->legacy.chip_delay = 100;
                chip->controller = &drvdata->controller;
 
index f2dbd63..2675197 100644
@@ -62,6 +62,7 @@
 #define NAND_CON_NANDM         1
 
 struct xway_nand_data {
+       struct nand_controller  controller;
        struct nand_chip        chip;
        unsigned long           csflags;
        void __iomem            *nandaddr;
@@ -145,6 +146,20 @@ static void xway_write_buf(struct nand_chip *chip, const u_char *buf, int len)
                xway_writeb(nand_to_mtd(chip), NAND_WRITE_DATA, buf[i]);
 }
 
+static int xway_attach_chip(struct nand_chip *chip)
+{
+       chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
+
+       if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN)
+               chip->ecc.algo = NAND_ECC_ALGO_HAMMING;
+
+       return 0;
+}
+
+static const struct nand_controller_ops xway_nand_ops = {
+       .attach_chip = xway_attach_chip,
+};
+
 /*
  * Probe for the NAND device.
  */
@@ -180,8 +195,9 @@ static int xway_nand_probe(struct platform_device *pdev)
        data->chip.legacy.read_byte = xway_read_byte;
        data->chip.legacy.chip_delay = 30;
 
-       data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
-       data->chip.ecc.algo = NAND_ECC_ALGO_HAMMING;
+       nand_controller_init(&data->controller);
+       data->controller.ops = &xway_nand_ops;
+       data->chip.controller = &data->controller;
 
        platform_set_drvdata(pdev, data);
        nand_set_controller_data(&data->chip, data);
index 84ecbc6..47afc59 100644
@@ -1460,7 +1460,39 @@ static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave)
        slave->dev->flags &= ~IFF_SLAVE;
 }
 
-static struct slave *bond_alloc_slave(struct bonding *bond)
+static void slave_kobj_release(struct kobject *kobj)
+{
+       struct slave *slave = to_slave(kobj);
+       struct bonding *bond = bond_get_bond_by_slave(slave);
+
+       cancel_delayed_work_sync(&slave->notify_work);
+       if (BOND_MODE(bond) == BOND_MODE_8023AD)
+               kfree(SLAVE_AD_INFO(slave));
+
+       kfree(slave);
+}
+
+static struct kobj_type slave_ktype = {
+       .release = slave_kobj_release,
+#ifdef CONFIG_SYSFS
+       .sysfs_ops = &slave_sysfs_ops,
+#endif
+};
+
+static int bond_kobj_init(struct slave *slave)
+{
+       int err;
+
+       err = kobject_init_and_add(&slave->kobj, &slave_ktype,
+                                  &(slave->dev->dev.kobj), "bonding_slave");
+       if (err)
+               kobject_put(&slave->kobj);
+
+       return err;
+}
+
+static struct slave *bond_alloc_slave(struct bonding *bond,
+                                     struct net_device *slave_dev)
 {
        struct slave *slave = NULL;
 
@@ -1468,11 +1500,17 @@ static struct slave *bond_alloc_slave(struct bonding *bond)
        if (!slave)
                return NULL;
 
+       slave->bond = bond;
+       slave->dev = slave_dev;
+
+       if (bond_kobj_init(slave))
+               return NULL;
+
        if (BOND_MODE(bond) == BOND_MODE_8023AD) {
                SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info),
                                               GFP_KERNEL);
                if (!SLAVE_AD_INFO(slave)) {
-                       kfree(slave);
+                       kobject_put(&slave->kobj);
                        return NULL;
                }
        }
@@ -1481,17 +1519,6 @@ static struct slave *bond_alloc_slave(struct bonding *bond)
        return slave;
 }
 
-static void bond_free_slave(struct slave *slave)
-{
-       struct bonding *bond = bond_get_bond_by_slave(slave);
-
-       cancel_delayed_work_sync(&slave->notify_work);
-       if (BOND_MODE(bond) == BOND_MODE_8023AD)
-               kfree(SLAVE_AD_INFO(slave));
-
-       kfree(slave);
-}
-
 static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info)
 {
        info->bond_mode = BOND_MODE(bond);
@@ -1678,14 +1705,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
                        goto err_undo_flags;
        }
 
-       new_slave = bond_alloc_slave(bond);
+       new_slave = bond_alloc_slave(bond, slave_dev);
        if (!new_slave) {
                res = -ENOMEM;
                goto err_undo_flags;
        }
 
-       new_slave->bond = bond;
-       new_slave->dev = slave_dev;
        /* Set the new_slave's queue_id to be zero.  Queue ID mapping
         * is set via sysfs or module option if desired.
         */
@@ -2007,7 +2032,7 @@ err_restore_mtu:
        dev_set_mtu(slave_dev, new_slave->original_mtu);
 
 err_free:
-       bond_free_slave(new_slave);
+       kobject_put(&new_slave->kobj);
 
 err_undo_flags:
        /* Enslave of first slave has failed and we need to fix master's mac */
@@ -2187,7 +2212,7 @@ static int __bond_release_one(struct net_device *bond_dev,
        if (!netif_is_bond_master(slave_dev))
                slave_dev->priv_flags &= ~IFF_BONDING;
 
-       bond_free_slave(slave);
+       kobject_put(&slave->kobj);
 
        return 0;
 }
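
The point of this refactor: the slave is now freed exclusively from its kobject's ->release() callback, so whichever kobject_put() drops the last reference (an enslave error path, __bond_release_one(), or a sysfs file still held open) performs the teardown, instead of bond_free_slave() kfree()ing memory a sysfs reader might still touch. A minimal sketch of the idiom, with illustrative names:

#include <linux/kobject.h>
#include <linux/slab.h>

struct example_slave {
	struct kobject kobj;
	/* ... per-slave state ... */
};

/* The embedding object may only be freed here, once the last
 * reference is gone. */
static void example_release(struct kobject *kobj)
{
	kfree(container_of(kobj, struct example_slave, kobj));
}

static struct kobj_type example_ktype = {
	.release = example_release,
};

static struct example_slave *example_alloc(struct kobject *parent)
{
	struct example_slave *s = kzalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		return NULL;

	if (kobject_init_and_add(&s->kobj, &example_ktype, parent, "slave")) {
		kobject_put(&s->kobj);	/* ->release() does the kfree() */
		return NULL;
	}

	return s;
}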
index 9abfaae..a4e4e15 100644
@@ -745,6 +745,19 @@ const struct bond_option *bond_opt_get(unsigned int option)
        return &bond_opts[option];
 }
 
+static void bond_set_xfrm_features(struct net_device *bond_dev, u64 mode)
+{
+       if (!IS_ENABLED(CONFIG_XFRM_OFFLOAD))
+               return;
+
+       if (mode == BOND_MODE_ACTIVEBACKUP)
+               bond_dev->wanted_features |= BOND_XFRM_FEATURES;
+       else
+               bond_dev->wanted_features &= ~BOND_XFRM_FEATURES;
+
+       netdev_update_features(bond_dev);
+}
+
 static int bond_option_mode_set(struct bonding *bond,
                                const struct bond_opt_value *newval)
 {
@@ -767,13 +780,8 @@ static int bond_option_mode_set(struct bonding *bond,
        if (newval->value == BOND_MODE_ALB)
                bond->params.tlb_dynamic_lb = 1;
 
-#ifdef CONFIG_XFRM_OFFLOAD
-       if (newval->value == BOND_MODE_ACTIVEBACKUP)
-               bond->dev->wanted_features |= BOND_XFRM_FEATURES;
-       else
-               bond->dev->wanted_features &= ~BOND_XFRM_FEATURES;
-       netdev_change_features(bond->dev);
-#endif /* CONFIG_XFRM_OFFLOAD */
+       if (bond->dev->reg_state == NETREG_REGISTERED)
+               bond_set_xfrm_features(bond->dev, newval->value);
 
        /* don't cache arp_validate between modes */
        bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
index 9b83466..fd07561 100644
@@ -121,7 +121,6 @@ static const struct slave_attribute *slave_attrs[] = {
 };
 
 #define to_slave_attr(_at) container_of(_at, struct slave_attribute, attr)
-#define to_slave(obj)  container_of(obj, struct slave, kobj)
 
 static ssize_t slave_show(struct kobject *kobj,
                          struct attribute *attr, char *buf)
@@ -132,28 +131,15 @@ static ssize_t slave_show(struct kobject *kobj,
        return slave_attr->show(slave, buf);
 }
 
-static const struct sysfs_ops slave_sysfs_ops = {
+const struct sysfs_ops slave_sysfs_ops = {
        .show = slave_show,
 };
 
-static struct kobj_type slave_ktype = {
-#ifdef CONFIG_SYSFS
-       .sysfs_ops = &slave_sysfs_ops,
-#endif
-};
-
 int bond_sysfs_slave_add(struct slave *slave)
 {
        const struct slave_attribute **a;
        int err;
 
-       err = kobject_init_and_add(&slave->kobj, &slave_ktype,
-                                  &(slave->dev->dev.kobj), "bonding_slave");
-       if (err) {
-               kobject_put(&slave->kobj);
-               return err;
-       }
-
        for (a = slave_attrs; *a; ++a) {
                err = sysfs_create_file(&slave->kobj, &((*a)->attr));
                if (err) {
@@ -171,6 +157,4 @@ void bond_sysfs_slave_del(struct slave *slave)
 
        for (a = slave_attrs; *a; ++a)
                sysfs_remove_file(&slave->kobj, &((*a)->attr));
-
-       kobject_put(&slave->kobj);
 }
index 1ccdbe8..1a9e9b9 100644
@@ -1295,12 +1295,22 @@ int c_can_power_up(struct net_device *dev)
                                time_after(time_out, jiffies))
                cpu_relax();
 
-       if (time_after(jiffies, time_out))
-               return -ETIMEDOUT;
+       if (time_after(jiffies, time_out)) {
+               ret = -ETIMEDOUT;
+               goto err_out;
+       }
 
        ret = c_can_start(dev);
-       if (!ret)
-               c_can_irq_control(priv, true);
+       if (ret)
+               goto err_out;
+
+       c_can_irq_control(priv, true);
+
+       return 0;
+
+err_out:
+       c_can_reset_ram(priv, false);
+       c_can_pm_runtime_put_sync(priv);
 
        return ret;
 }
index 6dee4f8..81e39d7 100644
@@ -592,7 +592,7 @@ static void can_restart(struct net_device *dev)
 
        cf->can_id |= CAN_ERR_RESTARTED;
 
-       netif_rx(skb);
+       netif_rx_ni(skb);
 
        stats->rx_packets++;
        stats->rx_bytes += cf->can_dlc;
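
Context for the one-liner: can_restart() is invoked from a delayed work item, i.e. process context, where plain netif_rx() queues the skb but may leave the RX softirq unserviced for a while; netif_rx_ni() is the process-context variant that also runs pending softirqs. A sketch of the same shape (names are illustrative):

#include <linux/can/dev.h>
#include <linux/workqueue.h>

struct example_priv {
	struct net_device *ndev;
	struct delayed_work restart_work;
};

static void example_restart_work(struct work_struct *work)
{
	struct example_priv *priv =
		container_of(to_delayed_work(work), struct example_priv,
			     restart_work);
	struct can_frame *cf;
	struct sk_buff *skb;

	skb = alloc_can_err_skb(priv->ndev, &cf);
	if (!skb)
		return;

	cf->can_id |= CAN_ERR_RESTARTED;

	/* Process context: use the _ni variant so the softirq raised by
	 * queueing the skb actually gets to run promptly. */
	netif_rx_ni(skb);
}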
index 881799b..99e5f27 100644
@@ -728,8 +728,10 @@ static int flexcan_get_berr_counter(const struct net_device *dev,
        int err;
 
        err = pm_runtime_get_sync(priv->dev);
-       if (err < 0)
+       if (err < 0) {
+               pm_runtime_put_noidle(priv->dev);
                return err;
+       }
 
        err = __flexcan_get_berr_counter(dev, bec);
 
@@ -1565,14 +1567,10 @@ static int flexcan_chip_start(struct net_device *dev)
                priv->write(reg_ctrl2, &regs->ctrl2);
        }
 
-       err = flexcan_transceiver_enable(priv);
-       if (err)
-               goto out_chip_disable;
-
        /* synchronize with the can bus */
        err = flexcan_chip_unfreeze(priv);
        if (err)
-               goto out_transceiver_disable;
+               goto out_chip_disable;
 
        priv->can.state = CAN_STATE_ERROR_ACTIVE;
 
@@ -1590,8 +1588,6 @@ static int flexcan_chip_start(struct net_device *dev)
 
        return 0;
 
- out_transceiver_disable:
-       flexcan_transceiver_disable(priv);
  out_chip_disable:
        flexcan_chip_disable(priv);
        return err;
@@ -1621,7 +1617,6 @@ static int __flexcan_chip_stop(struct net_device *dev, bool disable_on_error)
        priv->write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL,
                    &regs->ctrl);
 
-       flexcan_transceiver_disable(priv);
        priv->can.state = CAN_STATE_STOPPED;
 
        return 0;
@@ -1654,17 +1649,23 @@ static int flexcan_open(struct net_device *dev)
        }
 
        err = pm_runtime_get_sync(priv->dev);
-       if (err < 0)
+       if (err < 0) {
+               pm_runtime_put_noidle(priv->dev);
                return err;
+       }
 
        err = open_candev(dev);
        if (err)
                goto out_runtime_put;
 
-       err = request_irq(dev->irq, flexcan_irq, IRQF_SHARED, dev->name, dev);
+       err = flexcan_transceiver_enable(priv);
        if (err)
                goto out_close;
 
+       err = request_irq(dev->irq, flexcan_irq, IRQF_SHARED, dev->name, dev);
+       if (err)
+               goto out_transceiver_disable;
+
        if (priv->can.ctrlmode & CAN_CTRLMODE_FD)
                priv->mb_size = sizeof(struct flexcan_mb) + CANFD_MAX_DLEN;
        else
@@ -1716,6 +1717,8 @@ static int flexcan_open(struct net_device *dev)
        can_rx_offload_del(&priv->offload);
  out_free_irq:
        free_irq(dev->irq, dev);
+ out_transceiver_disable:
+       flexcan_transceiver_disable(priv);
  out_close:
        close_candev(dev);
  out_runtime_put:
@@ -1734,6 +1737,7 @@ static int flexcan_close(struct net_device *dev)
 
        can_rx_offload_del(&priv->offload);
        free_irq(dev->irq, dev);
+       flexcan_transceiver_disable(priv);
 
        close_candev(dev);
        pm_runtime_put(priv->dev);
@@ -1852,7 +1856,7 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev)
                return -EINVAL;
 
        /* stop mode property format is:
-        * <&gpr req_gpr>.
+        * <&gpr req_gpr req_bit>.
         */
        ret = of_property_read_u32_array(np, "fsl,stop-mode", out_val,
                                         ARRAY_SIZE(out_val));
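
Both flexcan hunks that touch pm_runtime_get_sync() apply the same rule: the call bumps the device's usage counter even when it fails, so the error path must drop the counter with pm_runtime_put_noidle() before returning. Sketched as a wrapper (later kernels provide this as pm_runtime_resume_and_get(); treat the name as an assumption if your tree predates it):

#include <linux/pm_runtime.h>

static inline int example_resume_and_get(struct device *dev)
{
	int ret;

	ret = pm_runtime_get_sync(dev);
	if (ret < 0) {
		/* The counter was incremented even on failure: undo it,
		 * but do not touch hardware that never resumed. */
		pm_runtime_put_noidle(dev);
		return ret;
	}

	return 0;
}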
index 6f76691..43151dd 100644
@@ -287,12 +287,12 @@ struct kvaser_pciefd_tx_packet {
 static const struct can_bittiming_const kvaser_pciefd_bittiming_const = {
        .name = KVASER_PCIEFD_DRV_NAME,
        .tseg1_min = 1,
-       .tseg1_max = 255,
+       .tseg1_max = 512,
        .tseg2_min = 1,
        .tseg2_max = 32,
        .sjw_max = 16,
        .brp_min = 1,
-       .brp_max = 4096,
+       .brp_max = 8192,
        .brp_inc = 1,
 };
 
@@ -692,8 +692,10 @@ static int kvaser_pciefd_open(struct net_device *netdev)
                return err;
 
        err = kvaser_pciefd_bus_on(can);
-       if (err)
+       if (err) {
+               close_candev(netdev);
                return err;
+       }
 
        return 0;
 }
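
The softing fix below enforces the same invariant: once open_candev() has succeeded, every later failure in ndo_open must be balanced by close_candev() before returning. Condensed shape of both fixes (bring_up_hw() is an assumed stand-in for kvaser_pciefd_bus_on()/softing_startstop()):

#include <linux/can/dev.h>

static int bring_up_hw(struct net_device *ndev);	/* assumed helper */

static int example_open(struct net_device *ndev)
{
	int err;

	err = open_candev(ndev);
	if (err)
		return err;

	err = bring_up_hw(ndev);
	if (err) {
		close_candev(ndev);	/* balance open_candev() */
		return err;
	}

	return 0;
}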
index 48be627..5f9f819 100644
@@ -16,7 +16,8 @@ config CAN_M_CAN_PLATFORM
 
 config CAN_M_CAN_TCAN4X5X
        depends on CAN_M_CAN
-       depends on REGMAP_SPI
+       depends on SPI
+       select REGMAP_SPI
        tristate "TCAN4X5X M_CAN device"
        help
          Say Y here if you want support for Texas Instruments TCAN4x5x
index 02c5795..61a93b1 100644
@@ -665,7 +665,7 @@ static int m_can_handle_state_change(struct net_device *dev,
        unsigned int ecr;
 
        switch (new_state) {
-       case CAN_STATE_ERROR_ACTIVE:
+       case CAN_STATE_ERROR_WARNING:
                /* error warning state */
                cdev->can.can_stats.error_warning++;
                cdev->can.state = CAN_STATE_ERROR_WARNING;
@@ -694,7 +694,7 @@ static int m_can_handle_state_change(struct net_device *dev,
        __m_can_get_berr_counter(dev, &bec);
 
        switch (new_state) {
-       case CAN_STATE_ERROR_ACTIVE:
+       case CAN_STATE_ERROR_WARNING:
                /* error warning state */
                cf->can_id |= CAN_ERR_CRTL;
                cf->data[1] = (bec.txerr > bec.rxerr) ?
@@ -956,6 +956,8 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
        struct net_device_stats *stats = &dev->stats;
        u32 ir;
 
+       if (pm_runtime_suspended(cdev->dev))
+               return IRQ_NONE;
        ir = m_can_read(cdev, M_CAN_IR);
        if (!ir)
                return IRQ_NONE;
@@ -1031,7 +1033,7 @@ static const struct can_bittiming_const m_can_bittiming_const_31X = {
        .name = KBUILD_MODNAME,
        .tseg1_min = 2,         /* Time segment 1 = prop_seg + phase_seg1 */
        .tseg1_max = 256,
-       .tseg2_min = 1,         /* Time segment 2 = phase_seg2 */
+       .tseg2_min = 2,         /* Time segment 2 = phase_seg2 */
        .tseg2_max = 128,
        .sjw_max = 128,
        .brp_min = 1,
@@ -1383,6 +1385,8 @@ static int m_can_dev_setup(struct m_can_classdev *m_can_dev)
                                                &m_can_data_bittiming_const_31X;
                break;
        case 32:
+       case 33:
+               /* Support both MCAN version v3.2.x and v3.3.0 */
                m_can_dev->can.bittiming_const = m_can_dev->bit_timing ?
                        m_can_dev->bit_timing : &m_can_bittiming_const_31X;
 
@@ -1414,6 +1418,9 @@ static void m_can_stop(struct net_device *dev)
        /* disable all interrupts */
        m_can_disable_all_interrupts(cdev);
 
+       /* Set init mode to disengage from the network */
+       m_can_config_endisable(cdev, true);
+
        /* set the state as STOPPED */
        cdev->can.state = CAN_STATE_STOPPED;
 }
@@ -1648,7 +1655,7 @@ static int m_can_open(struct net_device *dev)
                INIT_WORK(&cdev->tx_work, m_can_tx_work_queue);
 
                err = request_threaded_irq(dev->irq, NULL, m_can_isr,
-                                          IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
+                                          IRQF_ONESHOT,
                                           dev->name, dev);
        } else {
                err = request_irq(dev->irq, m_can_isr, IRQF_SHARED, dev->name,
@@ -1812,6 +1819,12 @@ out:
 }
 EXPORT_SYMBOL_GPL(m_can_class_allocate_dev);
 
+void m_can_class_free_dev(struct net_device *net)
+{
+       free_candev(net);
+}
+EXPORT_SYMBOL_GPL(m_can_class_free_dev);
+
 int m_can_class_register(struct m_can_classdev *m_can_dev)
 {
        int ret;
@@ -1850,7 +1863,6 @@ pm_runtime_fail:
        if (ret) {
                if (m_can_dev->pm_clock_support)
                        pm_runtime_disable(m_can_dev->dev);
-               free_candev(m_can_dev->net);
        }
 
        return ret;
@@ -1908,8 +1920,6 @@ void m_can_class_unregister(struct m_can_classdev *m_can_dev)
        unregister_candev(m_can_dev->net);
 
        m_can_clk_stop(m_can_dev);
-
-       free_candev(m_can_dev->net);
 }
 EXPORT_SYMBOL_GPL(m_can_class_unregister);
 
index 49f42b5..b2699a7 100644
@@ -99,6 +99,7 @@ struct m_can_classdev {
 };
 
 struct m_can_classdev *m_can_class_allocate_dev(struct device *dev);
+void m_can_class_free_dev(struct net_device *net);
 int m_can_class_register(struct m_can_classdev *cdev);
 void m_can_class_unregister(struct m_can_classdev *cdev);
 int m_can_class_get_clocks(struct m_can_classdev *cdev);
index e6d0cb9..161cb9b 100644
@@ -67,32 +67,36 @@ static int m_can_plat_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-       if (!priv)
-               return -ENOMEM;
+       if (!priv) {
+               ret = -ENOMEM;
+               goto probe_fail;
+       }
 
        mcan_class->device_data = priv;
 
-       m_can_class_get_clocks(mcan_class);
+       ret = m_can_class_get_clocks(mcan_class);
+       if (ret)
+               goto probe_fail;
 
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "m_can");
        addr = devm_ioremap_resource(&pdev->dev, res);
        irq = platform_get_irq_byname(pdev, "int0");
        if (IS_ERR(addr) || irq < 0) {
                ret = -EINVAL;
-               goto failed_ret;
+               goto probe_fail;
        }
 
        /* message ram could be shared */
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "message_ram");
        if (!res) {
                ret = -ENODEV;
-               goto failed_ret;
+               goto probe_fail;
        }
 
        mram_addr = devm_ioremap(&pdev->dev, res->start, resource_size(res));
        if (!mram_addr) {
                ret = -ENOMEM;
-               goto failed_ret;
+               goto probe_fail;
        }
 
        priv->base = addr;
@@ -111,9 +115,10 @@ static int m_can_plat_probe(struct platform_device *pdev)
 
        m_can_init_ram(mcan_class);
 
-       ret = m_can_class_register(mcan_class);
+       return m_can_class_register(mcan_class);
 
-failed_ret:
+probe_fail:
+       m_can_class_free_dev(mcan_class->net);
        return ret;
 }
 
@@ -134,6 +139,8 @@ static int m_can_plat_remove(struct platform_device *pdev)
 
        m_can_class_unregister(mcan_class);
 
+       m_can_class_free_dev(mcan_class->net);
+
        platform_set_drvdata(pdev, NULL);
 
        return 0;
index eacd428..7347ab3 100644
@@ -440,14 +440,18 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
                return -ENOMEM;
 
        priv = devm_kzalloc(&spi->dev, sizeof(*priv), GFP_KERNEL);
-       if (!priv)
-               return -ENOMEM;
+       if (!priv) {
+               ret = -ENOMEM;
+               goto out_m_can_class_free_dev;
+       }
 
        priv->power = devm_regulator_get_optional(&spi->dev, "vsup");
-       if (PTR_ERR(priv->power) == -EPROBE_DEFER)
-               return -EPROBE_DEFER;
-       else
+       if (PTR_ERR(priv->power) == -EPROBE_DEFER) {
+               ret = -EPROBE_DEFER;
+               goto out_m_can_class_free_dev;
+       } else {
                priv->power = NULL;
+       }
 
        mcan_class->device_data = priv;
 
@@ -460,8 +464,10 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
        }
 
        /* Sanity check */
-       if (freq < 20000000 || freq > TCAN4X5X_EXT_CLK_DEF)
-               return -ERANGE;
+       if (freq < 20000000 || freq > TCAN4X5X_EXT_CLK_DEF) {
+               ret = -ERANGE;
+               goto out_m_can_class_free_dev;
+       }
 
        priv->reg_offset = TCAN4X5X_MCAN_OFFSET;
        priv->mram_start = TCAN4X5X_MRAM_START;
@@ -483,14 +489,18 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
        spi->bits_per_word = 32;
        ret = spi_setup(spi);
        if (ret)
-               goto out_clk;
+               goto out_m_can_class_free_dev;
 
        priv->regmap = devm_regmap_init(&spi->dev, &tcan4x5x_bus,
                                        &spi->dev, &tcan4x5x_regmap);
+       if (IS_ERR(priv->regmap)) {
+               ret = PTR_ERR(priv->regmap);
+               goto out_m_can_class_free_dev;
+       }
 
        ret = tcan4x5x_power_enable(priv->power, 1);
        if (ret)
-               goto out_clk;
+               goto out_m_can_class_free_dev;
 
        ret = tcan4x5x_parse_config(mcan_class);
        if (ret)
@@ -509,13 +519,10 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
 
 out_power:
        tcan4x5x_power_enable(priv->power, 0);
-out_clk:
-       if (!IS_ERR(mcan_class->cclk)) {
-               clk_disable_unprepare(mcan_class->cclk);
-               clk_disable_unprepare(mcan_class->hclk);
-       }
-
+ out_m_can_class_free_dev:
+       m_can_class_free_dev(mcan_class->net);
        dev_err(&spi->dev, "Probe failed, err=%d\n", ret);
+
        return ret;
 }
 
@@ -523,9 +530,11 @@ static int tcan4x5x_can_remove(struct spi_device *spi)
 {
        struct tcan4x5x_priv *priv = spi_get_drvdata(spi);
 
+       m_can_class_unregister(priv->mcan_dev);
+
        tcan4x5x_power_enable(priv->power, 0);
 
-       m_can_class_unregister(priv->mcan_dev);
+       m_can_class_free_dev(priv->mcan_dev->net);
 
        return 0;
 }
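
The added IS_ERR() check matters because devm_regmap_init() reports failure through an ERR_PTR()-encoded pointer and never returns NULL, so a bare NULL test would miss every failure. The idiom, reduced to a minimal self-contained helper (wrapper name and the dev-as-bus-context choice mirror the hunk above and are illustrative):

#include <linux/err.h>
#include <linux/regmap.h>

static int example_init_regmap(struct device *dev,
			       const struct regmap_bus *bus,
			       const struct regmap_config *cfg,
			       struct regmap **out)
{
	struct regmap *map = devm_regmap_init(dev, bus, dev, cfg);

	if (IS_ERR(map))
		return PTR_ERR(map);	/* never compare against NULL */

	*out = map;
	return 0;
}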
index 9f10779..25a4d7d 100644
@@ -474,7 +474,6 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
                netdev_dbg(dev, "arbitration lost interrupt\n");
                alc = priv->read_reg(priv, SJA1000_ALC);
                priv->can.can_stats.arbitration_lost++;
-               stats->tx_errors++;
                cf->can_id |= CAN_ERR_LOSTARB;
                cf->data[0] = alc & 0x1f;
        }
index 9d2faaa..c9ca8b9 100644
@@ -382,8 +382,13 @@ static int softing_netdev_open(struct net_device *ndev)
 
        /* check or determine and set bittime */
        ret = open_candev(ndev);
-       if (!ret)
-               ret = softing_startstop(ndev, 1);
+       if (ret)
+               return ret;
+
+       ret = softing_startstop(ndev, 1);
+       if (ret < 0)
+               close_candev(ndev);
+
        return ret;
 }
 
index 9c215f7..8a39be0 100644
@@ -2738,6 +2738,10 @@ static int mcp251xfd_probe(struct spi_device *spi)
        u32 freq;
        int err;
 
+       if (!spi->irq)
+               return dev_err_probe(&spi->dev, -ENXIO,
+                                    "No IRQ specified (maybe node \"interrupts-extended\" in DT missing)!\n");
+
        rx_int = devm_gpiod_get_optional(&spi->dev, "microchip,rx-int",
                                         GPIOD_IN);
        if (PTR_ERR(rx_int) == -EPROBE_DEFER)
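
dev_err_probe() used here folds the error message and the return value into one statement, and it logs -EPROBE_DEFER at debug rather than error level, keeping deferred probes quiet. The general shape, with devm_clk_get() chosen purely as an illustration of a resource that can defer:

#include <linux/clk.h>
#include <linux/device.h>

static int example_get_clk(struct device *dev, struct clk **out)
{
	struct clk *clk = devm_clk_get(dev, NULL);

	if (IS_ERR(clk))
		return dev_err_probe(dev, PTR_ERR(clk),
				     "failed to get clock\n");

	*out = clk;
	return 0;
}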
index e2c6cf4..b3f2f4f 100644
@@ -604,7 +604,6 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status)
                netdev_dbg(dev, "arbitration lost interrupt\n");
                alc = readl(priv->base + SUN4I_REG_STA_ADDR);
                priv->can.can_stats.arbitration_lost++;
-               stats->tx_errors++;
                if (likely(skb)) {
                        cf->can_id |= CAN_ERR_LOSTARB;
                        cf->data[0] = (alc >> 8) & 0x1f;
index 9913f54..2c22f40 100644
@@ -881,7 +881,8 @@ static int ti_hecc_probe(struct platform_device *pdev)
        priv->base = devm_platform_ioremap_resource_byname(pdev, "hecc");
        if (IS_ERR(priv->base)) {
                dev_err(&pdev->dev, "hecc ioremap failed\n");
-               return PTR_ERR(priv->base);
+               err = PTR_ERR(priv->base);
+               goto probe_exit_candev;
        }
 
        /* handle hecc-ram memory */
@@ -889,20 +890,22 @@ static int ti_hecc_probe(struct platform_device *pdev)
                                                               "hecc-ram");
        if (IS_ERR(priv->hecc_ram)) {
                dev_err(&pdev->dev, "hecc-ram ioremap failed\n");
-               return PTR_ERR(priv->hecc_ram);
+               err = PTR_ERR(priv->hecc_ram);
+               goto probe_exit_candev;
        }
 
        /* handle mbx memory */
        priv->mbx = devm_platform_ioremap_resource_byname(pdev, "mbx");
        if (IS_ERR(priv->mbx)) {
                dev_err(&pdev->dev, "mbx ioremap failed\n");
-               return PTR_ERR(priv->mbx);
+               err = PTR_ERR(priv->mbx);
+               goto probe_exit_candev;
        }
 
        irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
        if (!irq) {
                dev_err(&pdev->dev, "No irq resource\n");
-               goto probe_exit;
+               goto probe_exit_candev;
        }
 
        priv->ndev = ndev;
@@ -966,7 +969,7 @@ probe_exit_release_clk:
        clk_put(priv->clk);
 probe_exit_candev:
        free_candev(ndev);
-probe_exit:
+
        return err;
 }
 
index 3005157..018ca3b 100644
@@ -63,21 +63,27 @@ enum gs_can_identify_mode {
 };
 
 /* data types passed between host and device */
+
+/* The firmware on the original USB2CAN by Geschwister Schneider
+ * Technologie Entwicklungs- und Vertriebs UG exchanges all data
+ * between the host and the device in host byte order. This is done
+ * with the struct gs_host_config::byte_order member, which is sent
+ * first to indicate the desired byte order.
+ *
+ * The widely used open source firmware candleLight doesn't support
+ * this feature and exchanges the data in little endian byte order.
+ */
 struct gs_host_config {
-       u32 byte_order;
+       __le32 byte_order;
 } __packed;
-/* All data exchanged between host and device is exchanged in host byte order,
- * thanks to the struct gs_host_config byte_order member, which is sent first
- * to indicate the desired byte order.
- */
 
 struct gs_device_config {
        u8 reserved1;
        u8 reserved2;
        u8 reserved3;
        u8 icount;
-       u32 sw_version;
-       u32 hw_version;
+       __le32 sw_version;
+       __le32 hw_version;
 } __packed;
 
 #define GS_CAN_MODE_NORMAL               0
@@ -87,26 +93,26 @@ struct gs_device_config {
 #define GS_CAN_MODE_ONE_SHOT             BIT(3)
 
 struct gs_device_mode {
-       u32 mode;
-       u32 flags;
+       __le32 mode;
+       __le32 flags;
 } __packed;
 
 struct gs_device_state {
-       u32 state;
-       u32 rxerr;
-       u32 txerr;
+       __le32 state;
+       __le32 rxerr;
+       __le32 txerr;
 } __packed;
 
 struct gs_device_bittiming {
-       u32 prop_seg;
-       u32 phase_seg1;
-       u32 phase_seg2;
-       u32 sjw;
-       u32 brp;
+       __le32 prop_seg;
+       __le32 phase_seg1;
+       __le32 phase_seg2;
+       __le32 sjw;
+       __le32 brp;
 } __packed;
 
 struct gs_identify_mode {
-       u32 mode;
+       __le32 mode;
 } __packed;
 
 #define GS_CAN_FEATURE_LISTEN_ONLY      BIT(0)
@@ -117,23 +123,23 @@ struct gs_identify_mode {
 #define GS_CAN_FEATURE_IDENTIFY         BIT(5)
 
 struct gs_device_bt_const {
-       u32 feature;
-       u32 fclk_can;
-       u32 tseg1_min;
-       u32 tseg1_max;
-       u32 tseg2_min;
-       u32 tseg2_max;
-       u32 sjw_max;
-       u32 brp_min;
-       u32 brp_max;
-       u32 brp_inc;
+       __le32 feature;
+       __le32 fclk_can;
+       __le32 tseg1_min;
+       __le32 tseg1_max;
+       __le32 tseg2_min;
+       __le32 tseg2_max;
+       __le32 sjw_max;
+       __le32 brp_min;
+       __le32 brp_max;
+       __le32 brp_inc;
 } __packed;
 
 #define GS_CAN_FLAG_OVERFLOW 1
 
 struct gs_host_frame {
        u32 echo_id;
-       u32 can_id;
+       __le32 can_id;
 
        u8 can_dlc;
        u8 channel;
@@ -329,13 +335,13 @@ static void gs_usb_receive_bulk_callback(struct urb *urb)
                if (!skb)
                        return;
 
-               cf->can_id = hf->can_id;
+               cf->can_id = le32_to_cpu(hf->can_id);
 
                cf->can_dlc = get_can_dlc(hf->can_dlc);
                memcpy(cf->data, hf->data, 8);
 
                /* ERROR frames tell us information about the controller */
-               if (hf->can_id & CAN_ERR_FLAG)
+               if (le32_to_cpu(hf->can_id) & CAN_ERR_FLAG)
                        gs_update_state(dev, cf);
 
                netdev->stats.rx_packets++;
@@ -418,11 +424,11 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
        if (!dbt)
                return -ENOMEM;
 
-       dbt->prop_seg = bt->prop_seg;
-       dbt->phase_seg1 = bt->phase_seg1;
-       dbt->phase_seg2 = bt->phase_seg2;
-       dbt->sjw = bt->sjw;
-       dbt->brp = bt->brp;
+       dbt->prop_seg = cpu_to_le32(bt->prop_seg);
+       dbt->phase_seg1 = cpu_to_le32(bt->phase_seg1);
+       dbt->phase_seg2 = cpu_to_le32(bt->phase_seg2);
+       dbt->sjw = cpu_to_le32(bt->sjw);
+       dbt->brp = cpu_to_le32(bt->brp);
 
        /* request bit timings */
        rc = usb_control_msg(interface_to_usbdev(intf),
@@ -503,7 +509,7 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb,
 
        cf = (struct can_frame *)skb->data;
 
-       hf->can_id = cf->can_id;
+       hf->can_id = cpu_to_le32(cf->can_id);
        hf->can_dlc = cf->can_dlc;
        memcpy(hf->data, cf->data, cf->can_dlc);
 
@@ -573,6 +579,7 @@ static int gs_can_open(struct net_device *netdev)
        int rc, i;
        struct gs_device_mode *dm;
        u32 ctrlmode;
+       u32 flags = 0;
 
        rc = open_candev(netdev);
        if (rc)
@@ -640,24 +647,24 @@ static int gs_can_open(struct net_device *netdev)
 
        /* flags */
        ctrlmode = dev->can.ctrlmode;
-       dm->flags = 0;
 
        if (ctrlmode & CAN_CTRLMODE_LOOPBACK)
-               dm->flags |= GS_CAN_MODE_LOOP_BACK;
+               flags |= GS_CAN_MODE_LOOP_BACK;
        else if (ctrlmode & CAN_CTRLMODE_LISTENONLY)
-               dm->flags |= GS_CAN_MODE_LISTEN_ONLY;
+               flags |= GS_CAN_MODE_LISTEN_ONLY;
 
        /* Controller is not allowed to retry TX
         * this mode is unavailable on atmels uc3c hardware
         */
        if (ctrlmode & CAN_CTRLMODE_ONE_SHOT)
-               dm->flags |= GS_CAN_MODE_ONE_SHOT;
+               flags |= GS_CAN_MODE_ONE_SHOT;
 
        if (ctrlmode & CAN_CTRLMODE_3_SAMPLES)
-               dm->flags |= GS_CAN_MODE_TRIPLE_SAMPLE;
+               flags |= GS_CAN_MODE_TRIPLE_SAMPLE;
 
        /* finally start device */
-       dm->mode = GS_CAN_MODE_START;
+       dm->mode = cpu_to_le32(GS_CAN_MODE_START);
+       dm->flags = cpu_to_le32(flags);
        rc = usb_control_msg(interface_to_usbdev(dev->iface),
                             usb_sndctrlpipe(interface_to_usbdev(dev->iface), 0),
                             GS_USB_BREQ_MODE,
@@ -737,9 +744,9 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
                return -ENOMEM;
 
        if (do_identify)
-               imode->mode = GS_CAN_IDENTIFY_ON;
+               imode->mode = cpu_to_le32(GS_CAN_IDENTIFY_ON);
        else
-               imode->mode = GS_CAN_IDENTIFY_OFF;
+               imode->mode = cpu_to_le32(GS_CAN_IDENTIFY_OFF);
 
        rc = usb_control_msg(interface_to_usbdev(dev->iface),
                             usb_sndctrlpipe(interface_to_usbdev(dev->iface),
@@ -790,6 +797,7 @@ static struct gs_can *gs_make_candev(unsigned int channel,
        struct net_device *netdev;
        int rc;
        struct gs_device_bt_const *bt_const;
+       u32 feature;
 
        bt_const = kmalloc(sizeof(*bt_const), GFP_KERNEL);
        if (!bt_const)
@@ -830,14 +838,14 @@ static struct gs_can *gs_make_candev(unsigned int channel,
 
        /* dev setup */
        strcpy(dev->bt_const.name, "gs_usb");
-       dev->bt_const.tseg1_min = bt_const->tseg1_min;
-       dev->bt_const.tseg1_max = bt_const->tseg1_max;
-       dev->bt_const.tseg2_min = bt_const->tseg2_min;
-       dev->bt_const.tseg2_max = bt_const->tseg2_max;
-       dev->bt_const.sjw_max = bt_const->sjw_max;
-       dev->bt_const.brp_min = bt_const->brp_min;
-       dev->bt_const.brp_max = bt_const->brp_max;
-       dev->bt_const.brp_inc = bt_const->brp_inc;
+       dev->bt_const.tseg1_min = le32_to_cpu(bt_const->tseg1_min);
+       dev->bt_const.tseg1_max = le32_to_cpu(bt_const->tseg1_max);
+       dev->bt_const.tseg2_min = le32_to_cpu(bt_const->tseg2_min);
+       dev->bt_const.tseg2_max = le32_to_cpu(bt_const->tseg2_max);
+       dev->bt_const.sjw_max = le32_to_cpu(bt_const->sjw_max);
+       dev->bt_const.brp_min = le32_to_cpu(bt_const->brp_min);
+       dev->bt_const.brp_max = le32_to_cpu(bt_const->brp_max);
+       dev->bt_const.brp_inc = le32_to_cpu(bt_const->brp_inc);
 
        dev->udev = interface_to_usbdev(intf);
        dev->iface = intf;
@@ -854,28 +862,29 @@ static struct gs_can *gs_make_candev(unsigned int channel,
 
        /* can setup */
        dev->can.state = CAN_STATE_STOPPED;
-       dev->can.clock.freq = bt_const->fclk_can;
+       dev->can.clock.freq = le32_to_cpu(bt_const->fclk_can);
        dev->can.bittiming_const = &dev->bt_const;
        dev->can.do_set_bittiming = gs_usb_set_bittiming;
 
        dev->can.ctrlmode_supported = 0;
 
-       if (bt_const->feature & GS_CAN_FEATURE_LISTEN_ONLY)
+       feature = le32_to_cpu(bt_const->feature);
+       if (feature & GS_CAN_FEATURE_LISTEN_ONLY)
                dev->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY;
 
-       if (bt_const->feature & GS_CAN_FEATURE_LOOP_BACK)
+       if (feature & GS_CAN_FEATURE_LOOP_BACK)
                dev->can.ctrlmode_supported |= CAN_CTRLMODE_LOOPBACK;
 
-       if (bt_const->feature & GS_CAN_FEATURE_TRIPLE_SAMPLE)
+       if (feature & GS_CAN_FEATURE_TRIPLE_SAMPLE)
                dev->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES;
 
-       if (bt_const->feature & GS_CAN_FEATURE_ONE_SHOT)
+       if (feature & GS_CAN_FEATURE_ONE_SHOT)
                dev->can.ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT;
 
        SET_NETDEV_DEV(netdev, &intf->dev);
 
-       if (dconf->sw_version > 1)
-               if (bt_const->feature & GS_CAN_FEATURE_IDENTIFY)
+       if (le32_to_cpu(dconf->sw_version) > 1)
+               if (feature & GS_CAN_FEATURE_IDENTIFY)
                        netdev->ethtool_ops = &gs_usb_ethtool_ops;
 
        kfree(bt_const);
@@ -910,7 +919,7 @@ static int gs_usb_probe(struct usb_interface *intf,
        if (!hconf)
                return -ENOMEM;
 
-       hconf->byte_order = 0x0000beef;
+       hconf->byte_order = cpu_to_le32(0x0000beef);
 
        /* send host config */
        rc = usb_control_msg(interface_to_usbdev(intf),
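
Note: the __le32 annotations introduced in this patch are what let sparse flag a missing byte-order conversion; the wire format is now fixed little endian and conversion happens only at the access points. A minimal sketch of the idiom, with illustrative names:

/* Hedged sketch, not part of the patch. */
struct example_wire {
	__le32 value;			/* fixed little endian on the wire */
} __packed;

static void example_fill(struct example_wire *w, u32 host_val)
{
	w->value = cpu_to_le32(host_val);	/* host -> wire; swaps on BE hosts */
}

static u32 example_parse(const struct example_wire *w)
{
	return le32_to_cpu(w->value);		/* wire -> host; no-op on LE hosts */
}
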
index 7ab87a7..218fadc 100644 (file)
@@ -367,7 +367,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = {
        .tseg2_max = 32,
        .sjw_max = 16,
        .brp_min = 1,
-       .brp_max = 4096,
+       .brp_max = 8192,
        .brp_inc = 1,
 };
 
index 5857b37..e97f2e0 100644 (file)
@@ -326,8 +326,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb,
        if (!ctx)
                return NETDEV_TX_BUSY;
 
-       can_put_echo_skb(skb, priv->netdev, ctx->ndx);
-
        if (cf->can_id & CAN_EFF_FLAG) {
                /* SIDH    | SIDL                 | EIDH   | EIDL
                 * 28 - 21 | 20 19 18 x x x 17 16 | 15 - 8 | 7 - 0
@@ -357,6 +355,8 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb,
        if (cf->can_id & CAN_RTR_FLAG)
                usb_msg.dlc |= MCBA_DLC_RTR_MASK;
 
+       can_put_echo_skb(skb, priv->netdev, ctx->ndx);
+
        err = mcba_usb_xmit(priv, (struct mcba_usb_msg *)&usb_msg, ctx);
        if (err)
                goto xmit_failed;
index c276479..204ccb2 100644 (file)
@@ -156,7 +156,7 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time)
                if (time_ref->ts_dev_1 < time_ref->ts_dev_2) {
                        /* case when event time (tsw) wraps */
                        if (ts < time_ref->ts_dev_1)
-                               delta_ts = 1 << time_ref->adapter->ts_used_bits;
+                               delta_ts = BIT_ULL(time_ref->adapter->ts_used_bits);
 
                /* Otherwise, sync time counter (ts_dev_2) has wrapped:
                 * handle case when event time (tsn) hasn't.
@@ -168,7 +168,7 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time)
                 *              tsn            ts
                 */
                } else if (time_ref->ts_dev_1 < ts) {
-                       delta_ts = -(1 << time_ref->adapter->ts_used_bits);
+                       delta_ts = -BIT_ULL(time_ref->adapter->ts_used_bits);
                }
 
                /* add delay between last sync and event timestamps */
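
Note: the switch to BIT_ULL() matters because ts_used_bits can reach 32; "1 << 32" shifts a 32-bit int by its full width, which is undefined behaviour, and the result could not be represented in 32 bits anyway. A minimal illustration:

/* Hedged sketch, not part of the patch. */
static u64 example_wrap_period(unsigned int ts_used_bits)
{
	/* BIT_ULL(32) == 1ULL << 32 == 0x100000000; valid for 0..63 */
	return BIT_ULL(ts_used_bits);
}
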
index 74db81d..09701c1 100644 (file)
@@ -26,6 +26,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/etherdevice.h>
 #include <linux/firmware.h>
 #include <linux/if_bridge.h>
@@ -1837,6 +1838,16 @@ static int gswip_gphy_fw_list(struct gswip_priv *priv,
                i++;
        }
 
+       /* The standalone PHY11G requires 300ms to be fully
+        * initialized and ready for any MDIO communication after being
+        * taken out of reset. For the SoC-internal GPHY variant there
+        * is no (known) documentation for the minimum time after a
+        * reset. Use the same value as for the standalone variant as
+        * some users have reported internal PHYs not being detected
+        * without any delay.
+        */
+       msleep(300);
+
        return 0;
 
 remove_gphy:
index bd297ae..34cca0a 100644 (file)
@@ -2297,6 +2297,8 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip)
                usleep_range(10000, 20000);
                gpiod_set_value_cansleep(gpiod, 0);
                usleep_range(10000, 20000);
+
+               mv88e6xxx_g1_wait_eeprom_done(chip);
        }
 }
 
index f62aa83..33d443a 100644 (file)
@@ -75,6 +75,37 @@ static int mv88e6xxx_g1_wait_init_ready(struct mv88e6xxx_chip *chip)
        return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_STS, bit, 1);
 }
 
+void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip)
+{
+       const unsigned long timeout = jiffies + 1 * HZ;
+       u16 val;
+       int err;
+
+       /* Wait up to 1 second for the switch to finish reading the
+        * EEPROM.
+        */
+       while (time_before(jiffies, timeout)) {
+               err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &val);
+               if (err) {
+                       dev_err(chip->dev, "Error reading status");
+                       return;
+               }
+
+               /* If the switch is still resetting, it may not
+                * respond on the bus, and so the MDIO read returns
+                * 0xffff. Distinguish that from the EEPROM-done
+                * state, which is signalled by bit 0 being set.
+                */
+               if (val != 0xffff &&
+                   val & BIT(MV88E6XXX_G1_STS_IRQ_EEPROM_DONE))
+                       return;
+
+               usleep_range(1000, 2000);
+       }
+
+       dev_err(chip->dev, "Timeout waiting for EEPROM done");
+}
+
 /* Offset 0x01: Switch MAC Address Register Bytes 0 & 1
  * Offset 0x02: Switch MAC Address Register Bytes 2 & 3
  * Offset 0x03: Switch MAC Address Register Bytes 4 & 5
index 1e3546f..e05abe6 100644 (file)
@@ -278,6 +278,7 @@ int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr);
 int mv88e6185_g1_reset(struct mv88e6xxx_chip *chip);
 int mv88e6352_g1_reset(struct mv88e6xxx_chip *chip);
 int mv88e6250_g1_reset(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip);
 
 int mv88e6185_g1_ppu_enable(struct mv88e6xxx_chip *chip);
 int mv88e6185_g1_ppu_disable(struct mv88e6xxx_chip *chip);
index 48390b7..1048509 100644 (file)
@@ -125,11 +125,9 @@ static int mv88e6xxx_g1_vtu_vid_write(struct mv88e6xxx_chip *chip,
  * Offset 0x08: VTU/STU Data Register 2
  * Offset 0x09: VTU/STU Data Register 3
  */
-
-static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip,
-                                     struct mv88e6xxx_vtu_entry *entry)
+static int mv88e6185_g1_vtu_stu_data_read(struct mv88e6xxx_chip *chip,
+                                         u16 *regs)
 {
-       u16 regs[3];
        int i;
 
        /* Read all 3 VTU/STU Data registers */
@@ -142,12 +140,45 @@ static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip,
                        return err;
        }
 
-       /* Extract MemberTag and PortState data */
+       return 0;
+}
+
+static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip,
+                                     struct mv88e6xxx_vtu_entry *entry)
+{
+       u16 regs[3];
+       int err;
+       int i;
+
+       err = mv88e6185_g1_vtu_stu_data_read(chip, regs);
+       if (err)
+               return err;
+
+       /* Extract MemberTag data */
        for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
                unsigned int member_offset = (i % 4) * 4;
-               unsigned int state_offset = member_offset + 2;
 
                entry->member[i] = (regs[i / 4] >> member_offset) & 0x3;
+       }
+
+       return 0;
+}
+
+static int mv88e6185_g1_stu_data_read(struct mv88e6xxx_chip *chip,
+                                     struct mv88e6xxx_vtu_entry *entry)
+{
+       u16 regs[3];
+       int err;
+       int i;
+
+       err = mv88e6185_g1_vtu_stu_data_read(chip, regs);
+       if (err)
+               return err;
+
+       /* Extract PortState data */
+       for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
+               unsigned int state_offset = (i % 4) * 4 + 2;
+
                entry->state[i] = (regs[i / 4] >> state_offset) & 0x3;
        }
 
@@ -349,6 +380,10 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
                if (err)
                        return err;
 
+               err = mv88e6185_g1_stu_data_read(chip, entry);
+               if (err)
+                       return err;
+
                /* VTU DBNum[3:0] are located in VTU Operation 3:0
                 * VTU DBNum[7:4] are located in VTU Operation 11:8
                 */
@@ -374,16 +409,20 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
                return err;
 
        if (entry->valid) {
-               /* Fetch (and mask) VLAN PortState data from the STU */
-               err = mv88e6xxx_g1_vtu_stu_get(chip, entry);
+               err = mv88e6185_g1_vtu_data_read(chip, entry);
                if (err)
                        return err;
 
-               err = mv88e6185_g1_vtu_data_read(chip, entry);
+               err = mv88e6xxx_g1_vtu_fid_read(chip, entry);
                if (err)
                        return err;
 
-               err = mv88e6xxx_g1_vtu_fid_read(chip, entry);
+               /* Fetch VLAN PortState data from the STU */
+               err = mv88e6xxx_g1_vtu_stu_get(chip, entry);
+               if (err)
+                       return err;
+
+               err = mv88e6185_g1_stu_data_read(chip, entry);
                if (err)
                        return err;
        }
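
Note: each 16-bit VTU/STU data register packs four ports; a port's 2-bit MemberTag sits at bit offset (port % 4) * 4 and its 2-bit PortState two bits above that. A worked sketch for extracting both fields, using an illustrative helper that is not part of the patch:

/* Hedged sketch: e.g. port 5 -> regs[1], member bits 5:4, state bits 7:6. */
static void example_extract(const u16 regs[3], unsigned int port,
			    u8 *member, u8 *state)
{
	unsigned int member_offset = (port % 4) * 4;
	unsigned int state_offset = member_offset + 2;

	*member = (regs[port / 4] >> member_offset) & 0x3;
	*state = (regs[port / 4] >> state_offset) & 0x3;
}
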
index f791860..c444ef3 100644 (file)
@@ -569,7 +569,6 @@ static int felix_setup(struct dsa_switch *ds)
        struct ocelot *ocelot = ds->priv;
        struct felix *felix = ocelot_to_felix(ocelot);
        int port, err;
-       int tc;
 
        err = felix_init_structs(felix, ds->num_ports);
        if (err)
@@ -608,12 +607,6 @@ static int felix_setup(struct dsa_switch *ds)
        ocelot_write_rix(ocelot,
                         ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)),
                         ANA_PGID_PGID, PGID_UC);
-       /* Setup the per-traffic class flooding PGIDs */
-       for (tc = 0; tc < FELIX_NUM_TC; tc++)
-               ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) |
-                                ANA_FLOODING_FLD_BROADCAST(PGID_MC) |
-                                ANA_FLOODING_FLD_UNICAST(PGID_UC),
-                                ANA_FLOODING, tc);
 
        ds->mtu_enforcement_ingress = true;
        ds->configure_vlan_while_not_filtering = true;
index 3e925b8..2e5bbdc 100644 (file)
@@ -1429,6 +1429,7 @@ static int felix_pci_probe(struct pci_dev *pdev,
        pci_set_drvdata(pdev, felix);
        ocelot = &felix->ocelot;
        ocelot->dev = &pdev->dev;
+       ocelot->num_flooding_pgids = FELIX_NUM_TC;
        felix->info = &felix_info_vsc9959;
        felix->switch_base = pci_resource_start(pdev,
                                                felix->info->switch_pci_bar);
index 1d420c4..ebbaf68 100644 (file)
@@ -1210,6 +1210,7 @@ static int seville_probe(struct platform_device *pdev)
 
        ocelot = &felix->ocelot;
        ocelot->dev = &pdev->dev;
+       ocelot->num_flooding_pgids = 1;
        felix->info = &seville_info_vsc9953;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
index d92516a..9cd7501 100644 (file)
@@ -21,6 +21,7 @@ config ET131X
        tristate "Agere ET-1310 Gigabit Ethernet support"
        depends on PCI
        select PHYLIB
+       select CRC32
        help
          This driver supports Agere ET-1310 ethernet adapters.
 
index ad30cac..032ab9f 100644 (file)
@@ -516,6 +516,7 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
 {
        struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0];
        struct ena_eth_io_rx_cdesc_base *cdesc = NULL;
+       u16 q_depth = io_cq->q_depth;
        u16 cdesc_idx = 0;
        u16 nb_hw_desc;
        u16 i = 0;
@@ -543,6 +544,8 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
        do {
                ena_buf[i].len = cdesc->length;
                ena_buf[i].req_id = cdesc->req_id;
+               if (unlikely(ena_buf[i].req_id >= q_depth))
+                       return -EIO;
 
                if (++i >= nb_hw_desc)
                        break;
index e8131da..df1884d 100644 (file)
@@ -789,24 +789,6 @@ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
                                              adapter->num_io_queues);
 }
 
-static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
-{
-       if (likely(req_id < rx_ring->ring_size))
-               return 0;
-
-       netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
-                 "Invalid rx req_id: %hu\n", req_id);
-
-       u64_stats_update_begin(&rx_ring->syncp);
-       rx_ring->rx_stats.bad_req_id++;
-       u64_stats_update_end(&rx_ring->syncp);
-
-       /* Trigger device reset */
-       rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
-       set_bit(ENA_FLAG_TRIGGER_RESET, &rx_ring->adapter->flags);
-       return -EFAULT;
-}
-
 /* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
  * @adapter: network interface device structure
  * @qid: queue index
@@ -926,10 +908,14 @@ static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
 static int ena_alloc_rx_page(struct ena_ring *rx_ring,
                                    struct ena_rx_buffer *rx_info, gfp_t gfp)
 {
+       int headroom = rx_ring->rx_headroom;
        struct ena_com_buf *ena_buf;
        struct page *page;
        dma_addr_t dma;
 
+       /* restore page offset value in case it has been changed by device */
+       rx_info->page_offset = headroom;
+
        /* if previous allocated page is not used */
        if (unlikely(rx_info->page))
                return 0;
@@ -959,10 +945,9 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring,
                  "Allocate page %p, rx_info %p\n", page, rx_info);
 
        rx_info->page = page;
-       rx_info->page_offset = 0;
        ena_buf = &rx_info->ena_buf;
-       ena_buf->paddr = dma + rx_ring->rx_headroom;
-       ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom;
+       ena_buf->paddr = dma + headroom;
+       ena_buf->len = ENA_PAGE_SIZE - headroom;
 
        return 0;
 }
@@ -1356,15 +1341,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
        struct ena_rx_buffer *rx_info;
        u16 len, req_id, buf = 0;
        void *va;
-       int rc;
 
        len = ena_bufs[buf].len;
        req_id = ena_bufs[buf].req_id;
 
-       rc = validate_rx_req_id(rx_ring, req_id);
-       if (unlikely(rc < 0))
-               return NULL;
-
        rx_info = &rx_ring->rx_buffer_info[req_id];
 
        if (unlikely(!rx_info->page)) {
@@ -1379,7 +1359,8 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 
        /* save virt address of first buffer */
        va = page_address(rx_info->page) + rx_info->page_offset;
-       prefetch(va + NET_IP_ALIGN);
+
+       prefetch(va);
 
        if (len <= rx_ring->rx_copybreak) {
                skb = ena_alloc_skb(rx_ring, false);
@@ -1420,8 +1401,6 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 
                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
                                rx_info->page_offset, len, ENA_PAGE_SIZE);
-               /* The offset is non zero only for the first buffer */
-               rx_info->page_offset = 0;
 
                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
                          "RX skb updated. len %d. data_len %d\n",
@@ -1440,10 +1419,6 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
                len = ena_bufs[buf].len;
                req_id = ena_bufs[buf].req_id;
 
-               rc = validate_rx_req_id(rx_ring, req_id);
-               if (unlikely(rc < 0))
-                       return NULL;
-
                rx_info = &rx_ring->rx_buffer_info[req_id];
        } while (1);
 
@@ -1544,8 +1519,7 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
        int ret;
 
        rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
-       xdp->data = page_address(rx_info->page) +
-               rx_info->page_offset + rx_ring->rx_headroom;
+       xdp->data = page_address(rx_info->page) + rx_info->page_offset;
        xdp_set_data_meta_invalid(xdp);
        xdp->data_hard_start = page_address(rx_info->page);
        xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len;
@@ -1612,8 +1586,9 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
                if (unlikely(ena_rx_ctx.descs == 0))
                        break;
 
+               /* First descriptor might have an offset set by the device */
                rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
-               rx_info->page_offset = ena_rx_ctx.pkt_offset;
+               rx_info->page_offset += ena_rx_ctx.pkt_offset;
 
                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
                          "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
@@ -1697,12 +1672,18 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
 error:
        adapter = netdev_priv(rx_ring->netdev);
 
-       u64_stats_update_begin(&rx_ring->syncp);
-       rx_ring->rx_stats.bad_desc_num++;
-       u64_stats_update_end(&rx_ring->syncp);
+       if (rc == -ENOSPC) {
+               u64_stats_update_begin(&rx_ring->syncp);
+               rx_ring->rx_stats.bad_desc_num++;
+               u64_stats_update_end(&rx_ring->syncp);
+               adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
+       } else {
+               u64_stats_update_begin(&rx_ring->syncp);
+               rx_ring->rx_stats.bad_req_id++;
+               u64_stats_update_end(&rx_ring->syncp);
+               adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
+       }
 
-       /* Too many desc from the device. Trigger reset */
-       adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
        set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
 
        return 0;
@@ -3388,16 +3369,9 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
                goto err_mmio_read_less;
        }
 
-       rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+       rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width));
        if (rc) {
-               dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc);
-               goto err_mmio_read_less;
-       }
-
-       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
-       if (rc) {
-               dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n",
-                       rc);
+               dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc);
                goto err_mmio_read_less;
        }
 
@@ -4167,6 +4141,12 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                return rc;
        }
 
+       rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS));
+       if (rc) {
+               dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc);
+               goto err_disable_device;
+       }
+
        pci_set_master(pdev);
 
        ena_dev = vzalloc(sizeof(*ena_dev));
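
Note: dma_set_mask_and_coherent() sets the streaming and coherent masks in one call and replaces the deprecated pci_set_dma_mask()/pci_set_consistent_dma_mask() pair. The usual idiom, sketched here with a 32-bit fallback; this is an illustration, not the driver code:

/* Hedged sketch, not part of the patch. */
static int example_set_dma_masks(struct device *dev)
{
	int rc;

	rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
	if (rc)
		rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));

	return rc;	/* non-zero: no usable DMA configuration */
}
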
index 4f91365..24122cc 100644 (file)
@@ -413,85 +413,63 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
                                              buff->rxdata.pg_off,
                                              buff->len, DMA_FROM_DEVICE);
 
-               /* for single fragment packets use build_skb() */
-               if (buff->is_eop &&
-                   buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) {
-                       skb = build_skb(aq_buf_vaddr(&buff->rxdata),
+               skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE);
+               if (unlikely(!skb)) {
+                       u64_stats_update_begin(&self->stats.rx.syncp);
+                       self->stats.rx.skb_alloc_fails++;
+                       u64_stats_update_end(&self->stats.rx.syncp);
+                       err = -ENOMEM;
+                       goto err_exit;
+               }
+               if (is_ptp_ring)
+                       buff->len -=
+                               aq_ptp_extract_ts(self->aq_nic, skb,
+                                                 aq_buf_vaddr(&buff->rxdata),
+                                                 buff->len);
+
+               hdr_len = buff->len;
+               if (hdr_len > AQ_CFG_RX_HDR_SIZE)
+                       hdr_len = eth_get_headlen(skb->dev,
+                                                 aq_buf_vaddr(&buff->rxdata),
+                                                 AQ_CFG_RX_HDR_SIZE);
+
+               memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata),
+                      ALIGN(hdr_len, sizeof(long)));
+
+               if (buff->len - hdr_len > 0) {
+                       skb_add_rx_frag(skb, 0, buff->rxdata.page,
+                                       buff->rxdata.pg_off + hdr_len,
+                                       buff->len - hdr_len,
                                        AQ_CFG_RX_FRAME_MAX);
-                       if (unlikely(!skb)) {
-                               u64_stats_update_begin(&self->stats.rx.syncp);
-                               self->stats.rx.skb_alloc_fails++;
-                               u64_stats_update_end(&self->stats.rx.syncp);
-                               err = -ENOMEM;
-                               goto err_exit;
-                       }
-                       if (is_ptp_ring)
-                               buff->len -=
-                                       aq_ptp_extract_ts(self->aq_nic, skb,
-                                               aq_buf_vaddr(&buff->rxdata),
-                                               buff->len);
-                       skb_put(skb, buff->len);
                        page_ref_inc(buff->rxdata.page);
-               } else {
-                       skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE);
-                       if (unlikely(!skb)) {
-                               u64_stats_update_begin(&self->stats.rx.syncp);
-                               self->stats.rx.skb_alloc_fails++;
-                               u64_stats_update_end(&self->stats.rx.syncp);
-                               err = -ENOMEM;
-                               goto err_exit;
-                       }
-                       if (is_ptp_ring)
-                               buff->len -=
-                                       aq_ptp_extract_ts(self->aq_nic, skb,
-                                               aq_buf_vaddr(&buff->rxdata),
-                                               buff->len);
-
-                       hdr_len = buff->len;
-                       if (hdr_len > AQ_CFG_RX_HDR_SIZE)
-                               hdr_len = eth_get_headlen(skb->dev,
-                                                         aq_buf_vaddr(&buff->rxdata),
-                                                         AQ_CFG_RX_HDR_SIZE);
-
-                       memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata),
-                              ALIGN(hdr_len, sizeof(long)));
-
-                       if (buff->len - hdr_len > 0) {
-                               skb_add_rx_frag(skb, 0, buff->rxdata.page,
-                                               buff->rxdata.pg_off + hdr_len,
-                                               buff->len - hdr_len,
-                                               AQ_CFG_RX_FRAME_MAX);
-                               page_ref_inc(buff->rxdata.page);
-                       }
+               }
 
-                       if (!buff->is_eop) {
-                               buff_ = buff;
-                               i = 1U;
-                               do {
-                                       next_ = buff_->next,
-                                       buff_ = &self->buff_ring[next_];
+               if (!buff->is_eop) {
+                       buff_ = buff;
+                       i = 1U;
+                       do {
+                               next_ = buff_->next;
+                               buff_ = &self->buff_ring[next_];
 
-                                       dma_sync_single_range_for_cpu(
-                                                       aq_nic_get_dev(self->aq_nic),
-                                                       buff_->rxdata.daddr,
-                                                       buff_->rxdata.pg_off,
-                                                       buff_->len,
-                                                       DMA_FROM_DEVICE);
-                                       skb_add_rx_frag(skb, i++,
-                                                       buff_->rxdata.page,
-                                                       buff_->rxdata.pg_off,
-                                                       buff_->len,
-                                                       AQ_CFG_RX_FRAME_MAX);
-                                       page_ref_inc(buff_->rxdata.page);
-                                       buff_->is_cleaned = 1;
-
-                                       buff->is_ip_cso &= buff_->is_ip_cso;
-                                       buff->is_udp_cso &= buff_->is_udp_cso;
-                                       buff->is_tcp_cso &= buff_->is_tcp_cso;
-                                       buff->is_cso_err |= buff_->is_cso_err;
+                               dma_sync_single_range_for_cpu(aq_nic_get_dev(self->aq_nic),
+                                                             buff_->rxdata.daddr,
+                                                             buff_->rxdata.pg_off,
+                                                             buff_->len,
+                                                             DMA_FROM_DEVICE);
+                               skb_add_rx_frag(skb, i++,
+                                               buff_->rxdata.page,
+                                               buff_->rxdata.pg_off,
+                                               buff_->len,
+                                               AQ_CFG_RX_FRAME_MAX);
+                               page_ref_inc(buff_->rxdata.page);
+                               buff_->is_cleaned = 1;
 
-                               } while (!buff_->is_eop);
-                       }
+                               buff->is_ip_cso &= buff_->is_ip_cso;
+                               buff->is_udp_cso &= buff_->is_udp_cso;
+                               buff->is_tcp_cso &= buff_->is_tcp_cso;
+                               buff->is_cso_err |= buff_->is_cso_err;
+
+                       } while (!buff_->is_eop);
                }
 
                if (buff->is_vlan)
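
Note: with the build_skb() fast path removed, every frame now goes through header splitting: only the protocol headers, sized by eth_get_headlen(), are copied into the skb linear area, while the payload stays in the receive page as a fragment. A condensed, hedged sketch of that path; the function and its name are illustrative, and AQ_CFG_RX_HDR_SIZE / AQ_CFG_RX_FRAME_MAX come from the driver's config header:

/* Hedged sketch, not part of the patch. */
static struct sk_buff *example_header_split(struct napi_struct *napi,
					    struct net_device *ndev,
					    struct page *page,
					    unsigned int off, unsigned int len)
{
	void *va = page_address(page) + off;
	unsigned int hdr_len = len;
	struct sk_buff *skb;

	skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE);
	if (unlikely(!skb))
		return NULL;

	/* pull only the protocol headers into the linear area */
	if (hdr_len > AQ_CFG_RX_HDR_SIZE)
		hdr_len = eth_get_headlen(ndev, va, AQ_CFG_RX_HDR_SIZE);
	memcpy(__skb_put(skb, hdr_len), va, hdr_len);

	/* the payload stays in the page as a fragment */
	if (len > hdr_len) {
		skb_add_rx_frag(skb, 0, page, off + hdr_len,
				len - hdr_len, AQ_CFG_RX_FRAME_MAX);
		page_ref_inc(page);
	}

	return skb;
}
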
index 0c12cf7..3f65f2b 100644 (file)
@@ -2543,8 +2543,8 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
         * various kernel subsystems to support the mechanics required by a
         * fixed-high-32-bit system.
         */
-       if ((dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0) ||
-           (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0)) {
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+       if (err) {
                dev_err(&pdev->dev, "No usable DMA configuration,aborting\n");
                goto err_dma;
        }
index 098b032..ff9f96d 100644 (file)
@@ -2312,8 +2312,8 @@ static int atl1e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
         * various kernel subsystems to support the mechanics required by a
         * fixed-high-32-bit system.
         */
-       if ((dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0) ||
-           (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0)) {
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+       if (err) {
                dev_err(&pdev->dev, "No usable DMA configuration,aborting\n");
                goto err_dma;
        }
index 7fb42f3..7b79528 100644 (file)
@@ -88,6 +88,7 @@ config BNX2
 config CNIC
        tristate "QLogic CNIC support"
        depends on PCI && (IPV6 || IPV6=n)
+       depends on MMU
        select BNX2
        select UIO
        help
index 74c1778..b455b60 100644 (file)
@@ -2383,7 +2383,8 @@ static int b44_init_one(struct ssb_device *sdev,
                goto err_out_free_dev;
        }
 
-       if (dma_set_mask_and_coherent(sdev->dma_dev, DMA_BIT_MASK(30))) {
+       err = dma_set_mask_and_coherent(sdev->dma_dev, DMA_BIT_MASK(30));
+       if (err) {
                dev_err(sdev->dev,
                        "Required 30BIT DMA mask unsupported by the system\n");
                goto err_out_powerdown;
index 7975f59..0af0af2 100644 (file)
@@ -4099,7 +4099,8 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
        bnxt_free_ntp_fltrs(bp, irq_re_init);
        if (irq_re_init) {
                bnxt_free_ring_stats(bp);
-               if (!(bp->fw_cap & BNXT_FW_CAP_PORT_STATS_NO_RESET))
+               if (!(bp->fw_cap & BNXT_FW_CAP_PORT_STATS_NO_RESET) ||
+                   test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
                        bnxt_free_port_stats(bp);
                bnxt_free_ring_grps(bp);
                bnxt_free_vnics(bp);
@@ -7757,6 +7758,7 @@ static void bnxt_add_one_ctr(u64 hw, u64 *sw, u64 mask)
 {
        u64 sw_tmp;
 
+       hw &= mask;
        sw_tmp = (*sw & ~mask) | hw;
        if (hw < (*sw & mask))
                sw_tmp += mask + 1;
@@ -11588,7 +11590,8 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
        if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0 &&
            dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) {
                dev_err(&pdev->dev, "System does not support DMA, aborting\n");
-               goto init_err_disable;
+               rc = -EIO;
+               goto init_err_release;
        }
 
        pci_set_master(pdev);
@@ -12672,6 +12675,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                                create_singlethread_workqueue("bnxt_pf_wq");
                        if (!bnxt_pf_wq) {
                                dev_err(&pdev->dev, "Unable to create workqueue.\n");
+                               rc = -ENOMEM;
                                goto init_err_pci_clean;
                        }
                }
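
Note on the bnxt_add_one_ctr() hunk: hardware counters narrower than 64 bits are spliced into the 64-bit software accumulator, and a new hardware value below the accumulator's old low bits means the counter wrapped, so one full period (mask + 1) is added; masking hw first keeps stray high bits from corrupting the splice. A worked example with a hypothetical 16-bit counter:

/* Hedged worked example, not part of the patch (mask = 0xffff):
 *   *sw = 0x1fffe (old low bits 0xfffe), new hw read = 0x0002
 *   sw_tmp = (0x1fffe & ~0xffff) | 0x0002 = 0x10002
 *   hw (0x0002) < old low bits (0xfffe)  -> wrap detected
 *   sw_tmp += 0x10000                    -> 0x20002
 * Four increments (0xfffe -> 0xffff -> 0x0000 -> 0x0001 -> 0x0002),
 * matching how far the real counter advanced.
 */
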
index 53687bc..1471c9a 100644 (file)
@@ -2079,6 +2079,9 @@ int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp,
        struct hwrm_nvm_get_dev_info_input req = {0};
        int rc;
 
+       if (BNXT_VF(bp))
+               return -EOPNOTSUPP;
+
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_DEV_INFO, -1, -1);
        mutex_lock(&bp->hwrm_cmd_lock);
        rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
@@ -2997,7 +3000,7 @@ static int bnxt_get_module_eeprom(struct net_device *dev,
        /* Read A2 portion of the EEPROM */
        if (length) {
                start -= ETH_MODULE_SFF_8436_LEN;
-               rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1,
+               rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 0,
                                                      start, length, data);
        }
        return rc;
index 8585816..e432a68 100644 (file)
@@ -23,6 +23,7 @@ config MACB
        tristate "Cadence MACB/GEM support"
        depends on HAS_DMA && COMMON_CLK
        select PHYLINK
+       select CRC32
        help
          The Cadence MACB ethernet interface is found on many Atmel AT32 and
          AT91 parts.  This driver also supports the Cadence GEM (Gigabit
index 87cc0ef..8ba0e08 100644 (file)
@@ -68,7 +68,7 @@ config CHELSIO_T3
 
 config CHELSIO_T4
        tristate "Chelsio Communications T4/T5/T6 Ethernet support"
-       depends on PCI && (IPV6 || IPV6=n)
+       depends on PCI && (IPV6 || IPV6=n) && (TLS || TLS=n)
        select FW_LOADER
        select MDIO
        select ZLIB_DEFLATE
index e18e9ce..1cc3c51 100644 (file)
@@ -3175,6 +3175,7 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
                          GFP_KERNEL | __GFP_COMP);
        if (!avail) {
                CH_ALERT(adapter, "free list queue 0 initialization failed\n");
+               ret = -ENOMEM;
                goto err;
        }
        if (avail < q->fl[0].size)
index 4e55f70..83b4644 100644 (file)
@@ -880,7 +880,8 @@ int set_filter_wr(struct adapter *adapter, int fidx)
                 FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
                 FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
                 FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
-       fwr->smac_sel = f->smt->idx;
+       if (f->fs.newsmac)
+               fwr->smac_sel = f->smt->idx;
        fwr->rx_chan_rx_rpl_iq =
                htons(FW_FILTER_WR_RX_CHAN_V(0) |
                      FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
index c24485c..1b7e8c9 100644 (file)
@@ -544,7 +544,9 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
                /* need to wait for hw response, can't free tx_info yet. */
                if (tx_info->open_state == CH_KTLS_OPEN_PENDING)
                        tx_info->pending_close = true;
-               /* free the lock after the cleanup */
+               else
+                       spin_unlock_bh(&tx_info->lock);
+               /* if in pending close, free the lock after the cleanup */
                goto put_module;
        }
        spin_unlock_bh(&tx_info->lock);
@@ -985,9 +987,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
        struct fw_eth_tx_pkt_wr *wr;
        struct cpl_tx_pkt_core *cpl;
        u32 ctrl, iplen, maclen;
-#if IS_ENABLED(CONFIG_IPV6)
        struct ipv6hdr *ip6;
-#endif
        unsigned int ndesc;
        struct tcphdr *tcp;
        int len16, pktlen;
@@ -1041,17 +1041,15 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
        cpl->len = htons(pktlen);
 
        memcpy(buf, skb->data, pktlen);
-       if (tx_info->ip_family == AF_INET) {
+       if (!IS_ENABLED(CONFIG_IPV6) || tx_info->ip_family == AF_INET) {
                /* we need to correct ip header len */
                ip = (struct iphdr *)(buf + maclen);
                ip->tot_len = htons(pktlen - maclen);
                cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP);
-#if IS_ENABLED(CONFIG_IPV6)
        } else {
                ip6 = (struct ipv6hdr *)(buf + maclen);
                ip6->payload_len = htons(pktlen - maclen - iplen);
                cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP6);
-#endif
        }
 
        cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) |
index 96d5616..50e3a70 100644 (file)
@@ -1206,6 +1206,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
        sk_setup_caps(newsk, dst);
        ctx = tls_get_ctx(lsk);
        newsk->sk_destruct = ctx->sk_destruct;
+       newsk->sk_prot_creator = lsk->sk_prot_creator;
        csk->sk = newsk;
        csk->passive_reap_next = oreq;
        csk->tx_chan = cxgb4_port_chan(ndev);
index 62c8290..a4fb463 100644 (file)
@@ -391,6 +391,7 @@ int chtls_setkey(struct chtls_sock *csk, u32 keylen,
        csk->wr_unacked += DIV_ROUND_UP(len, 16);
        enqueue_wr(csk, skb);
        cxgb4_ofld_send(csk->egress_dev, skb);
+       skb = NULL;
 
        chtls_set_scmd(csk);
        /* Clear quiesce for Rx key */
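
Note: once enqueue_wr()/cxgb4_ofld_send() have taken the skb, the hardware path owns it, so the added "skb = NULL;" keeps any later error path in the function from freeing it a second time (kfree_skb() of a NULL pointer is a no-op). The ownership-transfer pattern, sketched with illustrative names that are not from the patch:

/* Hedged sketch, not part of the patch. */
static int example_send(struct example_dev *dev, struct sk_buff *skb)
{
	int ret;

	example_hw_send(dev, skb);	/* hardware now owns the skb */
	skb = NULL;			/* later error paths must not free it */

	ret = example_finish(dev);
	if (ret)
		kfree_skb(skb);		/* harmless: skb is NULL here */

	return ret;
}
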
index c2677ec..3d1e9a3 100644 (file)
@@ -33,6 +33,7 @@ config FTGMAC100
        depends on !64BIT || BROKEN
        select PHYLIB
        select MDIO_ASPEED if MACH_ASPEED_G6
+       select CRC32
        help
          This driver supports the FTGMAC100 Gigabit Ethernet controller
          from Faraday. It is used on Faraday A369, Andes AG102 and some
index 00024dd..80fb1f5 100644 (file)
@@ -1907,6 +1907,8 @@ err_register_netdev:
        clk_disable_unprepare(priv->rclk);
        clk_disable_unprepare(priv->clk);
 err_ncsi_dev:
+       if (priv->ndev)
+               ncsi_unregister_dev(priv->ndev);
        ftgmac100_destroy_mdio(netdev);
 err_setup_mdio:
        iounmap(priv->base);
@@ -1926,6 +1928,8 @@ static int ftgmac100_remove(struct platform_device *pdev)
        netdev = platform_get_drvdata(pdev);
        priv = netdev_priv(netdev);
 
+       if (priv->ndev)
+               ncsi_unregister_dev(priv->ndev);
        unregister_netdev(netdev);
 
        clk_disable_unprepare(priv->rclk);
index a1d53dd..3f9175b 100644 (file)
@@ -25,6 +25,7 @@ config FEC
        depends on (M523x || M527x || M5272 || M528x || M520x || M532x || \
                   ARCH_MXC || SOC_IMX28 || COMPILE_TEST)
        default ARCH_MXC || SOC_IMX28 if ARM
+       select CRC32
        select PHYLIB
        imply PTP_1588_CLOCK
        help
index d9c2859..cb7c028 100644 (file)
@@ -2120,6 +2120,15 @@ workaround:
        skb_copy_header(new_skb, skb);
        new_skb->dev = skb->dev;
 
+       /* Copy relevant timestamp info from the old skb to the new */
+       if (priv->tx_tstamp) {
+               skb_shinfo(new_skb)->tx_flags = skb_shinfo(skb)->tx_flags;
+               skb_shinfo(new_skb)->hwtstamps = skb_shinfo(skb)->hwtstamps;
+               skb_shinfo(new_skb)->tskey = skb_shinfo(skb)->tskey;
+               if (skb->sk)
+                       skb_set_owner_w(new_skb, skb->sk);
+       }
+
        /* We move the headroom when we align it so we have to reset the
         * network and transport header offsets relative to the new data
         * pointer. The checksum offload relies on these offsets.
@@ -2127,7 +2136,6 @@ workaround:
        skb_set_network_header(new_skb, skb_network_offset(skb));
        skb_set_transport_header(new_skb, skb_transport_offset(skb));
 
-       /* TODO: does timestamping need the result in the old skb? */
        dev_kfree_skb(skb);
        *s = new_skb;
 
index cfd369c..ee7a906 100644 (file)
@@ -4,6 +4,8 @@ config FSL_DPAA2_ETH
        depends on FSL_MC_BUS && FSL_MC_DPIO
        select PHYLINK
        select PCS_LYNX
+       select FSL_XGMAC_MDIO
+       select NET_DEVLINK
        help
          This is the DPAA2 Ethernet driver supporting Freescale SoCs
          with DPAA2 (DataPath Acceleration Architecture v2).
index 90cd243..828c177 100644 (file)
@@ -269,6 +269,7 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac,
 
        if (!of_device_is_available(node)) {
                netdev_err(mac->net_dev, "pcs-handle node not available\n");
+               of_node_put(node);
                return -ENODEV;
        }
 
index 0fa18b0..d99ea0f 100644 (file)
@@ -16,6 +16,7 @@ config FSL_ENETC
 config FSL_ENETC_VF
        tristate "ENETC VF driver"
        depends on PCI && PCI_MSI
+       select FSL_ENETC_MDIO
        select PHYLINK
        select DIMLIB
        help
index 52be6e3..fc2075e 100644 (file)
@@ -33,7 +33,10 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
                return NETDEV_TX_BUSY;
        }
 
+       enetc_lock_mdio();
        count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads);
+       enetc_unlock_mdio();
+
        if (unlikely(!count))
                goto drop_packet_err;
 
@@ -239,7 +242,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
        skb_tx_timestamp(skb);
 
        /* let H/W know BD ring has been updated */
-       enetc_wr_reg(tx_ring->tpir, i); /* includes wmb() */
+       enetc_wr_reg_hot(tx_ring->tpir, i); /* includes wmb() */
 
        return count;
 
@@ -262,12 +265,16 @@ static irqreturn_t enetc_msix(int irq, void *data)
        struct enetc_int_vector *v = data;
        int i;
 
+       enetc_lock_mdio();
+
        /* disable interrupts */
-       enetc_wr_reg(v->rbier, 0);
-       enetc_wr_reg(v->ricr1, v->rx_ictt);
+       enetc_wr_reg_hot(v->rbier, 0);
+       enetc_wr_reg_hot(v->ricr1, v->rx_ictt);
 
        for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
-               enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), 0);
+               enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i), 0);
+
+       enetc_unlock_mdio();
 
        napi_schedule(&v->napi);
 
@@ -334,19 +341,23 @@ static int enetc_poll(struct napi_struct *napi, int budget)
 
        v->rx_napi_work = false;
 
+       enetc_lock_mdio();
+
        /* enable interrupts */
-       enetc_wr_reg(v->rbier, ENETC_RBIER_RXTIE);
+       enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE);
 
        for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
-               enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i),
-                            ENETC_TBIER_TXTIE);
+               enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i),
+                                ENETC_TBIER_TXTIE);
+
+       enetc_unlock_mdio();
 
        return work_done;
 }
 
 static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
 {
-       int pi = enetc_rd_reg(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK;
+       int pi = enetc_rd_reg_hot(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK;
 
        return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi;
 }
@@ -386,7 +397,10 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 
        i = tx_ring->next_to_clean;
        tx_swbd = &tx_ring->tx_swbd[i];
+
+       enetc_lock_mdio();
        bds_to_clean = enetc_bd_ready_count(tx_ring, i);
+       enetc_unlock_mdio();
 
        do_tstamp = false;
 
@@ -429,16 +443,20 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
                        tx_swbd = tx_ring->tx_swbd;
                }
 
+               enetc_lock_mdio();
+
                /* BD iteration loop end */
                if (is_eof) {
                        tx_frm_cnt++;
                        /* re-arm interrupt source */
-                       enetc_wr_reg(tx_ring->idr, BIT(tx_ring->index) |
-                                    BIT(16 + tx_ring->index));
+                       enetc_wr_reg_hot(tx_ring->idr, BIT(tx_ring->index) |
+                                        BIT(16 + tx_ring->index));
                }
 
                if (unlikely(!bds_to_clean))
                        bds_to_clean = enetc_bd_ready_count(tx_ring, i);
+
+               enetc_unlock_mdio();
        }
 
        tx_ring->next_to_clean = i;
@@ -515,8 +533,6 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
        if (likely(j)) {
                rx_ring->next_to_alloc = i; /* keep track from page reuse */
                rx_ring->next_to_use = i;
-               /* update ENETC's consumer index */
-               enetc_wr_reg(rx_ring->rcir, i);
        }
 
        return j;
@@ -534,8 +550,8 @@ static void enetc_get_rx_tstamp(struct net_device *ndev,
        u64 tstamp;
 
        if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TSTMP) {
-               lo = enetc_rd(hw, ENETC_SICTR0);
-               hi = enetc_rd(hw, ENETC_SICTR1);
+               lo = enetc_rd_reg_hot(hw->reg + ENETC_SICTR0);
+               hi = enetc_rd_reg_hot(hw->reg + ENETC_SICTR1);
                rxbd = enetc_rxbd_ext(rxbd);
                tstamp_lo = le32_to_cpu(rxbd->ext.tstamp);
                if (lo <= tstamp_lo)
@@ -684,23 +700,31 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
                u32 bd_status;
                u16 size;
 
+               enetc_lock_mdio();
+
                if (cleaned_cnt >= ENETC_RXBD_BUNDLE) {
                        int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt);
 
+                       /* update ENETC's consumer index */
+                       enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use);
                        cleaned_cnt -= count;
                }
 
                rxbd = enetc_rxbd(rx_ring, i);
                bd_status = le32_to_cpu(rxbd->r.lstatus);
-               if (!bd_status)
+               if (!bd_status) {
+                       enetc_unlock_mdio();
                        break;
+               }
 
-               enetc_wr_reg(rx_ring->idr, BIT(rx_ring->index));
+               enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index));
                dma_rmb(); /* for reading other rxbd fields */
                size = le16_to_cpu(rxbd->r.buf_len);
                skb = enetc_map_rx_buff_to_skb(rx_ring, i, size);
-               if (!skb)
+               if (!skb) {
+                       enetc_unlock_mdio();
                        break;
+               }
 
                enetc_get_offloads(rx_ring, rxbd, skb);
 
@@ -712,6 +736,7 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
                if (unlikely(bd_status &
                             ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) {
+                       enetc_unlock_mdio();
                        dev_kfree_skb(skb);
                        while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
                                dma_rmb();
@@ -751,6 +776,8 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
                enetc_process_skb(rx_ring, skb);
 
+               enetc_unlock_mdio();
+
                napi_gro_receive(napi, skb);
 
                rx_frm_cnt++;
@@ -1225,6 +1252,7 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
        rx_ring->idr = hw->reg + ENETC_SIRXIDR;
 
        enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring));
+       enetc_wr(hw, ENETC_SIRXIDR, rx_ring->next_to_use);
 
        /* enable ring */
        enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);
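
Note: the *_hot accessors do no locking of their own, so the datapath takes the read lock once and amortizes it over a group of register accesses, which is the shape of the hunks above. The batching idea, sketched with an illustrative function that is not part of the driver:

/* Hedged sketch, not part of the patch. */
static void example_rearm(struct enetc_bdr *ring, u32 next_to_use)
{
	enetc_lock_mdio();	/* one reader-side acquisition ...    */
	enetc_wr_reg_hot(ring->rcir, next_to_use);	/* consumer index */
	enetc_wr_reg_hot(ring->idr, BIT(ring->index));	/* irq detect     */
	enetc_unlock_mdio();	/* ... amortized over both writes     */
}
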
index 8ed1ebd..89e5581 100644 (file)
@@ -143,8 +143,8 @@ static const struct {
        { ENETC_PM0_R255,   "MAC rx 128-255 byte packets" },
        { ENETC_PM0_R511,   "MAC rx 256-511 byte packets" },
        { ENETC_PM0_R1023,  "MAC rx 512-1023 byte packets" },
-       { ENETC_PM0_R1518,  "MAC rx 1024-1518 byte packets" },
-       { ENETC_PM0_R1519X, "MAC rx 1519 to max-octet packets" },
+       { ENETC_PM0_R1522,  "MAC rx 1024-1522 byte packets" },
+       { ENETC_PM0_R1523X, "MAC rx 1523 to max-octet packets" },
        { ENETC_PM0_ROVR,   "MAC rx oversized packets" },
        { ENETC_PM0_RJBR,   "MAC rx jabber packets" },
        { ENETC_PM0_RFRG,   "MAC rx fragment packets" },
@@ -163,9 +163,13 @@ static const struct {
        { ENETC_PM0_TBCA,   "MAC tx broadcast frames" },
        { ENETC_PM0_TPKT,   "MAC tx packets" },
        { ENETC_PM0_TUND,   "MAC tx undersized packets" },
+       { ENETC_PM0_T64,    "MAC tx 64 byte packets" },
        { ENETC_PM0_T127,   "MAC tx 65-127 byte packets" },
+       { ENETC_PM0_T255,   "MAC tx 128-255 byte packets" },
+       { ENETC_PM0_T511,   "MAC tx 256-511 byte packets" },
        { ENETC_PM0_T1023,  "MAC tx 512-1023 byte packets" },
-       { ENETC_PM0_T1518,  "MAC tx 1024-1518 byte packets" },
+       { ENETC_PM0_T1522,  "MAC tx 1024-1522 byte packets" },
+       { ENETC_PM0_T1523X, "MAC tx 1523 to max-octet packets" },
        { ENETC_PM0_TCNP,   "MAC tx control packets" },
        { ENETC_PM0_TDFR,   "MAC tx deferred packets" },
        { ENETC_PM0_TMCOL,  "MAC tx multiple collisions" },
index 17cf7c9..4cbf166 100644 (file)
@@ -267,8 +267,8 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PM0_R255         0x8180
 #define ENETC_PM0_R511         0x8188
 #define ENETC_PM0_R1023                0x8190
-#define ENETC_PM0_R1518                0x8198
-#define ENETC_PM0_R1519X       0x81A0
+#define ENETC_PM0_R1522                0x8198
+#define ENETC_PM0_R1523X       0x81A0
 #define ENETC_PM0_ROVR         0x81A8
 #define ENETC_PM0_RJBR         0x81B0
 #define ENETC_PM0_RFRG         0x81B8
@@ -287,9 +287,13 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PM0_TBCA         0x8250
 #define ENETC_PM0_TPKT         0x8260
 #define ENETC_PM0_TUND         0x8268
+#define ENETC_PM0_T64          0x8270
 #define ENETC_PM0_T127         0x8278
+#define ENETC_PM0_T255         0x8280
+#define ENETC_PM0_T511         0x8288
 #define ENETC_PM0_T1023                0x8290
-#define ENETC_PM0_T1518                0x8298
+#define ENETC_PM0_T1522                0x8298
+#define ENETC_PM0_T1523X       0x82A0
 #define ENETC_PM0_TCNP         0x82C0
 #define ENETC_PM0_TDFR         0x82D0
 #define ENETC_PM0_TMCOL                0x82D8
@@ -324,14 +328,100 @@ struct enetc_hw {
        void __iomem *global;
 };
 
-/* general register accessors */
-#define enetc_rd_reg(reg)      ioread32((reg))
-#define enetc_wr_reg(reg, val) iowrite32((val), (reg))
+/* ENETC register accessors */
+
+/* MDIO issue workaround (on LS1028A) -
+ * Due to a hardware issue, an access to MDIO registers
+ * that is concurrent with other ENETC register accesses
+ * may lead to the MDIO access being dropped or corrupted.
+ * To protect the MDIO accesses, a readers-writers locking
+ * scheme is used, where the MDIO register accesses are
+ * protected by write locks to ensure exclusivity, while
+ * the remaining ENETC registers are accessed under read
+ * locks since they only compete with MDIO accesses.
+ */
+extern rwlock_t enetc_mdio_lock;
+
+/* use this locking primitive only on the fast datapath to
+ * group together multiple non-MDIO register accesses to
+ * minimize the overhead of the lock
+ */
+static inline void enetc_lock_mdio(void)
+{
+       read_lock(&enetc_mdio_lock);
+}
+
+static inline void enetc_unlock_mdio(void)
+{
+       read_unlock(&enetc_mdio_lock);
+}
+
+/* use these accessors only on the fast datapath under
+ * the enetc_lock_mdio() locking primitive to minimize
+ * the overhead of the lock
+ */
+static inline u32 enetc_rd_reg_hot(void __iomem *reg)
+{
+       lockdep_assert_held(&enetc_mdio_lock);
+
+       return ioread32(reg);
+}
+
+static inline void enetc_wr_reg_hot(void __iomem *reg, u32 val)
+{
+       lockdep_assert_held(&enetc_mdio_lock);
+
+       iowrite32(val, reg);
+}
+
+/* internal helpers for the MDIO w/a */
+static inline u32 _enetc_rd_reg_wa(void __iomem *reg)
+{
+       u32 val;
+
+       enetc_lock_mdio();
+       val = ioread32(reg);
+       enetc_unlock_mdio();
+
+       return val;
+}
+
+static inline void _enetc_wr_reg_wa(void __iomem *reg, u32 val)
+{
+       enetc_lock_mdio();
+       iowrite32(val, reg);
+       enetc_unlock_mdio();
+}
+
+static inline u32 _enetc_rd_mdio_reg_wa(void __iomem *reg)
+{
+       unsigned long flags;
+       u32 val;
+
+       write_lock_irqsave(&enetc_mdio_lock, flags);
+       val = ioread32(reg);
+       write_unlock_irqrestore(&enetc_mdio_lock, flags);
+
+       return val;
+}
+
+static inline void _enetc_wr_mdio_reg_wa(void __iomem *reg, u32 val)
+{
+       unsigned long flags;
+
+       write_lock_irqsave(&enetc_mdio_lock, flags);
+       iowrite32(val, reg);
+       write_unlock_irqrestore(&enetc_mdio_lock, flags);
+}
+
 #ifdef ioread64
-#define enetc_rd_reg64(reg)    ioread64((reg))
+static inline u64 _enetc_rd_reg64(void __iomem *reg)
+{
+       return ioread64(reg);
+}
 #else
 /* using this to read out stats on 32b systems */
-static inline u64 enetc_rd_reg64(void __iomem *reg)
+static inline u64 _enetc_rd_reg64(void __iomem *reg)
 {
        u32 low, high, tmp;
 
@@ -345,12 +435,29 @@ static inline u64 enetc_rd_reg64(void __iomem *reg)
 }
 #endif
 
+static inline u64 _enetc_rd_reg64_wa(void __iomem *reg)
+{
+       u64 val;
+
+       enetc_lock_mdio();
+       val = _enetc_rd_reg64(reg);
+       enetc_unlock_mdio();
+
+       return val;
+}
+
+/* general register accessors */
+#define enetc_rd_reg(reg)              _enetc_rd_reg_wa((reg))
+#define enetc_wr_reg(reg, val)         _enetc_wr_reg_wa((reg), (val))
 #define enetc_rd(hw, off)              enetc_rd_reg((hw)->reg + (off))
 #define enetc_wr(hw, off, val)         enetc_wr_reg((hw)->reg + (off), val)
-#define enetc_rd64(hw, off)            enetc_rd_reg64((hw)->reg + (off))
+#define enetc_rd64(hw, off)            _enetc_rd_reg64_wa((hw)->reg + (off))
 /* port register accessors - PF only */
 #define enetc_port_rd(hw, off)         enetc_rd_reg((hw)->port + (off))
 #define enetc_port_wr(hw, off, val)    enetc_wr_reg((hw)->port + (off), val)
+#define enetc_port_rd_mdio(hw, off)    _enetc_rd_mdio_reg_wa((hw)->port + (off))
+#define enetc_port_wr_mdio(hw, off, val)       _enetc_wr_mdio_reg_wa(\
+                                                       (hw)->port + (off), val)
 /* global register accessors - PF only */
 #define enetc_global_rd(hw, off)       enetc_rd_reg((hw)->global + (off))
 #define enetc_global_wr(hw, off, val)  enetc_wr_reg((hw)->global + (off), val)
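
A minimal usage sketch of the new locking primitives (illustrative only; enetc_lock_mdio(), enetc_unlock_mdio(), enetc_rd_reg_hot() and struct enetc_hw are from this patch, the wrapper itself is not):

/* Batch several non-MDIO register reads under a single read-lock
 * acquisition, as the workaround comment above recommends for the
 * fast datapath.
 */
static u32 enetc_example_read_pair(struct enetc_hw *hw, u32 off_a, u32 off_b)
{
        u32 a, b;

        enetc_lock_mdio();
        a = enetc_rd_reg_hot(hw->reg + off_a);
        b = enetc_rd_reg_hot(hw->reg + off_b);
        enetc_unlock_mdio();

        return a | b;
}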
index 48c32a1..ee0116e 100644 (file)
 
 static inline u32 _enetc_mdio_rd(struct enetc_mdio_priv *mdio_priv, int off)
 {
-       return enetc_port_rd(mdio_priv->hw, mdio_priv->mdio_base + off);
+       return enetc_port_rd_mdio(mdio_priv->hw, mdio_priv->mdio_base + off);
 }
 
 static inline void _enetc_mdio_wr(struct enetc_mdio_priv *mdio_priv, int off,
                                  u32 val)
 {
-       enetc_port_wr(mdio_priv->hw, mdio_priv->mdio_base + off, val);
+       enetc_port_wr_mdio(mdio_priv->hw, mdio_priv->mdio_base + off, val);
 }
 
 #define enetc_mdio_rd(mdio_priv, off) \
@@ -174,3 +174,7 @@ struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs)
        return hw;
 }
 EXPORT_SYMBOL_GPL(enetc_hw_alloc);
+
+/* Lock for MDIO access errata on LS1028A */
+DEFINE_RWLOCK(enetc_mdio_lock);
+EXPORT_SYMBOL_GPL(enetc_mdio_lock);
index 827f74e..dbceb99 100644 (file)
@@ -92,18 +92,8 @@ static int enetc_setup_taprio(struct net_device *ndev,
        gcl_config->atc = 0xff;
        gcl_config->acl_len = cpu_to_le16(gcl_len);
 
-       if (!admin_conf->base_time) {
-               gcl_data->btl =
-                       cpu_to_le32(enetc_rd(&priv->si->hw, ENETC_SICTR0));
-               gcl_data->bth =
-                       cpu_to_le32(enetc_rd(&priv->si->hw, ENETC_SICTR1));
-       } else {
-               gcl_data->btl =
-                       cpu_to_le32(lower_32_bits(admin_conf->base_time));
-               gcl_data->bth =
-                       cpu_to_le32(upper_32_bits(admin_conf->base_time));
-       }
-
+       gcl_data->btl = cpu_to_le32(lower_32_bits(admin_conf->base_time));
+       gcl_data->bth = cpu_to_le32(upper_32_bits(admin_conf->base_time));
        gcl_data->ct = cpu_to_le32(admin_conf->cycle_time);
        gcl_data->cte = cpu_to_le32(admin_conf->cycle_time_extension);
 
index d791955..04f24c6 100644 (file)
@@ -1808,7 +1808,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
        int ret = 0, frame_start, frame_addr, frame_op;
        bool is_c45 = !!(regnum & MII_ADDR_C45);
 
-       ret = pm_runtime_get_sync(dev);
+       ret = pm_runtime_resume_and_get(dev);
        if (ret < 0)
                return ret;
 
@@ -1867,11 +1867,9 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
        int ret, frame_start, frame_addr;
        bool is_c45 = !!(regnum & MII_ADDR_C45);
 
-       ret = pm_runtime_get_sync(dev);
+       ret = pm_runtime_resume_and_get(dev);
        if (ret < 0)
                return ret;
-       else
-               ret = 0;
 
        if (is_c45) {
                frame_start = FEC_MMFR_ST_C45;
@@ -2275,7 +2273,7 @@ static void fec_enet_get_regs(struct net_device *ndev,
        u32 i, off;
        int ret;
 
-       ret = pm_runtime_get_sync(dev);
+       ret = pm_runtime_resume_and_get(dev);
        if (ret < 0)
                return;
 
@@ -2976,7 +2974,7 @@ fec_enet_open(struct net_device *ndev)
        int ret;
        bool reset_again;
 
-       ret = pm_runtime_get_sync(&fep->pdev->dev);
+       ret = pm_runtime_resume_and_get(&fep->pdev->dev);
        if (ret < 0)
                return ret;
 
@@ -3770,7 +3768,7 @@ fec_drv_remove(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        int ret;
 
-       ret = pm_runtime_get_sync(&pdev->dev);
+       ret = pm_runtime_resume_and_get(&pdev->dev);
        if (ret < 0)
                return ret;
 
index 3415018..48bf808 100644 (file)
@@ -4,6 +4,7 @@ config FSL_FMAN
        depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
        select GENERIC_ALLOCATOR
        select PHYLIB
+       select CRC32
        default n
        help
                Freescale Data-Path Acceleration Architecture Frame Manager
index a9066e6..ca2ab6c 100644 (file)
@@ -35,8 +35,6 @@
 
 #define HCLGE_DBG_DFX_SSU_2_OFFSET 12
 
-#pragma pack(1)
-
 struct hclge_qos_pri_map_cmd {
        u8 pri0_tc  : 4,
           pri1_tc  : 4;
@@ -85,8 +83,6 @@ struct hclge_dbg_reg_type_info {
        struct hclge_dbg_reg_common_msg reg_msg;
 };
 
-#pragma pack()
-
 static const struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = {
        {false, "Reserved"},
        {true,  "BP_CPU_STATE"},
index da15913..da9450f 100644 (file)
@@ -834,7 +834,7 @@ static void release_napi(struct ibmvnic_adapter *adapter)
 static int ibmvnic_login(struct net_device *netdev)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       unsigned long timeout = msecs_to_jiffies(30000);
+       unsigned long timeout = msecs_to_jiffies(20000);
        int retry_count = 0;
        int retries = 10;
        bool retry;
@@ -850,10 +850,8 @@ static int ibmvnic_login(struct net_device *netdev)
                adapter->init_done_rc = 0;
                reinit_completion(&adapter->init_done);
                rc = send_login(adapter);
-               if (rc) {
-                       netdev_warn(netdev, "Unable to login\n");
+               if (rc)
                        return rc;
-               }
 
                if (!wait_for_completion_timeout(&adapter->init_done,
                                                 timeout)) {
@@ -940,7 +938,7 @@ static void release_resources(struct ibmvnic_adapter *adapter)
 static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
 {
        struct net_device *netdev = adapter->netdev;
-       unsigned long timeout = msecs_to_jiffies(30000);
+       unsigned long timeout = msecs_to_jiffies(20000);
        union ibmvnic_crq crq;
        bool resend;
        int rc;
@@ -1857,7 +1855,7 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter,
        if (reset_state == VNIC_OPEN) {
                rc = __ibmvnic_close(netdev);
                if (rc)
-                       return rc;
+                       goto out;
        }
 
        release_resources(adapter);
@@ -1875,24 +1873,25 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter,
        }
 
        rc = ibmvnic_reset_init(adapter, true);
-       if (rc)
-               return IBMVNIC_INIT_FAILED;
+       if (rc) {
+               rc = IBMVNIC_INIT_FAILED;
+               goto out;
+       }
 
        /* If the adapter was in PROBE state prior to the reset,
         * exit here.
         */
        if (reset_state == VNIC_PROBED)
-               return 0;
+               goto out;
 
        rc = ibmvnic_login(netdev);
        if (rc) {
-               adapter->state = reset_state;
-               return rc;
+               goto out;
        }
 
        rc = init_resources(adapter);
        if (rc)
-               return rc;
+               goto out;
 
        ibmvnic_disable_irqs(adapter);
 
@@ -1902,8 +1901,10 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter,
                return 0;
 
        rc = __ibmvnic_open(netdev);
-       if (rc)
-               return IBMVNIC_OPEN_FAILED;
+       if (rc) {
+               rc = IBMVNIC_OPEN_FAILED;
+               goto out;
+       }
 
        /* refresh device's multicast list */
        ibmvnic_set_multi(netdev);
@@ -1912,7 +1913,10 @@ static int do_change_param_reset(struct ibmvnic_adapter *adapter,
        for (i = 0; i < adapter->req_rx_queues; i++)
                napi_schedule(&adapter->napi[i]);
 
-       return 0;
+out:
+       if (rc)
+               adapter->state = reset_state;
+       return rc;
 }
 
 /**
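
The hunk above converts do_change_param_reset() to a single exit; a condensed sketch of that control-flow pattern (step_one()/step_two() are hypothetical stand-ins for the real init/login/resource steps):

static int step_one(struct ibmvnic_adapter *adapter);   /* hypothetical */
static int step_two(struct ibmvnic_adapter *adapter);   /* hypothetical */

/* Every failure funnels through one label that restores the state
 * saved before the reset began.
 */
static int example_reset_step(struct ibmvnic_adapter *adapter,
                              enum vnic_state saved_state)
{
        int rc;

        rc = step_one(adapter);
        if (rc)
                goto out;
        rc = step_two(adapter);
out:
        if (rc)
                adapter->state = saved_state;
        return rc;
}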
@@ -2015,7 +2019,6 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 
                rc = ibmvnic_login(netdev);
                if (rc) {
-                       adapter->state = reset_state;
                        goto out;
                }
 
@@ -2074,12 +2077,18 @@ static int do_reset(struct ibmvnic_adapter *adapter,
        for (i = 0; i < adapter->req_rx_queues; i++)
                napi_schedule(&adapter->napi[i]);
 
-       if (adapter->reset_reason != VNIC_RESET_FAILOVER)
+       if (adapter->reset_reason == VNIC_RESET_FAILOVER ||
+           adapter->reset_reason == VNIC_RESET_MOBILITY) {
                call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev);
+               call_netdevice_notifiers(NETDEV_RESEND_IGMP, netdev);
+       }
 
        rc = 0;
 
 out:
+       /* restore the adapter state if reset failed */
+       if (rc)
+               adapter->state = reset_state;
        rtnl_unlock();
 
        return rc;
@@ -2112,40 +2121,46 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
        if (rc) {
                netdev_err(adapter->netdev,
                           "Couldn't initialize crq. rc=%d\n", rc);
-               return rc;
+               goto out;
        }
 
        rc = ibmvnic_reset_init(adapter, false);
        if (rc)
-               return rc;
+               goto out;
 
        /* If the adapter was in PROBE state prior to the reset,
         * exit here.
         */
        if (reset_state == VNIC_PROBED)
-               return 0;
+               goto out;
 
        rc = ibmvnic_login(netdev);
-       if (rc) {
-               adapter->state = VNIC_PROBED;
-               return 0;
-       }
+       if (rc)
+               goto out;
 
        rc = init_resources(adapter);
        if (rc)
-               return rc;
+               goto out;
 
        ibmvnic_disable_irqs(adapter);
        adapter->state = VNIC_CLOSED;
 
        if (reset_state == VNIC_CLOSED)
-               return 0;
+               goto out;
 
        rc = __ibmvnic_open(netdev);
-       if (rc)
-               return IBMVNIC_OPEN_FAILED;
+       if (rc) {
+               rc = IBMVNIC_OPEN_FAILED;
+               goto out;
+       }
 
-       return 0;
+       call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev);
+       call_netdevice_notifiers(NETDEV_RESEND_IGMP, netdev);
+out:
+       /* restore adapter state if reset failed */
+       if (rc)
+               adapter->state = reset_state;
+       return rc;
 }
 
 static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
@@ -2167,17 +2182,6 @@ static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter)
        return rwi;
 }
 
-static void free_all_rwi(struct ibmvnic_adapter *adapter)
-{
-       struct ibmvnic_rwi *rwi;
-
-       rwi = get_next_rwi(adapter);
-       while (rwi) {
-               kfree(rwi);
-               rwi = get_next_rwi(adapter);
-       }
-}
-
 static void __ibmvnic_reset(struct work_struct *work)
 {
        struct ibmvnic_rwi *rwi;
@@ -2209,7 +2213,6 @@ static void __ibmvnic_reset(struct work_struct *work)
 
                if (!saved_state) {
                        reset_state = adapter->state;
-                       adapter->state = VNIC_RESETTING;
                        saved_state = true;
                }
                spin_unlock_irqrestore(&adapter->state_lock, flags);
@@ -2236,20 +2239,23 @@ static void __ibmvnic_reset(struct work_struct *work)
                                rc = do_hard_reset(adapter, rwi, reset_state);
                                rtnl_unlock();
                        }
+                       if (rc) {
+                               /* give backing device time to settle down */
+                               netdev_dbg(adapter->netdev,
+                                          "[S:%d] Hard reset failed, waiting 60 secs\n",
+                                          adapter->state);
+                               set_current_state(TASK_UNINTERRUPTIBLE);
+                               schedule_timeout(60 * HZ);
+                       }
                } else if (!(rwi->reset_reason == VNIC_RESET_FATAL &&
                                adapter->from_passive_init)) {
                        rc = do_reset(adapter, rwi, reset_state);
                }
                kfree(rwi);
-               if (rc == IBMVNIC_OPEN_FAILED) {
-                       if (list_empty(&adapter->rwi_list))
-                               adapter->state = VNIC_CLOSED;
-                       else
-                               adapter->state = reset_state;
-                       rc = 0;
-               } else if (rc && rc != IBMVNIC_INIT_FAILED &&
-                   !adapter->force_reset_recovery)
-                       break;
+               adapter->last_reset_time = jiffies;
+
+               if (rc)
+                       netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc);
 
                rwi = get_next_rwi(adapter);
 
@@ -2263,11 +2269,6 @@ static void __ibmvnic_reset(struct work_struct *work)
                complete(&adapter->reset_done);
        }
 
-       if (rc) {
-               netdev_dbg(adapter->netdev, "Reset failed\n");
-               free_all_rwi(adapter);
-       }
-
        clear_bit_unlock(0, &adapter->resetting);
 }
 
@@ -2350,6 +2351,18 @@ static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(dev);
 
+       if (test_bit(0, &adapter->resetting)) {
+               netdev_err(adapter->netdev,
+                          "Adapter is resetting, skip timeout reset\n");
+               return;
+       }
+       /* Do not queue a reset until at least 5 seconds (the default
+        * watchdog value) have passed since the last reset
+        */
+       if (time_before(jiffies, (adapter->last_reset_time + dev->watchdog_timeo))) {
+               netdev_dbg(dev, "Not yet time to tx timeout.\n");
+               return;
+       }
        ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT);
 }
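
The timeout handler above rate-limits resets with jiffies arithmetic; the check in isolation (a sketch; in the patch the holdoff is dev->watchdog_timeo):

#include <linux/jiffies.h>

/* Allow a new reset only once the holdoff interval has elapsed since
 * the last one; time_before() is wraparound-safe.
 */
static bool example_reset_allowed(unsigned long last_reset_time,
                                  unsigned long holdoff)
{
        return !time_before(jiffies, last_reset_time + holdoff);
}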
 
@@ -2391,6 +2404,12 @@ restart_poll:
 
                if (!pending_scrq(adapter, adapter->rx_scrq[scrq_num]))
                        break;
+               /* The queue entry at the current index is peeked at above
+                * to determine that there is a valid descriptor awaiting
+                * processing. We want to be sure that the current slot
+                * holds a valid descriptor before reading its contents.
+                */
+               dma_rmb();
                next = ibmvnic_next_scrq(adapter, adapter->rx_scrq[scrq_num]);
                rx_buff =
                    (struct ibmvnic_rx_buff *)be64_to_cpu(next->
@@ -2849,15 +2868,26 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter,
 {
        int rc;
 
+       if (!scrq) {
+               /* scrq is NULL here - do not dereference it in the message */
+               netdev_dbg(adapter->netdev,
+                          "Invalid scrq reset. NULL scrq\n");
+               return -EINVAL;
+       }
+
        if (scrq->irq) {
                free_irq(scrq->irq, scrq);
                irq_dispose_mapping(scrq->irq);
                scrq->irq = 0;
        }
-
-       memset(scrq->msgs, 0, 4 * PAGE_SIZE);
-       atomic_set(&scrq->used, 0);
-       scrq->cur = 0;
+       if (scrq->msgs) {
+               memset(scrq->msgs, 0, 4 * PAGE_SIZE);
+               atomic_set(&scrq->used, 0);
+               scrq->cur = 0;
+       } else {
+               netdev_dbg(adapter->netdev, "Invalid scrq reset\n");
+               return -EINVAL;
+       }
 
        rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
                           4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq);
@@ -2868,6 +2898,9 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
 {
        int i, rc;
 
+       if (!adapter->tx_scrq || !adapter->rx_scrq)
+               return -EINVAL;
+
        for (i = 0; i < adapter->req_tx_queues; i++) {
                netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i);
                rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]);
@@ -3086,13 +3119,18 @@ restart_loop:
                unsigned int pool = scrq->pool_index;
                int num_entries = 0;
 
+               /* The queue entry at the current index is peeked at above
+                * to determine that there is a valid descriptor awaiting
+                * processing. We want to be sure that the current slot
+                * holds a valid descriptor before reading its contents.
+                */
+               dma_rmb();
+
                next = ibmvnic_next_scrq(adapter, scrq);
                for (i = 0; i < next->tx_comp.num_comps; i++) {
-                       if (next->tx_comp.rcs[i]) {
+                       if (next->tx_comp.rcs[i])
                                dev_err(dev, "tx error %x\n",
                                        next->tx_comp.rcs[i]);
-                               continue;
-                       }
                        index = be32_to_cpu(next->tx_comp.correlators[i]);
                        if (index & IBMVNIC_TSO_POOL_MASK) {
                                tx_pool = &adapter->tso_pool[pool];
@@ -3486,6 +3524,11 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter,
        }
        spin_unlock_irqrestore(&scrq->lock, flags);
 
+       /* Ensure that the entire buffer descriptor has been
+        * loaded before reading its contents
+        */
+       dma_rmb();
+
        return entry;
 }
 
@@ -3707,15 +3750,16 @@ static int send_login(struct ibmvnic_adapter *adapter)
        struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
        struct ibmvnic_login_buffer *login_buffer;
        struct device *dev = &adapter->vdev->dev;
+       struct vnic_login_client_data *vlcd;
        dma_addr_t rsp_buffer_token;
        dma_addr_t buffer_token;
        size_t rsp_buffer_size;
        union ibmvnic_crq crq;
+       int client_data_len;
        size_t buffer_size;
        __be64 *tx_list_p;
        __be64 *rx_list_p;
-       int client_data_len;
-       struct vnic_login_client_data *vlcd;
+       int rc;
        int i;
 
        if (!adapter->tx_scrq || !adapter->rx_scrq) {
@@ -3819,16 +3863,25 @@ static int send_login(struct ibmvnic_adapter *adapter)
        crq.login.cmd = LOGIN;
        crq.login.ioba = cpu_to_be32(buffer_token);
        crq.login.len = cpu_to_be32(buffer_size);
-       ibmvnic_send_crq(adapter, &crq);
+
+       adapter->login_pending = true;
+       rc = ibmvnic_send_crq(adapter, &crq);
+       if (rc) {
+               adapter->login_pending = false;
+               netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
+               goto buf_rsp_map_failed;
+       }
 
        return 0;
 
 buf_rsp_map_failed:
        kfree(login_rsp_buffer);
+       adapter->login_rsp_buf = NULL;
 buf_rsp_alloc_failed:
        dma_unmap_single(dev, buffer_token, buffer_size, DMA_TO_DEVICE);
 buf_map_failed:
        kfree(login_buffer);
+       adapter->login_buf = NULL;
 buf_alloc_failed:
        return -1;
 }
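
send_login() now arms login_pending before issuing the CRQ so handle_login_rsp() can reject stale responses; a condensed sketch of the handshake (the wrapper is illustrative, the fields and helpers are from this patch):

/* Arm before sending, disarm if the send never went out; the response
 * handler accepts a login response only while the flag is armed.
 */
static int example_send_login(struct ibmvnic_adapter *adapter,
                              union ibmvnic_crq *crq)
{
        int rc;

        adapter->login_pending = true;
        rc = ibmvnic_send_crq(adapter, crq);
        if (rc)
                adapter->login_pending = false;
        return rc;
}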
@@ -4371,6 +4424,15 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
        u64 *size_array;
        int i;
 
+       /* CHECK: Test/set of login_pending does not need to be atomic
+        * because only ibmvnic_tasklet tests/clears this.
+        */
+       if (!adapter->login_pending) {
+               netdev_warn(netdev, "Ignoring unexpected login response\n");
+               return 0;
+       }
+       adapter->login_pending = false;
+
        dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
                         DMA_TO_DEVICE);
        dma_unmap_single(dev, adapter->login_rsp_buf_token,
@@ -4400,7 +4462,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
             adapter->req_rx_add_queues !=
             be32_to_cpu(login_rsp->num_rxadd_subcrqs))) {
                dev_err(dev, "FATAL: Inconsistent login and login rsp\n");
-               ibmvnic_remove(adapter->vdev);
+               ibmvnic_reset(adapter, VNIC_RESET_FATAL);
                return -EIO;
        }
        size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
@@ -4742,6 +4804,11 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                case IBMVNIC_CRQ_INIT:
                        dev_info(dev, "Partner initialized\n");
                        adapter->from_passive_init = true;
+                       /* Discard any stale login responses from prev reset.
+                        * CHECK: should we clear even on INIT_COMPLETE?
+                        */
+                       adapter->login_pending = false;
+
                        if (!completion_done(&adapter->init_done)) {
                                complete(&adapter->init_done);
                                adapter->init_done_rc = -EIO;
@@ -4958,6 +5025,9 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter)
        } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
        /* Clean out the queue */
+       if (!crq->msgs)
+               return -EINVAL;
+
        memset(crq->msgs, 0, PAGE_SIZE);
        crq->cur = 0;
        crq->active = false;
@@ -5076,7 +5146,7 @@ map_failed:
 static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
 {
        struct device *dev = &adapter->vdev->dev;
-       unsigned long timeout = msecs_to_jiffies(30000);
+       unsigned long timeout = msecs_to_jiffies(20000);
        u64 old_num_rx_queues, old_num_tx_queues;
        int rc;
 
@@ -5171,6 +5241,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
        dev_set_drvdata(&dev->dev, netdev);
        adapter->vdev = dev;
        adapter->netdev = netdev;
+       adapter->login_pending = false;
 
        ether_addr_copy(adapter->mac_addr, mac_addr_p);
        ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
@@ -5234,7 +5305,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
        adapter->state = VNIC_PROBED;
 
        adapter->wait_for_reset = false;
-
+       adapter->last_reset_time = jiffies;
        return 0;
 
 ibmvnic_register_fail:
@@ -5262,7 +5333,7 @@ static int ibmvnic_remove(struct vio_dev *dev)
        unsigned long flags;
 
        spin_lock_irqsave(&adapter->state_lock, flags);
-       if (adapter->state == VNIC_RESETTING) {
+       if (test_bit(0, &adapter->resetting)) {
                spin_unlock_irqrestore(&adapter->state_lock, flags);
                return -EBUSY;
        }
index 217dcc7..21e7ea8 100644 (file)
@@ -942,8 +942,7 @@ enum vnic_state {VNIC_PROBING = 1,
                 VNIC_CLOSING,
                 VNIC_CLOSED,
                 VNIC_REMOVING,
-                VNIC_REMOVED,
-                VNIC_RESETTING};
+                VNIC_REMOVED};
 
 enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
                           VNIC_RESET_MOBILITY,
@@ -1087,6 +1086,9 @@ struct ibmvnic_adapter {
        struct delayed_work ibmvnic_delayed_reset;
        unsigned long resetting;
        bool napi_enabled, from_passive_init;
+       bool login_pending;
+       /* last device reset time */
+       unsigned long last_reset_time;
 
        bool failover_pending;
        bool force_reset_recovery;
index b30f008..128ab68 100644 (file)
@@ -6475,13 +6475,13 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
 
        /* Ungate PGCB clock */
        mac_data = er32(FEXTNVM9);
-       mac_data |= BIT(28);
+       mac_data &= ~BIT(28);
        ew32(FEXTNVM9, mac_data);
 
        /* Enable K1 off to enable mPHY Power Gating */
        mac_data = er32(FEXTNVM6);
        mac_data |= BIT(31);
-       ew32(FEXTNVM12, mac_data);
+       ew32(FEXTNVM6, mac_data);
 
        /* Enable mPHY power gating for any link and speed */
        mac_data = er32(FEXTNVM8);
@@ -6525,11 +6525,11 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
        /* Disable K1 off */
        mac_data = er32(FEXTNVM6);
        mac_data &= ~BIT(31);
-       ew32(FEXTNVM12, mac_data);
+       ew32(FEXTNVM6, mac_data);
 
        /* Disable Ungate PGCB clock */
        mac_data = er32(FEXTNVM9);
-       mac_data &= ~BIT(28);
+       mac_data |= BIT(28);
        ew32(FEXTNVM9, mac_data);
 
        /* Cancel not waking from dynamic
index 537300e..d231a2c 100644 (file)
@@ -140,6 +140,7 @@ enum i40e_state_t {
        __I40E_CLIENT_RESET,
        __I40E_VIRTCHNL_OP_PENDING,
        __I40E_RECOVERY_MODE,
+       __I40E_VF_RESETS_DISABLED,      /* disable resets during i40e_remove */
        /* This must be last as it determines the size of the BITMAP */
        __I40E_STATE_SIZE__,
 };
index 4f8a215..1337686 100644 (file)
@@ -4010,8 +4010,16 @@ static irqreturn_t i40e_intr(int irq, void *data)
        }
 
        if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) {
-               ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK;
-               set_bit(__I40E_VFLR_EVENT_PENDING, pf->state);
+               /* disable any further VFLR event notifications */
+               if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state)) {
+                       u32 reg = rd32(hw, I40E_PFINT_ICR0_ENA);
+
+                       reg &= ~I40E_PFINT_ICR0_VFLR_MASK;
+                       wr32(hw, I40E_PFINT_ICR0_ENA, reg);
+               } else {
+                       ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK;
+                       set_bit(__I40E_VFLR_EVENT_PENDING, pf->state);
+               }
        }
 
        if (icr0 & I40E_PFINT_ICR0_GRST_MASK) {
@@ -15311,6 +15319,11 @@ static void i40e_remove(struct pci_dev *pdev)
        while (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
                usleep_range(1000, 2000);
 
+       if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
+               set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
+               i40e_free_vfs(pf);
+               pf->flags &= ~I40E_FLAG_SRIOV_ENABLED;
+       }
        /* no more scheduling of any task */
        set_bit(__I40E_SUSPENDED, pf->state);
        set_bit(__I40E_DOWN, pf->state);
@@ -15337,11 +15350,6 @@ static void i40e_remove(struct pci_dev *pdev)
         */
        i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
 
-       if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
-               i40e_free_vfs(pf);
-               pf->flags &= ~I40E_FLAG_SRIOV_ENABLED;
-       }
-
        i40e_fdir_teardown(pf);
 
        /* If there is a switch structure or any orphans, remove them.
index d43ce13..3f5825f 100644 (file)
@@ -1850,6 +1850,7 @@ static inline bool i40e_page_is_reusable(struct page *page)
  * the adapter for another receive
  *
  * @rx_buffer: buffer containing the page
+ * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call
  *
  * If page is reusable, rx_buffer->page_offset is adjusted to point to
  * an unused region in the page.
@@ -1872,7 +1873,8 @@ static inline bool i40e_page_is_reusable(struct page *page)
  *
  * In either case, if the page is reusable its refcount is increased.
  **/
-static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
+static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
+                                  int rx_buffer_pgcnt)
 {
        unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
        struct page *page = rx_buffer->page;
@@ -1883,7 +1885,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely((page_count(page) - pagecnt_bias) > 1))
+       if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
                return false;
 #else
 #define I40E_LAST_OFFSET \
@@ -1942,16 +1944,24 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
  * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
  * @rx_ring: rx descriptor ring to transact packets on
  * @size: size of buffer to add to skb
+ * @rx_buffer_pgcnt: buffer page refcount
  *
  * This function will pull an Rx buffer from the ring and synchronize it
  * for use by the CPU.
  */
 static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
-                                                const unsigned int size)
+                                                const unsigned int size,
+                                                int *rx_buffer_pgcnt)
 {
        struct i40e_rx_buffer *rx_buffer;
 
        rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+       *rx_buffer_pgcnt =
+#if (PAGE_SIZE < 8192)
+               page_count(rx_buffer->page);
+#else
+               0;
+#endif
        prefetch_page_address(rx_buffer->page);
 
        /* we are reusing so sync this buffer for CPU use */
@@ -2102,14 +2112,16 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
  * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: rx buffer to pull data from
+ * @rx_buffer_pgcnt: rx buffer page refcount pre xdp_do_redirect() call
  *
  * This function will clean up the contents of the rx_buffer.  It will
  * either recycle the buffer or unmap it and free the associated resources.
  */
 static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
-                              struct i40e_rx_buffer *rx_buffer)
+                              struct i40e_rx_buffer *rx_buffer,
+                              int rx_buffer_pgcnt)
 {
-       if (i40e_can_reuse_rx_page(rx_buffer)) {
+       if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
                /* hand second half of page back to the ring */
                i40e_reuse_rx_page(rx_ring, rx_buffer);
        } else {
@@ -2336,6 +2348,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
        while (likely(total_rx_packets < (unsigned int)budget)) {
                struct i40e_rx_buffer *rx_buffer;
                union i40e_rx_desc *rx_desc;
+               int rx_buffer_pgcnt;
                unsigned int size;
                u64 qword;
 
@@ -2378,7 +2391,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                        break;
 
                i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
-               rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+               rx_buffer = i40e_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
 
                /* retrieve a buffer from the ring */
                if (!skb) {
@@ -2421,7 +2434,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                        break;
                }
 
-               i40e_put_rx_buffer(rx_ring, rx_buffer);
+               i40e_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
                cleaned_count++;
 
                if (i40e_is_non_eop(rx_ring, rx_desc, skb))
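
The rx_buffer_pgcnt plumbing above snapshots page_count() before the XDP program can touch the page, so the reuse decision is made against a stable value; the corrected test in isolation (a sketch of the PAGE_SIZE < 8192 case):

/* Reusable only if the driver's bias accounts for all but one
 * reference in the snapshot taken by i40e_get_rx_buffer().
 */
static bool example_can_reuse(int pgcnt_snapshot, unsigned int pagecnt_bias)
{
        return (pgcnt_snapshot - pagecnt_bias) <= 1;
}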
index 4919d22..1b5390e 100644 (file)
@@ -1403,7 +1403,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf)
  * @vf: pointer to the VF structure
  * @flr: VFLR was issued or not
  *
- * Returns true if the VF is reset, false otherwise.
+ * Returns true if the VF is in reset, resets successfully, or resets
+ * are disabled; false otherwise.
  **/
 bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
 {
@@ -1413,11 +1414,14 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
        u32 reg;
        int i;
 
+       if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state))
+               return true;
+
        /* If the VFs have been disabled, this means something else is
         * resetting the VF, so we shouldn't continue.
         */
        if (test_and_set_bit(__I40E_VF_DISABLE, pf->state))
-               return false;
+               return true;
 
        i40e_trigger_vf_reset(vf, flr);
 
@@ -1581,6 +1585,15 @@ void i40e_free_vfs(struct i40e_pf *pf)
 
        i40e_notify_client_of_vf_enable(pf, 0);
 
+       /* Disable IOV before freeing resources. This lets any VF drivers
+        * running in the host get themselves cleaned up before we yank
+        * the carpet out from underneath their feet.
+        */
+       if (!pci_vfs_assigned(pf->pdev))
+               pci_disable_sriov(pf->pdev);
+       else
+               dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n");
+
        /* Amortize wait time by stopping all VFs at the same time */
        for (i = 0; i < pf->num_alloc_vfs; i++) {
                if (test_bit(I40E_VF_STATE_INIT, &pf->vf[i].vf_states))
@@ -1596,15 +1609,6 @@ void i40e_free_vfs(struct i40e_pf *pf)
                i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[i].lan_vsi_idx]);
        }
 
-       /* Disable IOV before freeing resources. This lets any VF drivers
-        * running in the host get themselves cleaned up before we yank
-        * the carpet out from underneath their feet.
-        */
-       if (!pci_vfs_assigned(pf->pdev))
-               pci_disable_sriov(pf->pdev);
-       else
-               dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n");
-
        /* free up VF resources */
        tmp = pf->num_alloc_vfs;
        pf->num_alloc_vfs = 0;
index eae7526..23eca2f 100644 (file)
@@ -762,13 +762,15 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
 /**
  * ice_can_reuse_rx_page - Determine if page can be reused for another Rx
  * @rx_buf: buffer containing the page
+ * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call
  *
  * If page is reusable, we have a green light for calling ice_reuse_rx_page,
  * which will assign the current buffer to the buffer that next_to_alloc is
  * pointing to; otherwise, the DMA mapping needs to be destroyed and
  * page freed
  */
-static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
+static bool
+ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
 {
        unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
        struct page *page = rx_buf->page;
@@ -779,7 +781,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely((page_count(page) - pagecnt_bias) > 1))
+       if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1))
                return false;
 #else
 #define ICE_LAST_OFFSET \
@@ -864,17 +866,24 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
  * @rx_ring: Rx descriptor ring to transact packets on
  * @skb: skb to be used
  * @size: size of buffer to add to skb
+ * @rx_buf_pgcnt: rx_buf page refcount
  *
  * This function will pull an Rx buffer from the ring and synchronize it
  * for use by the CPU.
  */
 static struct ice_rx_buf *
 ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
-              const unsigned int size)
+              const unsigned int size, int *rx_buf_pgcnt)
 {
        struct ice_rx_buf *rx_buf;
 
        rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+       *rx_buf_pgcnt =
+#if (PAGE_SIZE < 8192)
+               page_count(rx_buf->page);
+#else
+               0;
+#endif
        prefetchw(rx_buf->page);
        *skb = rx_buf->skb;
 
@@ -1006,12 +1015,15 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
  * ice_put_rx_buf - Clean up used buffer and either recycle or free
  * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buf: Rx buffer to pull data from
+ * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect()
  *
  * This function will update next_to_clean and then clean up the contents
  * of the rx_buf. It will either recycle the buffer or unmap it and free
  * the associated resources.
  */
-static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+static void
+ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+              int rx_buf_pgcnt)
 {
        u16 ntc = rx_ring->next_to_clean + 1;
 
@@ -1022,7 +1034,7 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
        if (!rx_buf)
                return;
 
-       if (ice_can_reuse_rx_page(rx_buf)) {
+       if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) {
                /* hand second half of page back to the ring */
                ice_reuse_rx_page(rx_ring, rx_buf);
        } else {
@@ -1097,6 +1109,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
                struct sk_buff *skb;
                unsigned int size;
                u16 stat_err_bits;
+               int rx_buf_pgcnt;
                u16 vlan_tag = 0;
                u8 rx_ptype;
 
@@ -1119,7 +1132,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
                dma_rmb();
 
                if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) {
-                       ice_put_rx_buf(rx_ring, NULL);
+                       ice_put_rx_buf(rx_ring, NULL, 0);
                        cleaned_count++;
                        continue;
                }
@@ -1128,7 +1141,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
                        ICE_RX_FLX_DESC_PKT_LEN_M;
 
                /* retrieve a buffer from the ring */
-               rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
+               rx_buf = ice_get_rx_buf(rx_ring, &skb, size, &rx_buf_pgcnt);
 
                if (!size) {
                        xdp.data = NULL;
@@ -1168,7 +1181,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
                total_rx_pkts++;
 
                cleaned_count++;
-               ice_put_rx_buf(rx_ring, rx_buf);
+               ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
                continue;
 construct_skb:
                if (skb) {
@@ -1187,7 +1200,7 @@ construct_skb:
                        break;
                }
 
-               ice_put_rx_buf(rx_ring, rx_buf);
+               ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
                cleaned_count++;
 
                /* skip if it is NOP desc */
index 0286d2f..aaa954a 100644 (file)
@@ -138,6 +138,8 @@ struct vf_mac_filter {
 /* this is the size past which hardware will drop packets when setting LPE=0 */
 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
 
+#define IGB_ETH_PKT_HDR_PAD    (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
 /* Supported Rx Buffer Sizes */
 #define IGB_RXBUFFER_256       256
 #define IGB_RXBUFFER_1536      1536
@@ -247,6 +249,9 @@ enum igb_tx_flags {
 #define IGB_SFF_ADDRESSING_MODE                0x4
 #define IGB_SFF_8472_UNSUP             0x00
 
+/* TX resources are shared between XDP and netstack
+ * and we need to tag the buffer type to distinguish them
+ */
 enum igb_tx_buf_type {
        IGB_TYPE_SKB = 0,
        IGB_TYPE_XDP,
index 5fc2c38..0d343d0 100644 (file)
@@ -2824,20 +2824,25 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
        }
 }
 
-static int igb_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
+static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
 {
-       int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+       int i, frame_size = dev->mtu + IGB_ETH_PKT_HDR_PAD;
        struct igb_adapter *adapter = netdev_priv(dev);
+       struct bpf_prog *prog = bpf->prog, *old_prog;
        bool running = netif_running(dev);
-       struct bpf_prog *old_prog;
        bool need_reset;
 
        /* verify igb ring attributes are sufficient for XDP */
        for (i = 0; i < adapter->num_rx_queues; i++) {
                struct igb_ring *ring = adapter->rx_ring[i];
 
-               if (frame_size > igb_rx_bufsz(ring))
+               if (frame_size > igb_rx_bufsz(ring)) {
+                       NL_SET_ERR_MSG_MOD(bpf->extack,
+                                          "The RX buffer size is too small for the frame size");
+                       netdev_warn(dev, "XDP RX buffer size %d is too small for the frame size %d\n",
+                                   igb_rx_bufsz(ring), frame_size);
                        return -EINVAL;
+               }
        }
 
        old_prog = xchg(&adapter->xdp_prog, prog);
@@ -2869,7 +2874,7 @@ static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
        switch (xdp->command) {
        case XDP_SETUP_PROG:
-               return igb_xdp_setup(dev, xdp->prog);
+               return igb_xdp_setup(dev, xdp);
        default:
                return -EINVAL;
        }
@@ -2910,10 +2915,12 @@ static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
         */
        tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
        if (unlikely(!tx_ring))
-               return -ENXIO;
+               return IGB_XDP_CONSUMED;
 
        nq = txring_txq(tx_ring);
        __netif_tx_lock(nq, cpu);
+       /* Avoid transmit queue timeout since we share it with the slow path */
+       nq->trans_start = jiffies;
        ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf);
        __netif_tx_unlock(nq);
 
@@ -2946,6 +2953,9 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
        nq = txring_txq(tx_ring);
        __netif_tx_lock(nq, cpu);
 
+       /* Avoid transmit queue timeout since we share it with the slow path */
+       nq->trans_start = jiffies;
+
        for (i = 0; i < n; i++) {
                struct xdp_frame *xdpf = frames[i];
                int err;
@@ -3950,8 +3960,7 @@ static int igb_sw_init(struct igb_adapter *adapter)
        /* set default work limits */
        adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
 
-       adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
-                                 VLAN_HLEN;
+       adapter->max_frame_size = netdev->mtu + IGB_ETH_PKT_HDR_PAD;
        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
        spin_lock_init(&adapter->nfc_lock);
@@ -6491,7 +6500,7 @@ static void igb_get_stats64(struct net_device *netdev,
 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
-       int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+       int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD;
 
        if (adapter->xdp_prog) {
                int i;
@@ -6500,7 +6509,9 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
                        struct igb_ring *ring = adapter->rx_ring[i];
 
                        if (max_frame > igb_rx_bufsz(ring)) {
-                               netdev_warn(adapter->netdev, "Requested MTU size is not supported with XDP\n");
+                               netdev_warn(adapter->netdev,
+                                           "Requested MTU size is not supported with XDP. Max frame size is %d\n",
+                                           max_frame);
                                return -EINVAL;
                        }
                }
@@ -8351,6 +8362,7 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
                                SKB_DATA_ALIGN(xdp->data_end -
                                               xdp->data_hard_start);
 #endif
+       unsigned int metasize = xdp->data - xdp->data_meta;
        struct sk_buff *skb;
 
        /* prefetch first cache line of first page */
@@ -8365,6 +8377,9 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
        skb_reserve(skb, xdp->data - xdp->data_hard_start);
        __skb_put(skb, xdp->data_end - xdp->data);
 
+       if (metasize)
+               skb_metadata_set(skb, metasize);
+
        /* pull timestamp out of packet data */
        if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
                igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb);
@@ -8771,7 +8786,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
        rx_ring->skb = skb;
 
        if (xdp_xmit & IGB_XDP_REDIR)
-               xdp_do_flush_map();
+               xdp_do_flush();
 
        if (xdp_xmit & IGB_XDP_TX) {
                struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
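
Because XDP shares TX rings with the stack, the hunks above refresh trans_start under the queue lock so the netdev watchdog never sees a stale timestamp; a sketch of that pattern (the wrapper is illustrative; txring_txq() and igb_xmit_xdp_ring() are the driver's own helpers, and NAPI context is assumed):

static int example_xdp_xmit_one(struct igb_adapter *adapter,
                                struct igb_ring *tx_ring,
                                struct xdp_frame *xdpf)
{
        struct netdev_queue *nq = txring_txq(tx_ring);
        int ret;

        __netif_tx_lock(nq, smp_processor_id());
        nq->trans_start = jiffies;      /* keep the watchdog quiet */
        ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf);
        __netif_tx_unlock(nq);

        return ret;
}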
index 45ae33e..f3f449f 100644 (file)
@@ -1945,7 +1945,8 @@ static inline bool ixgbe_page_is_reserved(struct page *page)
        return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
 }
 
-static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer)
+static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer,
+                                   int rx_buffer_pgcnt)
 {
        unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
        struct page *page = rx_buffer->page;
@@ -1956,7 +1957,7 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer)
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
+       if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
                return false;
 #else
        /* The last offset is a bit aggressive in that we assume the
@@ -2021,11 +2022,18 @@ static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
 static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring,
                                                   union ixgbe_adv_rx_desc *rx_desc,
                                                   struct sk_buff **skb,
-                                                  const unsigned int size)
+                                                  const unsigned int size,
+                                                  int *rx_buffer_pgcnt)
 {
        struct ixgbe_rx_buffer *rx_buffer;
 
        rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+       *rx_buffer_pgcnt =
+#if (PAGE_SIZE < 8192)
+               page_count(rx_buffer->page);
+#else
+               0;
+#endif
        prefetchw(rx_buffer->page);
        *skb = rx_buffer->skb;
 
@@ -2055,9 +2063,10 @@ skip_sync:
 
 static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
                                struct ixgbe_rx_buffer *rx_buffer,
-                               struct sk_buff *skb)
+                               struct sk_buff *skb,
+                               int rx_buffer_pgcnt)
 {
-       if (ixgbe_can_reuse_rx_page(rx_buffer)) {
+       if (ixgbe_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
                /* hand second half of page back to the ring */
                ixgbe_reuse_rx_page(rx_ring, rx_buffer);
        } else {
@@ -2303,6 +2312,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                union ixgbe_adv_rx_desc *rx_desc;
                struct ixgbe_rx_buffer *rx_buffer;
                struct sk_buff *skb;
+               int rx_buffer_pgcnt;
                unsigned int size;
 
                /* return some buffers to hardware, one at a time is too slow */
@@ -2322,7 +2332,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                 */
                dma_rmb();
 
-               rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
+               rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size, &rx_buffer_pgcnt);
 
                /* retrieve a buffer from the ring */
                if (!skb) {
@@ -2367,7 +2377,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                        break;
                }
 
-               ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);
+               ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb, rx_buffer_pgcnt);
                cleaned_count++;
 
                /* place incomplete frames back on ring for completion */
index 54b0bf5..4a9041e 100644 (file)
@@ -2287,6 +2287,7 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
        dma_sync_single_for_cpu(dev->dev.parent,
                                rx_desc->buf_phys_addr,
                                len, dma_dir);
+       rx_desc->buf_phys_addr = 0;
 
        if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
                skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags];
@@ -2295,8 +2296,8 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
                skb_frag_size_set(frag, data_len);
                __skb_frag_set_page(frag, page);
                sinfo->nr_frags++;
-
-               rx_desc->buf_phys_addr = 0;
+       } else {
+               page_pool_put_full_page(rxq->page_pool, page, true);
        }
        *size -= len;
 }
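
When a fragment cannot be attached, the page must go back to its page_pool or it leaks; the release call in isolation (a sketch; allow_direct=true is valid here because mvneta issues it from NAPI context):

#include <net/page_pool.h>

static void example_drop_page(struct page_pool *pool, struct page *page)
{
        /* return the full page to the pool, recycling it directly
         * when called from the pool's NAPI context
         */
        page_pool_put_full_page(pool, page, true);
}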
index f6616c8..cea886c 100644 (file)
@@ -4426,6 +4426,7 @@ static int mvpp2_open(struct net_device *dev)
        if (!valid) {
                netdev_err(port->dev,
                           "invalid configuration: no dt or link IRQ");
+               err = -ENOENT;
                goto err_free_irq;
        }
 
index 0f20e07..da4b286 100644 (file)
@@ -318,8 +318,10 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id)
                goto err_port_init;
        }
 
-       if (port->fp_id >= PRESTERA_MAC_ADDR_NUM_MAX)
+       if (port->fp_id >= PRESTERA_MAC_ADDR_NUM_MAX) {
+               err = -EINVAL;
                goto err_port_init;
+       }
 
        /* firmware requires that port's MAC address consist of the first
         * 5 bytes of the base MAC address
index 1b97ada..be56776 100644 (file)
@@ -676,7 +676,8 @@ static int prestera_pci_probe(struct pci_dev *pdev,
        if (err)
                return err;
 
-       if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(30))) {
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(30));
+       if (err) {
                dev_err(&pdev->dev, "fail to set DMA mask\n");
                goto err_dma_mask;
        }
@@ -702,8 +703,10 @@ static int prestera_pci_probe(struct pci_dev *pdev,
        dev_info(fw->dev.dev, "Prestera FW is ready\n");
 
        fw->wq = alloc_workqueue("prestera_fw_wq", WQ_HIGHPRI, 1);
-       if (!fw->wq)
+       if (!fw->wq) {
+               err = -ENOMEM;
                goto err_wq_alloc;
+       }
 
        INIT_WORK(&fw->evt_work, prestera_fw_evt_work_fn);
 
index 1325055..a8641a4 100644 (file)
@@ -966,6 +966,7 @@ static int mtk_star_enable(struct net_device *ndev)
                                      mtk_star_adjust_link, 0, priv->phy_intf);
        if (!priv->phydev) {
                netdev_err(ndev, "failed to connect to PHY\n");
+               ret = -ENODEV;
                goto err_free_irq;
        }
 
@@ -1053,7 +1054,7 @@ static int mtk_star_netdev_start_xmit(struct sk_buff *skb,
 err_drop_packet:
        dev_kfree_skb(skb);
        ndev->stats.tx_dropped++;
-       return NETDEV_TX_BUSY;
+       return NETDEV_TX_OK;
 }
 
 /* Returns the number of bytes sent or a negative number on the first
index 106513f..6f29031 100644 (file)
@@ -1378,8 +1378,10 @@ static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue)
                tx_ring->cons, tx_ring->prod);
 
        priv->port_stats.tx_timeout++;
-       en_dbg(DRV, priv, "Scheduling watchdog\n");
-       queue_work(mdev->workqueue, &priv->watchdog_task);
+       if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) {
+               en_dbg(DRV, priv, "Scheduling port restart\n");
+               queue_work(mdev->workqueue, &priv->restart_task);
+       }
 }
 
 
@@ -1733,6 +1735,7 @@ int mlx4_en_start_port(struct net_device *dev)
                                mlx4_en_deactivate_cq(priv, cq);
                                goto tx_err;
                        }
+                       clear_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &tx_ring->state);
                        if (t != TX_XDP) {
                                tx_ring->tx_queue = netdev_get_tx_queue(dev, i);
                                tx_ring->recycle_ring = NULL;
@@ -1829,6 +1832,7 @@ int mlx4_en_start_port(struct net_device *dev)
                local_bh_enable();
        }
 
+       clear_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state);
        netif_tx_start_all_queues(dev);
        netif_device_attach(dev);
 
@@ -1999,7 +2003,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 static void mlx4_en_restart(struct work_struct *work)
 {
        struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
-                                                watchdog_task);
+                                                restart_task);
        struct mlx4_en_dev *mdev = priv->mdev;
        struct net_device *dev = priv->dev;
 
@@ -2377,7 +2381,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
        if (netif_running(dev)) {
                mutex_lock(&mdev->state_lock);
                if (!mdev->device_up) {
-                       /* NIC is probably restarting - let watchdog task reset
+                       /* NIC is probably restarting - let restart task reset
                         * the port */
                        en_dbg(DRV, priv, "Change MTU called with card down!?\n");
                } else {
@@ -2386,7 +2390,9 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
                        if (err) {
                                en_err(priv, "Failed restarting port:%d\n",
                                         priv->port);
-                               queue_work(mdev->workqueue, &priv->watchdog_task);
+                               if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING,
+                                                     &priv->state))
+                                       queue_work(mdev->workqueue, &priv->restart_task);
                        }
                }
                mutex_unlock(&mdev->state_lock);
@@ -2792,7 +2798,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
                if (err) {
                        en_err(priv, "Failed starting port %d for XDP change\n",
                               priv->port);
-                       queue_work(mdev->workqueue, &priv->watchdog_task);
+                       if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state))
+                               queue_work(mdev->workqueue, &priv->restart_task);
                }
        }
 
@@ -3165,7 +3172,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
        priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev);
        spin_lock_init(&priv->stats_lock);
        INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode);
-       INIT_WORK(&priv->watchdog_task, mlx4_en_restart);
+       INIT_WORK(&priv->restart_task, mlx4_en_restart);
        INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
        INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
        INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task);
index 3ddb726..59b097c 100644 (file)
@@ -392,6 +392,35 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
        return cnt;
 }
 
+static void mlx4_en_handle_err_cqe(struct mlx4_en_priv *priv, struct mlx4_err_cqe *err_cqe,
+                                  u16 cqe_index, struct mlx4_en_tx_ring *ring)
+{
+       struct mlx4_en_dev *mdev = priv->mdev;
+       struct mlx4_en_tx_info *tx_info;
+       struct mlx4_en_tx_desc *tx_desc;
+       u16 wqe_index;
+       int desc_size;
+
+       en_err(priv, "CQE error - cqn 0x%x, ci 0x%x, vendor syndrome: 0x%x syndrome: 0x%x\n",
+              ring->sp_cqn, cqe_index, err_cqe->vendor_err_syndrome, err_cqe->syndrome);
+       print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe, sizeof(*err_cqe),
+                      false);
+
+       wqe_index = be16_to_cpu(err_cqe->wqe_index) & ring->size_mask;
+       tx_info = &ring->tx_info[wqe_index];
+       desc_size = tx_info->nr_txbb << LOG_TXBB_SIZE;
+       en_err(priv, "Related WQE - qpn 0x%x, wqe index 0x%x, wqe size 0x%x\n", ring->qpn,
+              wqe_index, desc_size);
+       tx_desc = ring->buf + (wqe_index << LOG_TXBB_SIZE);
+       print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, tx_desc, desc_size, false);
+
+       if (test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state))
+               return;
+
+       en_err(priv, "Scheduling port restart\n");
+       queue_work(mdev->workqueue, &priv->restart_task);
+}
+
 int mlx4_en_process_tx_cq(struct net_device *dev,
                          struct mlx4_en_cq *cq, int napi_budget)
 {
@@ -438,13 +467,10 @@ int mlx4_en_process_tx_cq(struct net_device *dev,
                dma_rmb();
 
                if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
-                            MLX4_CQE_OPCODE_ERROR)) {
-                       struct mlx4_err_cqe *cqe_err = (struct mlx4_err_cqe *)cqe;
-
-                       en_err(priv, "CQE error - vendor syndrome: 0x%x syndrome: 0x%x\n",
-                              cqe_err->vendor_err_syndrome,
-                              cqe_err->syndrome);
-               }
+                            MLX4_CQE_OPCODE_ERROR))
+                       if (!test_and_set_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &ring->state))
+                               mlx4_en_handle_err_cqe(priv, (struct mlx4_err_cqe *)cqe, index,
+                                                      ring);
 
                /* Skip over last polled CQE */
                new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
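
A single pattern runs through the mlx4 hunks above: every path that detects a port failure first claims MLX4_EN_STATE_FLAG_RESTARTING via test_and_set_bit() so that at most one restart_task is queued, and mlx4_en_start_port() clears the bit once the port is back up. A minimal sketch of that guard, reusing names from the hunks (illustrative, not an exact upstream helper):

	static void mlx4_en_schedule_restart(struct mlx4_en_priv *priv)
	{
		/* atomically claim the restart; any later caller sees the bit
		 * already set and backs off, so the work is queued only once
		 */
		if (test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state))
			return;

		en_err(priv, "Scheduling port restart\n");
		queue_work(priv->mdev->workqueue, &priv->restart_task);
	}
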
index f6ff962..f6cfec8 100644 (file)
@@ -1864,8 +1864,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define         INIT_HCA_LOG_RD_OFFSET          (INIT_HCA_QPC_OFFSET + 0x77)
 #define INIT_HCA_MCAST_OFFSET           0x0c0
 #define         INIT_HCA_MC_BASE_OFFSET         (INIT_HCA_MCAST_OFFSET + 0x00)
-#define         INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
-#define         INIT_HCA_LOG_MC_HASH_SZ_OFFSET  (INIT_HCA_MCAST_OFFSET + 0x16)
+#define         INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x13)
+#define         INIT_HCA_LOG_MC_HASH_SZ_OFFSET  (INIT_HCA_MCAST_OFFSET + 0x17)
 #define  INIT_HCA_UC_STEERING_OFFSET    (INIT_HCA_MCAST_OFFSET + 0x18)
 #define         INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
 #define  INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN      0x6
@@ -1873,7 +1873,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define  INIT_HCA_DRIVER_VERSION_SZ       0x40
 #define  INIT_HCA_FS_PARAM_OFFSET         0x1d0
 #define  INIT_HCA_FS_BASE_OFFSET          (INIT_HCA_FS_PARAM_OFFSET + 0x00)
-#define  INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET  (INIT_HCA_FS_PARAM_OFFSET + 0x12)
+#define  INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET  (INIT_HCA_FS_PARAM_OFFSET + 0x13)
 #define  INIT_HCA_FS_A0_OFFSET           (INIT_HCA_FS_PARAM_OFFSET + 0x18)
 #define  INIT_HCA_FS_LOG_TABLE_SZ_OFFSET  (INIT_HCA_FS_PARAM_OFFSET + 0x1b)
 #define  INIT_HCA_FS_ETH_BITS_OFFSET      (INIT_HCA_FS_PARAM_OFFSET + 0x21)
index 650ae08..8f020f2 100644 (file)
@@ -182,8 +182,8 @@ struct mlx4_init_hca_param {
        u64 cmpt_base;
        u64 mtt_base;
        u64 global_caps;
-       u16 log_mc_entry_sz;
-       u16 log_mc_hash_sz;
+       u8 log_mc_entry_sz;
+       u8 log_mc_hash_sz;
        u16 hca_core_clock; /* Internal Clock Frequency (in MHz) */
        u8  log_num_qps;
        u8  log_num_srqs;
index a46efe3..30378e4 100644 (file)
@@ -271,6 +271,10 @@ struct mlx4_en_page_cache {
        } buf[MLX4_EN_CACHE_SIZE];
 };
 
+enum {
+       MLX4_EN_TX_RING_STATE_RECOVERING,
+};
+
 struct mlx4_en_priv;
 
 struct mlx4_en_tx_ring {
@@ -317,6 +321,7 @@ struct mlx4_en_tx_ring {
         * Only queue_stopped might be used if BQL is not properly working.
         */
        unsigned long           queue_stopped;
+       unsigned long           state;
        struct mlx4_hwq_resources sp_wqres;
        struct mlx4_qp          sp_qp;
        struct mlx4_qp_context  sp_context;
@@ -530,6 +535,10 @@ struct mlx4_en_stats_bitmap {
        struct mutex mutex; /* for mutual access to stats bitmap */
 };
 
+enum {
+       MLX4_EN_STATE_FLAG_RESTARTING,
+};
+
 struct mlx4_en_priv {
        struct mlx4_en_dev *mdev;
        struct mlx4_en_port_profile *prof;
@@ -595,7 +604,7 @@ struct mlx4_en_priv {
        struct mlx4_en_cq *rx_cq[MAX_RX_RINGS];
        struct mlx4_qp drop_qp;
        struct work_struct rx_mode_task;
-       struct work_struct watchdog_task;
+       struct work_struct restart_task;
        struct work_struct linkstate_task;
        struct delayed_work stats_task;
        struct delayed_work service_task;
@@ -641,6 +650,7 @@ struct mlx4_en_priv {
        u32 pflags;
        u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
        u8 rss_hash_fn;
+       unsigned long state;
 };
 
 enum mlx4_en_wol {
index 99f1ec3..3e371d2 100644 (file)
@@ -198,6 +198,7 @@ config MLX5_EN_TLS
 config MLX5_SW_STEERING
        bool "Mellanox Technologies software-managed steering"
        depends on MLX5_CORE_EN && MLX5_ESWITCH
+       select CRC32
        default y
        help
        Build support for software-managed steering in the NIC.
index 3e44e4d..95f2b26 100644 (file)
@@ -187,7 +187,7 @@ static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
        struct mlx5e_priv *priv;
 
        /* The given netdev is either not a representor or not a LAG slave */
-       if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev))
+       if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
                return false;
 
        priv = netdev_priv(netdev);
index 97f1594..e51f60b 100644 (file)
@@ -44,6 +44,7 @@ static void accel_fs_tcp_set_ipv4_flow(struct mlx5_flow_spec *spec, struct sock
                         outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
 static void accel_fs_tcp_set_ipv6_flow(struct mlx5_flow_spec *spec, struct sock *sk)
 {
        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
@@ -63,6 +64,7 @@ static void accel_fs_tcp_set_ipv6_flow(struct mlx5_flow_spec *spec, struct sock
                            outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
               0xff, 16);
 }
+#endif
 
 void mlx5e_accel_fs_del_sk(struct mlx5_flow_handle *rule)
 {
index 0e45590..381a9c8 100644 (file)
@@ -64,13 +64,13 @@ static int rx_err_add_rule(struct mlx5e_priv *priv,
        if (!spec)
                return -ENOMEM;
 
-       /* Action to copy 7 bit ipsec_syndrome to regB[0:6] */
+       /* Action to copy 7 bit ipsec_syndrome to regB[24:30] */
        MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY);
        MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME);
        MLX5_SET(copy_action_in, action, src_offset, 0);
        MLX5_SET(copy_action_in, action, length, 7);
        MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
-       MLX5_SET(copy_action_in, action, dst_offset, 0);
+       MLX5_SET(copy_action_in, action, dst_offset, 24);
 
        modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL,
                                              1, action);
@@ -488,13 +488,13 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 
        setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act);
 
-       /* Set 1  bit ipsec marker */
-       /* Set 24 bit ipsec_obj_id */
+       /* Set bit[31] ipsec marker */
+       /* Set bit[23-0] ipsec_obj_id */
        MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
-       MLX5_SET(set_action_in, action, data, (ipsec_obj_id << 1) | 0x1);
-       MLX5_SET(set_action_in, action, offset, 7);
-       MLX5_SET(set_action_in, action, length, 25);
+       MLX5_SET(set_action_in, action, data, (ipsec_obj_id | BIT(31)));
+       MLX5_SET(set_action_in, action, offset, 0);
+       MLX5_SET(set_action_in, action, length, 32);
 
        modify_hdr = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL,
                                              1, action);
index 11e31a3..a9b4560 100644 (file)
@@ -453,7 +453,6 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
                                       struct mlx5_cqe64 *cqe)
 {
        u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata);
-       u8 ipsec_syndrome = ipsec_meta_data & 0xFF;
        struct mlx5e_priv *priv;
        struct xfrm_offload *xo;
        struct xfrm_state *xs;
@@ -481,7 +480,7 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
        xo = xfrm_offload(skb);
        xo->flags = CRYPTO_DONE;
 
-       switch (ipsec_syndrome & MLX5_IPSEC_METADATA_SYNDROM_MASK) {
+       switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) {
        case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED:
                xo->status = CRYPTO_SUCCESS;
                if (WARN_ON_ONCE(priv->ipsec->no_trailer))
index 056dacb..9df9b9a 100644 (file)
 #include "en.h"
 #include "en/txrx.h"
 
-#define MLX5_IPSEC_METADATA_MARKER_MASK      (0x80)
-#define MLX5_IPSEC_METADATA_SYNDROM_MASK     (0x7F)
-#define MLX5_IPSEC_METADATA_HANDLE(metadata) (((metadata) >> 8) & 0xFF)
+/* Bit31: IPsec marker, Bit30-24: IPsec syndrome, Bit23-0: IPsec obj id */
+#define MLX5_IPSEC_METADATA_MARKER(metadata)  (((metadata) >> 31) & 0x1)
+#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(6, 0))
+#define MLX5_IPSEC_METADATA_HANDLE(metadata)  ((metadata) & GENMASK(23, 0))
 
 struct mlx5e_accel_tx_ipsec_state {
        struct xfrm_offload *xo;
@@ -78,7 +79,7 @@ static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_st
 
 static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe)
 {
-       return !!(MLX5_IPSEC_METADATA_MARKER_MASK & be32_to_cpu(cqe->ft_metadata));
+       return MLX5_IPSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata));
 }
 
 static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
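
The new regB layout above packs marker, syndrome and object id into a single 32-bit word. A self-contained userspace sketch of the decode, assuming the layout in the comment; GENMASK is re-defined locally only because this runs outside the kernel:

	#include <stdint.h>
	#include <stdio.h>

	#define GENMASK(h, l) (((~0u) >> (31 - (h))) & ((~0u) << (l)))

	int main(void)
	{
		/* Bit31: marker, bits 30-24: syndrome, bits 23-0: obj id */
		uint32_t meta = (1u << 31) | (0x5u << 24) | 0x123456;

		printf("marker=%u syndrome=0x%x handle=0x%x\n",
		       (meta >> 31) & 0x1,
		       (meta >> 24) & GENMASK(6, 0),
		       meta & GENMASK(23, 0));
		return 0;	/* prints: marker=1 syndrome=0x5 handle=0x123456 */
	}
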
index 7f6221b..6a1d825 100644 (file)
@@ -476,19 +476,22 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
 
        depth += sizeof(struct tcphdr);
 
-       if (unlikely(!sk || sk->sk_state == TCP_TIME_WAIT))
+       if (unlikely(!sk))
                return;
 
-       if (unlikely(!resync_queue_get_psv(sk)))
-               return;
+       if (unlikely(sk->sk_state == TCP_TIME_WAIT))
+               goto unref;
 
-       skb->sk = sk;
-       skb->destructor = sock_edemux;
+       if (unlikely(!resync_queue_get_psv(sk)))
+               goto unref;
 
        seq = th->seq;
        datalen = skb->len - depth;
        tls_offload_rx_resync_async_request_start(sk, seq, datalen);
        rq->stats->tls_resync_req_start++;
+
+unref:
+       sock_gen_put(sk);
 }
 
 void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk,
index 2e2fa04..ce710f2 100644 (file)
@@ -5229,8 +5229,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
        tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
                                 MLX5_FLOW_NAMESPACE_KERNEL);
-       if (IS_ERR(tc->ct))
+       if (IS_ERR(tc->ct)) {
+               err = PTR_ERR(tc->ct);
                goto err_ct;
+       }
 
        tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
        err = register_netdevice_notifier_dev_net(priv->netdev,
index 3b97900..4a2ce24 100644 (file)
@@ -283,6 +283,9 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
 
        reg_b = be32_to_cpu(cqe->ft_metadata);
 
+       if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ZONE_RESTORE_BITS))
+               return false;
+
        chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
        if (chain)
                return true;
index 82b4419..d97203c 100644 (file)
@@ -144,7 +144,9 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
        memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
 }
 
-/* RM 2311217: no L4 inner checksum for IPsec tunnel type packet */
+/* If the packet is not CHECKSUM_PARTIAL (e.g. an ICMP packet),
+ * the L3 checksum flag still needs to be set for IPsec
+ */
 static void
 ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                            struct mlx5_wqe_eth_seg *eseg)
@@ -154,19 +156,15 @@ ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
                sq->stats->csum_partial_inner++;
        } else {
-               eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
                sq->stats->csum_partial++;
        }
 }
 
 static inline void
-mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
+mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+                           struct mlx5e_accel_tx_state *accel,
+                           struct mlx5_wqe_eth_seg *eseg)
 {
-       if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) {
-               ipsec_txwqe_build_eseg_csum(sq, skb, eseg);
-               return;
-       }
-
        if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
                eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
                if (skb->encapsulation) {
@@ -177,6 +175,14 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct
                        eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
                        sq->stats->csum_partial++;
                }
+#ifdef CONFIG_MLX5_EN_TLS
+       } else if (unlikely(accel && accel->tls.tls_tisn)) {
+               eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+               sq->stats->csum_partial++;
+#endif
+       } else if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) {
+               ipsec_txwqe_build_eseg_csum(sq, skb, eseg);
+
        } else
                sq->stats->csum_none++;
 }
@@ -608,12 +614,13 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
 }
 
 static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
-                                  struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
+                                  struct sk_buff *skb, struct mlx5e_accel_tx_state *accel,
+                                  struct mlx5_wqe_eth_seg *eseg)
 {
        if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg)))
                return false;
 
-       mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
+       mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg);
 
        return true;
 }
@@ -640,7 +647,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
                if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
                        struct mlx5_wqe_eth_seg eseg = {};
 
-                       if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &eseg)))
+                       if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg)))
                                return NETDEV_TX_OK;
 
                        mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
@@ -657,7 +664,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
        /* May update the WQE, but may not post other WQEs. */
        mlx5e_accel_tx_finish(sq, wqe, &accel,
                              (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
-       if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &wqe->eth)))
+       if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth)))
                return NETDEV_TX_OK;
 
        mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());
@@ -676,7 +683,7 @@ void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit
        mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
        pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
        wqe = MLX5E_TX_FETCH_WQE(sq, pi);
-       mlx5e_txwqe_build_eseg_csum(sq, skb, &wqe->eth);
+       mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, &wqe->eth);
        mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more);
 }
 
@@ -945,7 +952,7 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 
        mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram);
 
-       mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
+       mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg);
 
        eseg->mss = attr.mss;
 
index e8e6294..d4ee0a9 100644 (file)
@@ -1142,6 +1142,10 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
        struct mlx5_vport *vport;
 
        vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+       if (!vport->qos.enabled)
+               return -EOPNOTSUPP;
+
        MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
 
        return mlx5_modify_scheduling_element_cmd(esw->dev,
@@ -1408,6 +1412,7 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
        int i;
 
        mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+               memset(&vport->qos, 0, sizeof(vport->qos));
                memset(&vport->info, 0, sizeof(vport->info));
                vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
        }
@@ -2221,12 +2226,15 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
                max_guarantee = evport->info.min_rate;
        }
 
-       return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+       if (max_guarantee)
+               return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+       return 0;
 }
 
-static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider)
+static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
 {
        u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+       u32 divider = calculate_vports_min_rate_divider(esw);
        struct mlx5_vport *evport;
        u32 vport_max_rate;
        u32 vport_min_rate;
@@ -2239,9 +2247,9 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider)
                        continue;
                vport_min_rate = evport->info.min_rate;
                vport_max_rate = evport->info.max_rate;
-               bw_share = MLX5_MIN_BW_SHARE;
+               bw_share = 0;
 
-               if (vport_min_rate)
+               if (divider)
                        bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate,
                                                         divider,
                                                         fw_max_bw_share);
@@ -2266,7 +2274,6 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
        struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
        u32 fw_max_bw_share;
        u32 previous_min_rate;
-       u32 divider;
        bool min_rate_supported;
        bool max_rate_supported;
        int err = 0;
@@ -2291,8 +2298,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
 
        previous_min_rate = evport->info.min_rate;
        evport->info.min_rate = min_rate;
-       divider = calculate_vports_min_rate_divider(esw);
-       err = normalize_vports_min_rate(esw, divider);
+       err = normalize_vports_min_rate(esw);
        if (err) {
                evport->info.min_rate = previous_min_rate;
                goto unlock;
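
The min-rate rework above means bw_share is now 0 when no vport has a guarantee configured (divider == 0), instead of being forced to MLX5_MIN_BW_SHARE. A hedged userspace sketch of the resulting math, assuming MLX5_RATE_TO_BW_SHARE divides the rate by the common divider and clamps it to [MLX5_MIN_BW_SHARE, fw_max_bw_share]:

	#include <stdint.h>
	#include <stdio.h>

	#define MLX5_MIN_BW_SHARE 1

	static uint32_t vport_bw_share(uint32_t min_rate, uint32_t divider,
				       uint32_t fw_max_bw_share)
	{
		uint32_t share;

		if (!divider)
			return 0;	/* no vport has a min rate configured */

		share = min_rate / divider;
		if (share < MLX5_MIN_BW_SHARE)
			share = MLX5_MIN_BW_SHARE;
		if (share > fw_max_bw_share)
			share = fw_max_bw_share;
		return share;
	}

	int main(void)
	{
		/* divider = max_guarantee / fw_max_bw_share, per the hunk above */
		printf("%u %u\n", vport_bw_share(500, 10, 100),	/* 50 */
		       vport_bw_share(500, 0, 100));		/* 0  */
		return 0;
	}
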
index 325a5b0..9fdd992 100644 (file)
@@ -534,6 +534,13 @@ static void del_sw_hw_rule(struct fs_node *node)
                goto out;
        }
 
+       if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT &&
+           --fte->dests_size) {
+               fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+               fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+               goto out;
+       }
+
        if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
            --fte->dests_size) {
                fte->modify_mask |=
index 1506388..4d7f8a3 100644 (file)
@@ -422,6 +422,24 @@ static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id,
                      npages, ec_function, func_id);
 }
 
+static u32 fwp_fill_manage_pages_out(struct fw_page *fwp, u32 *out, u32 index,
+                                    u32 npages)
+{
+       u32 pages_set = 0;
+       unsigned int n;
+
+       for_each_clear_bit(n, &fwp->bitmask, MLX5_NUM_4K_IN_PAGE) {
+               MLX5_ARRAY_SET64(manage_pages_out, out, pas, index + pages_set,
+                                fwp->addr + (n * MLX5_ADAPTER_PAGE_SIZE));
+               pages_set++;
+
+               if (!--npages)
+                       break;
+       }
+
+       return pages_set;
+}
+
 static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
                             u32 *in, int in_size, u32 *out, int out_size)
 {
@@ -448,8 +466,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
                fwp = rb_entry(p, struct fw_page, rb_node);
                p = rb_next(p);
 
-               MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->addr);
-               i++;
+               i += fwp_fill_manage_pages_out(fwp, out, i, npages - i);
        }
 
        MLX5_SET(manage_pages_out, out, output_num_entries, i);
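
fwp_fill_manage_pages_out() above walks the clear bits of fwp->bitmask, each of which apparently marks a 4K chunk of the host page still held by firmware and due to be reported back. A runnable userspace analogue of that walk, open-coding for_each_clear_bit() for illustration:

	#include <stdio.h>

	int main(void)
	{
		unsigned long bitmask = 0xaUL;	/* bits 1 and 3 set: those chunks are free */
		unsigned int n, pages_set = 0, npages = 2;

		for (n = 0; n < 4; n++) {	/* stand-in for for_each_clear_bit(n, &bitmask, 4) */
			if (bitmask & (1UL << n))
				continue;
			printf("reclaim 4K chunk %u\n", n);	/* prints chunks 0 and 2 */
			if (++pages_set == npages)
				break;
		}
		return 0;
	}
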
index 6bd34b2..51bbd88 100644 (file)
@@ -92,6 +92,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
        caps->eswitch_manager   = MLX5_CAP_GEN(mdev, eswitch_manager);
        caps->gvmi              = MLX5_CAP_GEN(mdev, vhca_id);
        caps->flex_protocols    = MLX5_CAP_GEN(mdev, flex_parser_protocols);
+       caps->sw_format_ver     = MLX5_CAP_GEN(mdev, steering_format_version);
 
        if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) {
                caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0);
index 890767a..aa2c2d6 100644 (file)
@@ -223,6 +223,11 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
        if (ret)
                return ret;
 
+       if (dmn->info.caps.sw_format_ver != MLX5_STEERING_FORMAT_CONNECTX_5) {
+               mlx5dr_err(dmn, "SW steering is not supported on this device\n");
+               return -EOPNOTSUPP;
+       }
+
        ret = dr_domain_query_fdb_caps(mdev, dmn);
        if (ret)
                return ret;
index f50f3b1..cf62ea4 100644 (file)
@@ -625,6 +625,7 @@ struct mlx5dr_cmd_caps {
        u8 max_ft_level;
        u16 roce_min_src_udp;
        u8 num_esw_ports;
+       u8 sw_format_ver;
        bool eswitch_manager;
        bool rx_sw_owner;
        bool tx_sw_owner;
index 872e991..a619d90 100644 (file)
@@ -6,6 +6,7 @@
 config MLXSW_CORE
        tristate "Mellanox Technologies Switch ASICs support"
        select NET_DEVLINK
+       select MLXFW
        help
          This driver supports the Mellanox Technologies Switch ASICs family.
 
@@ -82,7 +83,6 @@ config MLXSW_SPECTRUM
        select GENERIC_ALLOCATOR
        select PARMAN
        select OBJAGG
-       select MLXFW
        imply PTP_1588_CLOCK
        select NET_PTP_CLASSIFY if PTP_1588_CLOCK
        default m
index 937b8e4..1a86535 100644 (file)
@@ -571,7 +571,8 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans)
        if (trans->core->fw_flash_in_progress)
                timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS);
 
-       queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout);
+       queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw,
+                          timeout << trans->retries);
 }
 
 static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core,
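
The queue_delayed_work() change above turns a fixed EMAD retry timeout into exponential backoff: retry n now waits timeout << n jiffies, i.e. 1x, 2x, 4x the base per successive retry. As a tiny sketch:

	/* retry 0 waits base, retry 1 waits 2 * base, retry 2 waits 4 * base */
	static unsigned long emad_backoff(unsigned long base, unsigned int retries)
	{
		return base << retries;
	}
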
index 31f9a82..d0f6dfe 100644 (file)
@@ -47,6 +47,7 @@ config LAN743X
        depends on PCI
        select PHYLIB
        select CRC16
+       select CRC32
        help
          Support for the Microchip LAN743x PCI Express Gigabit Ethernet chip
 
index e2c99d9..b319c22 100644 (file)
@@ -148,7 +148,8 @@ static void lan743x_intr_software_isr(void *context)
 
        int_sts = lan743x_csr_read(adapter, INT_STS);
        if (int_sts & INT_BIT_SW_GP_) {
-               lan743x_csr_write(adapter, INT_STS, INT_BIT_SW_GP_);
+               /* disable the interrupt to prevent repeated re-triggering */
+               lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_SW_GP_);
                intr->software_isr_flag = 1;
        }
 }
@@ -1307,13 +1308,13 @@ clean_up_data_descriptor:
                goto clear_active;
 
        if (!(buffer_info->flags & TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED)) {
-               dev_kfree_skb(buffer_info->skb);
+               dev_kfree_skb_any(buffer_info->skb);
                goto clear_skb;
        }
 
        if (cleanup) {
                lan743x_ptp_unrequest_tx_timestamp(tx->adapter);
-               dev_kfree_skb(buffer_info->skb);
+               dev_kfree_skb_any(buffer_info->skb);
        } else {
                ignore_sync = (buffer_info->flags &
                               TX_BUFFER_INFO_FLAG_IGNORE_SYNC) != 0;
@@ -1623,7 +1624,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
        if (required_number_of_descriptors >
                lan743x_tx_get_avail_desc(tx)) {
                if (required_number_of_descriptors > (tx->ring_size - 1)) {
-                       dev_kfree_skb(skb);
+                       dev_kfree_skb_irq(skb);
                } else {
                        /* save to overflow buffer */
                        tx->overflow_skb = skb;
@@ -1656,7 +1657,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
                                   start_frame_length,
                                   do_timestamp,
                                   skb->ip_summed == CHECKSUM_PARTIAL)) {
-               dev_kfree_skb(skb);
+               dev_kfree_skb_irq(skb);
                goto unlock;
        }
 
@@ -1675,7 +1676,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
                         * frame assembler clean up was performed inside
                         *      lan743x_tx_frame_add_fragment
                         */
-                       dev_kfree_skb(skb);
+                       dev_kfree_skb_irq(skb);
                        goto unlock;
                }
        }
index 70bf8c6..a53bd36 100644 (file)
@@ -1489,10 +1489,11 @@ int ocelot_init(struct ocelot *ocelot)
                     SYS_FRM_AGING_MAX_AGE(307692), SYS_FRM_AGING);
 
        /* Setup flooding PGIDs */
-       ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) |
-                        ANA_FLOODING_FLD_BROADCAST(PGID_MC) |
-                        ANA_FLOODING_FLD_UNICAST(PGID_UC),
-                        ANA_FLOODING, 0);
+       for (i = 0; i < ocelot->num_flooding_pgids; i++)
+               ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) |
+                                ANA_FLOODING_FLD_BROADCAST(PGID_MC) |
+                                ANA_FLOODING_FLD_UNICAST(PGID_UC),
+                                ANA_FLOODING, i);
        ocelot_write(ocelot, ANA_FLOODING_IPMC_FLD_MC6_DATA(PGID_MCIPV6) |
                     ANA_FLOODING_IPMC_FLD_MC6_CTRL(PGID_MC) |
                     ANA_FLOODING_IPMC_FLD_MC4_DATA(PGID_MCIPV4) |
index dc00772..1e77294 100644 (file)
@@ -1254,6 +1254,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
        }
 
        ocelot->num_phys_ports = of_get_child_count(ports);
+       ocelot->num_flooding_pgids = 1;
 
        ocelot->vcap = vsc7514_vcap_props;
        ocelot->inj_prefix = OCELOT_TAG_PREFIX_NONE;
index d8b99d6..b82758d 100644 (file)
@@ -22,6 +22,7 @@ config NFP
        depends on VXLAN || VXLAN=n
        depends on TLS && TLS_DEVICE || TLS_DEVICE=n
        select NET_DEVLINK
+       select CRC32
        help
          This driver supports the Netronome(R) NFP4000/NFP6000 based
          cards working as an advanced Ethernet NIC.  It works with both
index b150da4..4372268 100644 (file)
@@ -3562,9 +3562,6 @@ static int nfp_net_xdp_setup_drv(struct nfp_net *nn, struct netdev_bpf *bpf)
        struct nfp_net_dp *dp;
        int err;
 
-       if (!xdp_attachment_flags_ok(&nn->xdp, bpf))
-               return -EBUSY;
-
        if (!prog == !nn->dp.xdp_prog) {
                WRITE_ONCE(nn->dp.xdp_prog, prog);
                xdp_attachment_setup(&nn->xdp, bpf);
@@ -3593,9 +3590,6 @@ static int nfp_net_xdp_setup_hw(struct nfp_net *nn, struct netdev_bpf *bpf)
 {
        int err;
 
-       if (!xdp_attachment_flags_ok(&nn->xdp_hw, bpf))
-               return -EBUSY;
-
        err = nfp_app_xdp_offload(nn->app, nn, bpf->prog, bpf->extack);
        if (err)
                return err;
index ee83a71..c84997d 100644 (file)
@@ -3,6 +3,7 @@ config LPC_ENET
        tristate "NXP ethernet MAC on LPC devices"
        depends on ARCH_LPC32XX || COMPILE_TEST
        select PHYLIB
+       select CRC32
        help
          Say Y or M here if you want to use the NXP ethernet MAC included on
          some NXP LPC devices. You can safely enable this option for LPC32xx
index be66601..040a15a 100644 (file)
@@ -1078,16 +1078,20 @@ static int pasemi_mac_open(struct net_device *dev)
 
        mac->tx = pasemi_mac_setup_tx_resources(dev);
 
-       if (!mac->tx)
+       if (!mac->tx) {
+               ret = -ENOMEM;
                goto out_tx_ring;
+       }
 
        /* We might already have allocated rings in case mtu was changed
         * before interface was brought up.
         */
        if (dev->mtu > 1500 && !mac->num_cs) {
                pasemi_mac_setup_csrings(mac);
-               if (!mac->num_cs)
+               if (!mac->num_cs) {
+                       ret = -ENOMEM;
                        goto out_tx_ring;
+               }
        }
 
        /* Zero out rmon counters */
index 0e4cd88..0a22f8c 100644 (file)
@@ -1647,9 +1647,9 @@ static void qed_src_init_pf(struct qed_hwfn *p_hwfn)
                     ilog2(rounded_conn_num));
 
        STORE_RT_REG_AGG(p_hwfn, SRC_REG_FIRSTFREE_RT_OFFSET,
-                        p_hwfn->p_cxt_mngr->first_free);
+                        p_hwfn->p_cxt_mngr->src_t2.first_free);
        STORE_RT_REG_AGG(p_hwfn, SRC_REG_LASTFREE_RT_OFFSET,
-                        p_hwfn->p_cxt_mngr->last_free);
+                        p_hwfn->p_cxt_mngr->src_t2.last_free);
 }
 
 /* Timers PF */
index 8b64495..056e796 100644 (file)
@@ -326,9 +326,6 @@ struct qed_cxt_mngr {
 
        /* SRC T2 */
        struct qed_src_t2 src_t2;
-       u32 t2_num_pages;
-       u64 first_free;
-       u64 last_free;
 
        /* total number of SRQ's for this hwfn */
        u32 srq_count;
index 512cbef..a998611 100644 (file)
@@ -2754,14 +2754,18 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
        iwarp_info->partial_fpdus = kcalloc((u16)p_hwfn->p_rdma_info->num_qps,
                                            sizeof(*iwarp_info->partial_fpdus),
                                            GFP_KERNEL);
-       if (!iwarp_info->partial_fpdus)
+       if (!iwarp_info->partial_fpdus) {
+               rc = -ENOMEM;
                goto err;
+       }
 
        iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps;
 
        iwarp_info->mpa_intermediate_buf = kzalloc(buff_size, GFP_KERNEL);
-       if (!iwarp_info->mpa_intermediate_buf)
+       if (!iwarp_info->mpa_intermediate_buf) {
+               rc = -ENOMEM;
                goto err;
+       }
 
        /* The mpa_bufs array holds pending RX packets received on the
         * mpa ll2 that don't have a place on the tx ring and require later
@@ -2771,8 +2775,10 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
        iwarp_info->mpa_bufs = kcalloc(data.input.rx_num_desc,
                                       sizeof(*iwarp_info->mpa_bufs),
                                       GFP_KERNEL);
-       if (!iwarp_info->mpa_bufs)
+       if (!iwarp_info->mpa_bufs) {
+               rc = -ENOMEM;
                goto err;
+       }
 
        INIT_LIST_HEAD(&iwarp_info->mpa_buf_pending_list);
        INIT_LIST_HEAD(&iwarp_info->mpa_buf_list);
index b8af59f..d2c1907 100644 (file)
@@ -2231,7 +2231,8 @@ static int qlcnic_83xx_restart_hw(struct qlcnic_adapter *adapter)
 
        /* Boot either flash image or firmware image from host file system */
        if (qlcnic_load_fw_file == 1) {
-               if (qlcnic_83xx_load_fw_image_from_host(adapter))
+               err = qlcnic_83xx_load_fw_image_from_host(adapter);
+               if (err)
                        return err;
        } else {
                QLC_SHARED_REG_WR32(adapter, QLCNIC_FW_IMG_VALID,
index 29a7bfa..3d7d3ab 100644 (file)
@@ -188,6 +188,11 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
 
        dev = skb->dev;
        port = rmnet_get_port_rcu(dev);
+       if (unlikely(!port)) {
+               atomic_long_inc(&skb->dev->rx_nohandler);
+               kfree_skb(skb);
+               goto done;
+       }
 
        switch (port->rmnet_mode) {
        case RMNET_EPMODE_VND:
index 99e1290..2318811 100644 (file)
@@ -19,6 +19,7 @@ if NET_VENDOR_ROCKER
 config ROCKER
        tristate "Rocker switch driver (EXPERIMENTAL)"
        depends on PCI && NET_SWITCHDEV && BRIDGE
+       select CRC32
        help
          This driver supports the Rocker switch device.
 
index efef547..223f69d 100644 (file)
@@ -246,13 +246,7 @@ static int imx_dwmac_probe(struct platform_device *pdev)
                goto err_parse_dt;
        }
 
-       ret = dma_set_mask_and_coherent(&pdev->dev,
-                                       DMA_BIT_MASK(dwmac->ops->addr_width));
-       if (ret) {
-               dev_err(&pdev->dev, "DMA mask set failed\n");
-               goto err_dma_mask;
-       }
-
+       plat_dat->addr64 = dwmac->ops->addr_width;
        plat_dat->init = imx_dwmac_init;
        plat_dat->exit = imx_dwmac_exit;
        plat_dat->fix_mac_speed = imx_dwmac_fix_speed;
@@ -272,7 +266,6 @@ static int imx_dwmac_probe(struct platform_device *pdev)
 err_dwmac_init:
 err_drv_probe:
        imx_dwmac_exit(pdev, plat_dat->bsp_priv);
-err_dma_mask:
 err_parse_dt:
 err_match_data:
        stmmac_remove_config_dt(pdev, plat_dat);
index f61cb99..82b1c7a 100644 (file)
@@ -113,8 +113,10 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
                /* Enable TX clock */
                if (dwmac->data->tx_clk_en) {
                        dwmac->tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
-                       if (IS_ERR(dwmac->tx_clk))
+                       if (IS_ERR(dwmac->tx_clk)) {
+                               ret = PTR_ERR(dwmac->tx_clk);
                                goto err_remove_config_dt;
+                       }
 
                        clk_prepare_enable(dwmac->tx_clk);
 
index 5afcf05..6d6bd77 100644 (file)
@@ -30,7 +30,6 @@
 #define PRG_ETH0_EXT_RMII_MODE         4
 
 /* mux to choose between fclk_div2 (bit unset) and mpll2 (bit set) */
-#define PRG_ETH0_CLK_M250_SEL_SHIFT    4
 #define PRG_ETH0_CLK_M250_SEL_MASK     GENMASK(4, 4)
 
 /* TX clock delay in ns = "8ns / 4 * tx_dly_val" (where 8ns are exactly one
@@ -155,8 +154,9 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
                return -ENOMEM;
 
        clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0;
-       clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
-       clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
+       clk_configs->m250_mux.shift = __ffs(PRG_ETH0_CLK_M250_SEL_MASK);
+       clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK >>
+                                    clk_configs->m250_mux.shift;
        clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parents,
                                         ARRAY_SIZE(mux_parents), &clk_mux_ops,
                                         &clk_configs->m250_mux.hw);
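
Deriving the shift from the mask, as the hunk above now does, keeps the two from drifting apart if the register layout changes. __ffs() returns the index of the lowest set bit, so:

	shift = __ffs(PRG_ETH0_CLK_M250_SEL_MASK);	/* 4 for GENMASK(4, 4) */
	mask  = PRG_ETH0_CLK_M250_SEL_MASK >> shift;	/* 0x1, the width clk_mux expects */
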
index 002791b..ced6d76 100644 (file)
@@ -1171,7 +1171,6 @@ const struct stmmac_ops dwmac4_ops = {
        .pcs_get_adv_lp = dwmac4_get_adv_lp,
        .debug = dwmac4_debug,
        .set_filter = dwmac4_set_filter,
-       .flex_pps_config = dwmac5_flex_pps_config,
        .set_mac_loopback = dwmac4_set_mac_loopback,
        .update_vlan_hash = dwmac4_update_vlan_hash,
        .sarc_configure = dwmac4_sarc_configure,
@@ -1213,6 +1212,7 @@ const struct stmmac_ops dwmac410_ops = {
        .pcs_get_adv_lp = dwmac4_get_adv_lp,
        .debug = dwmac4_debug,
        .set_filter = dwmac4_set_filter,
+       .flex_pps_config = dwmac5_flex_pps_config,
        .set_mac_loopback = dwmac4_set_mac_loopback,
        .update_vlan_hash = dwmac4_update_vlan_hash,
        .sarc_configure = dwmac4_sarc_configure,
index 6e30d7e..0b4ee2d 100644 (file)
@@ -22,7 +22,7 @@ int dwmac4_dma_reset(void __iomem *ioaddr)
 
        return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value,
                                 !(value & DMA_BUS_MODE_SFT_RESET),
-                                10000, 100000);
+                                10000, 1000000);
 }
 
 void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan)
index cb87d31..57a53a6 100644 (file)
@@ -23,7 +23,7 @@ int dwmac_dma_reset(void __iomem *ioaddr)
 
        return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value,
                                 !(value & DMA_BUS_MODE_SFT_RESET),
-                                10000, 100000);
+                                10000, 200000);
 }
 
 /* CSR1 enables the transmit DMA to check for new descriptor */
index d833908..c33db79 100644 (file)
@@ -1534,6 +1534,19 @@ static void dma_free_tx_skbufs(struct stmmac_priv *priv, u32 queue)
 }
 
 /**
+ * stmmac_free_tx_skbufs - free TX skb buffers
+ * @priv: private structure
+ */
+static void stmmac_free_tx_skbufs(struct stmmac_priv *priv)
+{
+       u32 tx_queue_cnt = priv->plat->tx_queues_to_use;
+       u32 queue;
+
+       for (queue = 0; queue < tx_queue_cnt; queue++)
+               dma_free_tx_skbufs(priv, queue);
+}
+
+/**
  * free_dma_rx_desc_resources - free RX dma desc resources
  * @priv: private structure
  */
@@ -2895,9 +2908,6 @@ static int stmmac_release(struct net_device *dev)
        struct stmmac_priv *priv = netdev_priv(dev);
        u32 chan;
 
-       if (priv->eee_enabled)
-               del_timer_sync(&priv->eee_ctrl_timer);
-
        if (device_may_wakeup(priv->device))
                phylink_speed_down(priv->phylink, false);
        /* Stop and disconnect the PHY */
@@ -2916,6 +2926,11 @@ static int stmmac_release(struct net_device *dev)
        if (priv->lpi_irq > 0)
                free_irq(priv->lpi_irq, dev);
 
+       if (priv->eee_enabled) {
+               priv->tx_path_in_lpi_mode = false;
+               del_timer_sync(&priv->eee_ctrl_timer);
+       }
+
        /* Stop TX/RX DMA and clear the descriptors */
        stmmac_stop_all_dma(priv);
 
@@ -4930,6 +4945,14 @@ int stmmac_dvr_probe(struct device *device,
                dev_info(priv->device, "SPH feature enabled\n");
        }
 
+       /* The current IP register MAC_HW_Feature1[ADDR64] only defines
+        * 32/40/64 bit widths, but some SoCs support others: the i.MX8MP
+        * supports 34 bits yet reports 40 bits in MAC_HW_Feature1[ADDR64].
+        * So overwrite dma_cap.addr64 according to the real HW design.
+        */
+       if (priv->plat->addr64)
+               priv->dma_cap.addr64 = priv->plat->addr64;
+
        if (priv->dma_cap.addr64) {
                ret = dma_set_mask_and_coherent(device,
                                DMA_BIT_MASK(priv->dma_cap.addr64));
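
The plat->addr64 override exists because MAC_HW_Feature1[ADDR64] can only encode 32/40/64, so a glue driver (dwmac-imx for the i.MX8MP, per the earlier hunk) passes the true width out of band. The precedence is simply "platform value wins when set"; values here are hypothetical:

	/* example: the HW register reports 40, the SoC really wires 34 bits */
	if (priv->plat->addr64)
		priv->dma_cap.addr64 = priv->plat->addr64;
	ret = dma_set_mask_and_coherent(device,
					DMA_BIT_MASK(priv->dma_cap.addr64));
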
@@ -5142,6 +5165,11 @@ int stmmac_suspend(struct device *dev)
        for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
                del_timer_sync(&priv->tx_queue[chan].txtimer);
 
+       if (priv->eee_enabled) {
+               priv->tx_path_in_lpi_mode = false;
+               del_timer_sync(&priv->eee_ctrl_timer);
+       }
+
        /* Stop TX/RX DMA */
        stmmac_stop_all_dma(priv);
 
@@ -5247,10 +5275,20 @@ int stmmac_resume(struct device *dev)
                        return ret;
        }
 
+       if (!device_may_wakeup(priv->device) || !priv->plat->pmt) {
+               rtnl_lock();
+               phylink_start(priv->phylink);
+               /* We may have called phylink_speed_down before */
+               phylink_speed_up(priv->phylink);
+               rtnl_unlock();
+       }
+
+       rtnl_lock();
        mutex_lock(&priv->lock);
 
        stmmac_reset_queues_param(priv);
 
+       stmmac_free_tx_skbufs(priv);
        stmmac_clear_descriptors(priv);
 
        stmmac_hw_setup(ndev, false);
@@ -5262,14 +5300,7 @@ int stmmac_resume(struct device *dev)
        stmmac_enable_all_queues(priv);
 
        mutex_unlock(&priv->lock);
-
-       if (!device_may_wakeup(priv->device) || !priv->plat->pmt) {
-               rtnl_lock();
-               phylink_start(priv->phylink);
-               /* We may have called phylink_speed_down before */
-               phylink_speed_up(priv->phylink);
-               rtnl_unlock();
-       }
+       rtnl_unlock();
 
        phylink_mac_change(priv->phylink, true);
 
index 75056c1..5dc60ec 100644 (file)
@@ -1001,8 +1001,7 @@ struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs,
        if (IS_ERR_OR_NULL(cpts->ptp_clock)) {
                dev_err(dev, "Failed to register ptp clk %ld\n",
                        PTR_ERR(cpts->ptp_clock));
-               if (!cpts->ptp_clock)
-                       ret = -ENODEV;
+               ret = cpts->ptp_clock ? PTR_ERR(cpts->ptp_clock) : -ENODEV;
                goto refclk_disable;
        }
        cpts->phc_index = ptp_clock_index(cpts->ptp_clock);
index 9fd1f77..b0f00b4 100644 (file)
@@ -838,9 +838,12 @@ static int cpsw_ndo_open(struct net_device *ndev)
                if (ret < 0)
                        goto err_cleanup;
 
-               if (cpts_register(cpsw->cpts))
-                       dev_err(priv->dev, "error registering cpts device\n");
-
+               if (cpsw->cpts) {
+                       if (cpts_register(cpsw->cpts))
+                               dev_err(priv->dev, "error registering cpts device\n");
+                       else
+                               writel(0x10, &cpsw->wr_regs->misc_en);
+               }
        }
 
        cpsw_restore(priv);
@@ -1631,6 +1634,7 @@ static int cpsw_probe(struct platform_device *pdev)
                                       CPSW_MAX_QUEUES, CPSW_MAX_QUEUES);
        if (!ndev) {
                dev_err(dev, "error allocating net_device\n");
+               ret = -ENOMEM;
                goto clean_cpts;
        }
 
@@ -1716,7 +1720,6 @@ static int cpsw_probe(struct platform_device *pdev)
 
        /* Enable misc CPTS evnt_pend IRQ */
        cpts_set_irqpoll(cpsw->cpts, false);
-       writel(0x10, &cpsw->wr_regs->misc_en);
 
 skip_cpts:
        cpsw_notice(priv, probe,
index f779d2e..2f5e0ad 100644 (file)
@@ -873,8 +873,12 @@ static int cpsw_ndo_open(struct net_device *ndev)
                if (ret < 0)
                        goto err_cleanup;
 
-               if (cpts_register(cpsw->cpts))
-                       dev_err(priv->dev, "error registering cpts device\n");
+               if (cpsw->cpts) {
+                       if (cpts_register(cpsw->cpts))
+                               dev_err(priv->dev, "error registering cpts device\n");
+                       else
+                               writel(0x10, &cpsw->wr_regs->misc_en);
+               }
 
                napi_enable(&cpsw->napi_rx);
                napi_enable(&cpsw->napi_tx);
@@ -2006,7 +2010,6 @@ static int cpsw_probe(struct platform_device *pdev)
 
        /* Enable misc CPTS evnt_pend IRQ */
        cpts_set_irqpoll(cpsw->cpts, false);
-       writel(0x10, &cpsw->wr_regs->misc_en);
 
 skip_cpts:
        ret = cpsw_register_notifiers(cpsw);
index 31c5e36..424e644 100644 (file)
@@ -1265,9 +1265,6 @@ static int cpsw_xdp_prog_setup(struct cpsw_priv *priv, struct netdev_bpf *bpf)
        if (!priv->xdpi.prog && !prog)
                return 0;
 
-       if (!xdp_attachment_flags_ok(&priv->xdpi, bpf))
-               return -EBUSY;
-
        WRITE_ONCE(priv->xdp_prog, prog);
 
        xdp_attachment_setup(&priv->xdpi, bpf);
index 60c199f..0301853 100644 (file)
@@ -1351,7 +1351,6 @@ static int temac_probe(struct platform_device *pdev)
        struct device_node *temac_np = dev_of_node(&pdev->dev), *dma_np;
        struct temac_local *lp;
        struct net_device *ndev;
-       struct resource *res;
        const void *addr;
        __be32 *p;
        bool little_endian;
@@ -1500,13 +1499,11 @@ static int temac_probe(struct platform_device *pdev)
                of_node_put(dma_np);
        } else if (pdata) {
                /* 2nd memory resource specifies DMA registers */
-               res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-               lp->sdma_regs = devm_ioremap(&pdev->dev, res->start,
-                                                    resource_size(res));
-               if (!lp->sdma_regs) {
+               lp->sdma_regs = devm_platform_ioremap_resource(pdev, 1);
+               if (IS_ERR(lp->sdma_regs)) {
                        dev_err(&pdev->dev,
                                "could not map DMA registers\n");
-                       return -ENOMEM;
+                       return PTR_ERR(lp->sdma_regs);
                }
                if (pdata->dma_little_endian) {
                        lp->dma_in = temac_dma_in32_le;
index d07008a..1426bfc 100644 (file)
@@ -224,8 +224,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
        if (ip_tunnel_collect_metadata() || gs->collect_md) {
                __be16 flags;
 
-               flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
-                       (gnvh->oam ? TUNNEL_OAM : 0) |
+               flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) |
                        (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
 
                tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
index 9264203..6c3ed5b 100644 (file)
@@ -156,6 +156,9 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool,
        /* The allocator will give us a power-of-2 number of pages.  But we
         * can't guarantee that, so request it.  That way we won't waste any
         * memory that would be available beyond the required space.
+        *
+        * Note that gsi_trans_pool_exit_dma() assumes the total allocated
+        * size is exactly (count * size).
         */
        total_size = get_order(total_size) << PAGE_SHIFT;
 
@@ -175,7 +178,9 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool,
 
 void gsi_trans_pool_exit_dma(struct device *dev, struct gsi_trans_pool *pool)
 {
-       dma_free_coherent(dev, pool->size, pool->base, pool->addr);
+       size_t total_size = pool->count * pool->size;
+
+       dma_free_coherent(dev, total_size, pool->base, pool->addr);
        memset(pool, 0, sizeof(*pool));
 }
 
@@ -362,22 +367,31 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id,
        return trans;
 }
 
-/* Free a previously-allocated transaction (used only in case of error) */
+/* Free a previously-allocated transaction */
 void gsi_trans_free(struct gsi_trans *trans)
 {
+       refcount_t *refcount = &trans->refcount;
        struct gsi_trans_info *trans_info;
+       bool last;
 
-       if (!refcount_dec_and_test(&trans->refcount))
+       /* We must hold the lock to release the last reference */
+       if (refcount_dec_not_one(refcount))
                return;
 
        trans_info = &trans->gsi->channel[trans->channel_id].trans_info;
 
        spin_lock_bh(&trans_info->spinlock);
 
-       list_del(&trans->links);
+       /* Reference might have been added before we got the lock */
+       last = refcount_dec_and_test(refcount);
+       if (last)
+               list_del(&trans->links);
 
        spin_unlock_bh(&trans_info->spinlock);
 
+       if (!last)
+               return;
+
        ipa_gsi_trans_release(trans);
 
        /* Releasing the reserved TREs implicitly frees the sgl[] and
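
gsi_trans_free() above is an instance of a common "drop the last reference under a lock" idiom: refcount_dec_not_one() handles the fast path, and only a potential final put takes the list lock and re-checks with refcount_dec_and_test(), since another reference may have been taken in between. A hedged sketch with generic names (obj_release() is a hypothetical teardown helper):

	static void obj_put(struct obj *obj)
	{
		bool last;

		if (refcount_dec_not_one(&obj->refcount))
			return;			/* fast path: not the last ref */

		spin_lock_bh(&obj->pool->lock);
		/* a reference might have been added before we got the lock */
		last = refcount_dec_and_test(&obj->refcount);
		if (last)
			list_del(&obj->links);
		spin_unlock_bh(&obj->pool->lock);

		if (last)
			obj_release(obj);	/* hypothetical teardown helper */
	}
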
index 2e90512..90aafb5 100644 (file)
@@ -63,15 +63,20 @@ static int
 nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
 {
        struct nsim_bpf_bound_prog *state;
+       int ret = 0;
 
        state = env->prog->aux->offload->dev_priv;
        if (state->nsim_dev->bpf_bind_verifier_delay && !insn_idx)
                msleep(state->nsim_dev->bpf_bind_verifier_delay);
 
-       if (insn_idx == env->prog->len - 1)
+       if (insn_idx == env->prog->len - 1) {
                pr_vlog(env, "Hello from netdevsim!\n");
 
-       return 0;
+               if (!state->nsim_dev->bpf_bind_verifier_accept)
+                       ret = -EOPNOTSUPP;
+       }
+
+       return ret;
 }
 
 static int nsim_bpf_finalize(struct bpf_verifier_env *env)
@@ -190,9 +195,6 @@ nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf,
 {
        int err;
 
-       if (!xdp_attachment_flags_ok(xdp, bpf))
-               return -EBUSY;
-
        if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) {
                NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS");
                return -EOPNOTSUPP;
@@ -598,6 +600,9 @@ int nsim_bpf_dev_init(struct nsim_dev *nsim_dev)
                            &nsim_dev->bpf_bind_accept);
        debugfs_create_u32("bpf_bind_verifier_delay", 0600, nsim_dev->ddir,
                           &nsim_dev->bpf_bind_verifier_delay);
+       nsim_dev->bpf_bind_verifier_accept = true;
+       debugfs_create_bool("bpf_bind_verifier_accept", 0600, nsim_dev->ddir,
+                           &nsim_dev->bpf_bind_verifier_accept);
        return 0;
 }
 
index d070614..e7972e8 100644 (file)
@@ -96,6 +96,7 @@ static const struct file_operations nsim_dev_take_snapshot_fops = {
        .open = simple_open,
        .write = nsim_dev_take_snapshot_write,
        .llseek = generic_file_llseek,
+       .owner = THIS_MODULE,
 };
 
 static ssize_t nsim_dev_trap_fa_cookie_read(struct file *file,
@@ -188,6 +189,7 @@ static const struct file_operations nsim_dev_trap_fa_cookie_fops = {
        .read = nsim_dev_trap_fa_cookie_read,
        .write = nsim_dev_trap_fa_cookie_write,
        .llseek = generic_file_llseek,
+       .owner = THIS_MODULE,
 };
 
 static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
index 62958b2..21e2974 100644 (file)
@@ -261,6 +261,7 @@ static const struct file_operations nsim_dev_health_break_fops = {
        .open = simple_open,
        .write = nsim_dev_health_break_write,
        .llseek = generic_file_llseek,
+       .owner = THIS_MODULE,
 };
 
 int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink)
index 827fc80..c4e7ad2 100644 (file)
@@ -189,6 +189,7 @@ struct nsim_dev {
        struct dentry *take_snapshot;
        struct bpf_offload_dev *bpf_dev;
        bool bpf_bind_accept;
+       bool bpf_bind_verifier_accept;
        u32 bpf_bind_verifier_delay;
        struct dentry *ddir_bpf_bound_progs;
        u32 prog_id_gen;
index 6ab023a..02dc312 100644 (file)
@@ -124,6 +124,7 @@ static const struct file_operations nsim_udp_tunnels_info_reset_fops = {
        .open = simple_open,
        .write = nsim_udp_tunnels_info_reset_write,
        .llseek = generic_file_llseek,
+       .owner = THIS_MODULE,
 };
 
 int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev,
index 6cf9b79..10be266 100644 (file)
@@ -981,7 +981,6 @@ int vsc8584_macsec_init(struct phy_device *phydev)
 
        switch (phydev->phy_id & phydev->drv->phy_id_mask) {
        case PHY_ID_VSC856X:
-       case PHY_ID_VSC8575:
        case PHY_ID_VSC8582:
        case PHY_ID_VSC8584:
                INIT_LIST_HEAD(&vsc8531->macsec_flows);
index ec97669..0fc39ac 100644 (file)
@@ -291,8 +291,10 @@ static int smsc_phy_probe(struct phy_device *phydev)
                return ret;
 
        ret = clk_set_rate(priv->refclk, 50 * 1000 * 1000);
-       if (ret)
+       if (ret) {
+               clk_disable_unprepare(priv->refclk);
                return ret;
+       }
 
        return 0;
 }
index be69d27..cd06cae 100644 (file)
@@ -1961,12 +1961,15 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct tun_file *tfile = file->private_data;
        struct tun_struct *tun = tun_get(tfile);
        ssize_t result;
+       int noblock = 0;
 
        if (!tun)
                return -EBADFD;
 
-       result = tun_get_user(tun, tfile, NULL, from,
-                             file->f_flags & O_NONBLOCK, false);
+       if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT))
+               noblock = 1;
+
+       result = tun_get_user(tun, tfile, NULL, from, noblock, false);
 
        tun_put(tun);
        return result;
@@ -2185,10 +2188,15 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
        struct tun_file *tfile = file->private_data;
        struct tun_struct *tun = tun_get(tfile);
        ssize_t len = iov_iter_count(to), ret;
+       int noblock = 0;
 
        if (!tun)
                return -EBADFD;
-       ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK, NULL);
+
+       if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT))
+               noblock = 1;
+
+       ret = tun_do_read(tun, tfile, to, noblock, NULL);
        ret = min_t(ssize_t, ret, len);
        if (ret > 0)
                iocb->ki_pos = ret;
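
Both tun hunks above apply the same rule: treat the I/O as non-blocking when either the file was opened with O_NONBLOCK or the caller passed IOCB_NOWAIT (e.g. preadv2()/pwritev2() with RWF_NOWAIT, or io_uring). Condensed:

	/* non-blocking if requested per-fd or per-call */
	int noblock = (file->f_flags & O_NONBLOCK) ||
		      (iocb->ki_flags & IOCB_NOWAIT);
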
index ca89d82..c4568a4 100644 (file)
@@ -197,7 +197,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf)
        }
 
        /* enable ethernet mode (?) */
-       if (cx82310_enable_ethernet(dev))
+       ret = cx82310_enable_ethernet(dev);
+       if (ret)
                goto err;
 
        /* get the MAC address */
index b09b453..207e59e 100644 (file)
@@ -59,7 +59,7 @@
 #define IPHETH_USBINTF_SUBCLASS 253
 #define IPHETH_USBINTF_PROTO    1
 
-#define IPHETH_BUF_SIZE         1516
+#define IPHETH_BUF_SIZE         1514
 #define IPHETH_IP_ALIGN                2       /* padding at front of URB */
 #define IPHETH_TX_TIMEOUT       (5 * HZ)
 
index 581ed51..fc378ff 100644 (file)
@@ -1070,7 +1070,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x05c6, 0x9011, 4)},
        {QMI_FIXED_INTF(0x05c6, 0x9021, 1)},
        {QMI_FIXED_INTF(0x05c6, 0x9022, 2)},
-       {QMI_FIXED_INTF(0x05c6, 0x9025, 4)},    /* Alcatel-sbell ASB TL131 TDD LTE  (China Mobile) */
+       {QMI_QUIRK_SET_DTR(0x05c6, 0x9025, 4)}, /* Alcatel-sbell ASB TL131 TDD LTE (China Mobile) */
        {QMI_FIXED_INTF(0x05c6, 0x9026, 3)},
        {QMI_FIXED_INTF(0x05c6, 0x902e, 5)},
        {QMI_FIXED_INTF(0x05c6, 0x9031, 5)},
index f2793ff..b9b7e00 100644 (file)
@@ -1315,11 +1315,17 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
        int orig_iif = skb->skb_iif;
        bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
        bool is_ndisc = ipv6_ndisc_frame(skb);
+       bool is_ll_src;
 
        /* loopback, multicast & non-ND link-local traffic; do not push through
-        * packet taps again. Reset pkt_type for upper layers to process skb
+        * packet taps again. Reset pkt_type for upper layers to process skb.
+        * For packets with a link-local source address, however, skip this
+        * so that the dst can be determined at input using the original
+        * ifindex in case daddr needs strict handling.
         */
-       if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) {
+       is_ll_src = ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL;
+       if (skb->pkt_type == PACKET_LOOPBACK ||
+           (need_strict && !is_ndisc && !is_ll_src)) {
                skb->dev = vrf_dev;
                skb->skb_iif = vrf_dev->ifindex;
                IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
index 1a557ae..977f77e 100644 (file)
@@ -3798,6 +3798,9 @@ static void vxlan_config_apply(struct net_device *dev,
                dev->gso_max_segs = lowerdev->gso_max_segs;
 
                needed_headroom = lowerdev->hard_header_len;
+               needed_headroom += lowerdev->needed_headroom;
+
+               dev->needed_tailroom = lowerdev->needed_tailroom;
 
                max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
                                           VXLAN_HEADROOM);
@@ -3877,8 +3880,10 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
 
        if (dst->remote_ifindex) {
                remote_dev = __dev_get_by_index(net, dst->remote_ifindex);
-               if (!remote_dev)
+               if (!remote_dev) {
+                       err = -ENODEV;
                        goto errout;
+               }
 
                err = netdev_upper_dev_link(remote_dev, dev, extack);
                if (err)
index d43e0d3..052413e 100644 (file)
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2012-2014, 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2012-2014, 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -128,7 +126,9 @@ enum iwl_sta_flags {
        STA_FLG_MAX_AGG_SIZE_256K       = (5 << STA_FLG_MAX_AGG_SIZE_SHIFT),
        STA_FLG_MAX_AGG_SIZE_512K       = (6 << STA_FLG_MAX_AGG_SIZE_SHIFT),
        STA_FLG_MAX_AGG_SIZE_1024K      = (7 << STA_FLG_MAX_AGG_SIZE_SHIFT),
-       STA_FLG_MAX_AGG_SIZE_MSK        = (7 << STA_FLG_MAX_AGG_SIZE_SHIFT),
+       STA_FLG_MAX_AGG_SIZE_2M         = (8 << STA_FLG_MAX_AGG_SIZE_SHIFT),
+       STA_FLG_MAX_AGG_SIZE_4M         = (9 << STA_FLG_MAX_AGG_SIZE_SHIFT),
+       STA_FLG_MAX_AGG_SIZE_MSK        = (0xf << STA_FLG_MAX_AGG_SIZE_SHIFT),
 
        STA_FLG_AGG_MPDU_DENS_SHIFT     = 23,
        STA_FLG_AGG_MPDU_DENS_2US       = (4 << STA_FLG_AGG_MPDU_DENS_SHIFT),
index a731f28..53b438d 100644 (file)
@@ -8,7 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -421,12 +421,14 @@ struct iwl_hs20_roc_res {
  *     able to run the GO Negotiation. Will not be fragmented and not
  *     repetitive. Valid only on the P2P Device MAC. Only the duration will
  *     be taken into account.
+ * @SESSION_PROTECT_CONF_MAX_ID: not used
  */
 enum iwl_mvm_session_prot_conf_id {
        SESSION_PROTECT_CONF_ASSOC,
        SESSION_PROTECT_CONF_GO_CLIENT_ASSOC,
        SESSION_PROTECT_CONF_P2P_DEVICE_DISCOV,
        SESSION_PROTECT_CONF_P2P_GO_NEGOTIATION,
+       SESSION_PROTECT_CONF_MAX_ID,
 }; /* SESSION_PROTECTION_CONF_ID_E_VER_1 */
 
 /**
@@ -459,7 +461,7 @@ struct iwl_mvm_session_prot_cmd {
  * @mac_id: the mac id for which the session protection started / ended
  * @status: 1 means success, 0 means failure
  * @start: 1 means the session protection started, 0 means it ended
- * @conf_id: the configuration id of the session that started / eneded
+ * @conf_id: see &enum iwl_mvm_session_prot_conf_id
  *
  * Note that any session protection will always get two notifications: start
 * and end, even if the firmware could not schedule it.
index ca4967b..580b07a 100644 (file)
@@ -491,8 +491,8 @@ struct iwl_cfg {
 #define IWL_CFG_RF_ID_HR               0x7
 #define IWL_CFG_RF_ID_HR1              0x4
 
-#define IWL_CFG_NO_160                 0x0
-#define IWL_CFG_160                    0x1
+#define IWL_CFG_NO_160                 0x1
+#define IWL_CFG_160                    0x0
 
 #define IWL_CFG_CORES_BT               0x0
 #define IWL_CFG_CORES_BT_GNSS          0x5
index cb9e8e1..1d48c7d 100644 (file)
 #define CSR_MAC_SHADOW_REG_CTL2                (CSR_BASE + 0x0AC)
 #define CSR_MAC_SHADOW_REG_CTL2_RX_WAKE        0xFFFF
 
+/* LTR control (since IWL_DEVICE_FAMILY_22000) */
+#define CSR_LTR_LONG_VAL_AD                    (CSR_BASE + 0x0D4)
+#define CSR_LTR_LONG_VAL_AD_NO_SNOOP_REQ       0x80000000
+#define CSR_LTR_LONG_VAL_AD_NO_SNOOP_SCALE     0x1c000000
+#define CSR_LTR_LONG_VAL_AD_NO_SNOOP_VAL       0x03ff0000
+#define CSR_LTR_LONG_VAL_AD_SNOOP_REQ          0x00008000
+#define CSR_LTR_LONG_VAL_AD_SNOOP_SCALE                0x00001c00
+#define CSR_LTR_LONG_VAL_AD_SNOOP_VAL          0x000003ff
+#define CSR_LTR_LONG_VAL_AD_SCALE_USEC         2
+
 /* GIO Chicken Bits (PCI Express bus link power management) */
 #define CSR_GIO_CHICKEN_BITS    (CSR_BASE+0x100)
 
index 688c112..b627e7d 100644 (file)
@@ -3080,7 +3080,7 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw,
 
        /* this would be a mac80211 bug ... but don't crash */
        if (WARN_ON_ONCE(!mvmvif->phy_ctxt))
-               return -EINVAL;
+               return test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status) ? 0 : -EINVAL;
 
        /*
         * If we are in a STA removal flow and in DQA mode:
@@ -3127,6 +3127,9 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw,
                        goto out_unlock;
                }
 
+               if (vif->type == NL80211_IFTYPE_STATION)
+                       vif->bss_conf.he_support = sta->he_cap.has_he;
+
                if (sta->tdls &&
                    (vif->p2p ||
                     iwl_mvm_tdls_sta_count(mvm, NULL) ==
index 0175379..799d821 100644 (file)
@@ -196,6 +196,7 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
                mpdu_dens = sta->ht_cap.ampdu_density;
        }
 
+
        if (sta->vht_cap.vht_supported) {
                agg_size = sta->vht_cap.cap &
                        IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
@@ -205,6 +206,23 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
                agg_size = sta->ht_cap.ampdu_factor;
        }
 
+       /* D6.0 10.12.2 A-MPDU length limit rules
+        * A STA indicates the maximum length of the A-MPDU preEOF padding
+        * that it can receive in an HE PPDU in the Maximum A-MPDU Length
+        * Exponent field in its HT Capabilities, VHT Capabilities,
+        * and HE 6 GHz Band Capabilities elements (if present) and the
+        * Maximum AMPDU Length Exponent Extension field in its HE
+        * Capabilities element
+        */
+       if (sta->he_cap.has_he)
+               agg_size += u8_get_bits(sta->he_cap.he_cap_elem.mac_cap_info[3],
+                                       IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK);
+
+       /* Limit to max A-MPDU supported by FW */
+       if (agg_size > (STA_FLG_MAX_AGG_SIZE_4M >> STA_FLG_MAX_AGG_SIZE_SHIFT))
+               agg_size = (STA_FLG_MAX_AGG_SIZE_4M >>
+                           STA_FLG_MAX_AGG_SIZE_SHIFT);
+
        add_sta_cmd.station_flags |=
                cpu_to_le32(agg_size << STA_FLG_MAX_AGG_SIZE_SHIFT);
        add_sta_cmd.station_flags |=
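
The capability fields encode a power-of-two exponent: a field value n corresponds to a maximum A-MPDU of 2^(13+n)-1 octets, so the HE extension simply adds to the HT/VHT exponent and the new 4M enum value (n = 9) is the firmware-side ceiling. A standalone sketch of that arithmetic (the 0x18 mask mirrors IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK; the rest is illustrative, not the driver code):

    #include <stdio.h>
    #include <stdint.h>

    #define MAX_AGG_SIZE_4M 9  /* field value, i.e. STA_FLG_MAX_AGG_SIZE_4M >> shift */

    /* HE MAC cap byte 3, bits 3..4: A-MPDU length exponent extension. */
    static unsigned int he_ampdu_ext(uint8_t mac_cap3)
    {
            return (mac_cap3 & 0x18) >> 3;
    }

    int main(void)
    {
            unsigned int agg = 7 + he_ampdu_ext(0x18);  /* VHT max (7) + ext (3) */

            if (agg > MAX_AGG_SIZE_4M)
                    agg = MAX_AGG_SIZE_4M;              /* clamp to firmware limit */

            printf("field=%u -> max A-MPDU = 2^(13+%u)-1 = %lu bytes\n",
                   agg, agg, (1UL << (13 + agg)) - 1);  /* 4194303 */
            return 0;
    }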
index 7fce79c..1db6d8d 100644 (file)
@@ -641,11 +641,32 @@ void iwl_mvm_protect_session(struct iwl_mvm *mvm,
        }
 }
 
+static void iwl_mvm_cancel_session_protection(struct iwl_mvm *mvm,
+                                             struct iwl_mvm_vif *mvmvif)
+{
+       struct iwl_mvm_session_prot_cmd cmd = {
+               .id_and_color =
+                       cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id,
+                                                       mvmvif->color)),
+               .action = cpu_to_le32(FW_CTXT_ACTION_REMOVE),
+               .conf_id = cpu_to_le32(mvmvif->time_event_data.id),
+       };
+       int ret;
+
+       ret = iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(SESSION_PROTECTION_CMD,
+                                                  MAC_CONF_GROUP, 0),
+                                  0, sizeof(cmd), &cmd);
+       if (ret)
+               IWL_ERR(mvm,
+                       "Couldn't send the SESSION_PROTECTION_CMD: %d\n", ret);
+}
+
 static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
                                        struct iwl_mvm_time_event_data *te_data,
                                        u32 *uid)
 {
        u32 id;
+       struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif);
 
        /*
         * It is possible that by the time we got to this point the time
@@ -663,14 +684,29 @@ static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
        iwl_mvm_te_clear_data(mvm, te_data);
        spin_unlock_bh(&mvm->time_event_lock);
 
-       /*
-        * It is possible that by the time we try to remove it, the time event
-        * has already ended and removed. In such a case there is no need to
-        * send a removal command.
+       /* When session protection is supported, the te_data->id field
+        * is reused to save session protection's configuration.
         */
-       if (id == TE_MAX) {
-               IWL_DEBUG_TE(mvm, "TE 0x%x has already ended\n", *uid);
+       if (fw_has_capa(&mvm->fw->ucode_capa,
+                       IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD)) {
+               if (mvmvif && id < SESSION_PROTECT_CONF_MAX_ID) {
+                       /* Session protection is still ongoing. Cancel it */
+                       iwl_mvm_cancel_session_protection(mvm, mvmvif);
+                       if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) {
+                               set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
+                               iwl_mvm_roc_finished(mvm);
+                       }
+               }
                return false;
+       } else {
+               /* It is possible that by the time we try to remove it, the
+                * time event has already ended and been removed. In such a
+                * case there is no need to send a removal command.
+                */
+               if (id == TE_MAX) {
+                       IWL_DEBUG_TE(mvm, "TE 0x%x has already ended\n", *uid);
+                       return false;
+               }
        }
 
        return true;
@@ -771,6 +807,7 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
        struct iwl_rx_packet *pkt = rxb_addr(rxb);
        struct iwl_mvm_session_prot_notif *notif = (void *)pkt->data;
        struct ieee80211_vif *vif;
+       struct iwl_mvm_vif *mvmvif;
 
        rcu_read_lock();
        vif = iwl_mvm_rcu_dereference_vif_id(mvm, le32_to_cpu(notif->mac_id),
@@ -779,9 +816,10 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
        if (!vif)
                goto out_unlock;
 
+       mvmvif = iwl_mvm_vif_from_mac80211(vif);
+
        /* The vif is not a P2P_DEVICE, maintain its time_event_data */
        if (vif->type != NL80211_IFTYPE_P2P_DEVICE) {
-               struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
                struct iwl_mvm_time_event_data *te_data =
                        &mvmvif->time_event_data;
 
@@ -816,10 +854,14 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
 
        if (!le32_to_cpu(notif->status) || !le32_to_cpu(notif->start)) {
                /* End TE, notify mac80211 */
+               mvmvif->time_event_data.id = SESSION_PROTECT_CONF_MAX_ID;
                ieee80211_remain_on_channel_expired(mvm->hw);
                set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
                iwl_mvm_roc_finished(mvm);
        } else if (le32_to_cpu(notif->start)) {
+               if (WARN_ON(mvmvif->time_event_data.id !=
+                               le32_to_cpu(notif->conf_id)))
+                       goto out_unlock;
                set_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status);
                ieee80211_ready_on_channel(mvm->hw); /* Start TE */
        }
@@ -845,20 +887,24 @@ iwl_mvm_start_p2p_roc_session_protection(struct iwl_mvm *mvm,
 
        lockdep_assert_held(&mvm->mutex);
 
+       /* The time_event_data.id field is reused to save session
+        * protection's configuration.
+        */
        switch (type) {
        case IEEE80211_ROC_TYPE_NORMAL:
-               cmd.conf_id =
-                       cpu_to_le32(SESSION_PROTECT_CONF_P2P_DEVICE_DISCOV);
+               mvmvif->time_event_data.id =
+                       SESSION_PROTECT_CONF_P2P_DEVICE_DISCOV;
                break;
        case IEEE80211_ROC_TYPE_MGMT_TX:
-               cmd.conf_id =
-                       cpu_to_le32(SESSION_PROTECT_CONF_P2P_GO_NEGOTIATION);
+               mvmvif->time_event_data.id =
+                       SESSION_PROTECT_CONF_P2P_GO_NEGOTIATION;
                break;
        default:
                WARN_ONCE(1, "Got an invalid ROC type\n");
                return -EINVAL;
        }
 
+       cmd.conf_id = cpu_to_le32(mvmvif->time_event_data.id);
        return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(SESSION_PROTECTION_CMD,
                                                    MAC_CONF_GROUP, 0),
                                    0, sizeof(cmd), &cmd);
@@ -960,25 +1006,6 @@ void iwl_mvm_cleanup_roc_te(struct iwl_mvm *mvm)
                __iwl_mvm_remove_time_event(mvm, te_data, &uid);
 }
 
-static void iwl_mvm_cancel_session_protection(struct iwl_mvm *mvm,
-                                             struct iwl_mvm_vif *mvmvif)
-{
-       struct iwl_mvm_session_prot_cmd cmd = {
-               .id_and_color =
-                       cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id,
-                                                       mvmvif->color)),
-               .action = cpu_to_le32(FW_CTXT_ACTION_REMOVE),
-       };
-       int ret;
-
-       ret = iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(SESSION_PROTECTION_CMD,
-                                                  MAC_CONF_GROUP, 0),
-                                  0, sizeof(cmd), &cmd);
-       if (ret)
-               IWL_ERR(mvm,
-                       "Couldn't send the SESSION_PROTECTION_CMD: %d\n", ret);
-}
-
 void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 {
        struct iwl_mvm_vif *mvmvif;
@@ -988,10 +1015,13 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
                        IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD)) {
                mvmvif = iwl_mvm_vif_from_mac80211(vif);
 
-               iwl_mvm_cancel_session_protection(mvm, mvmvif);
-
-               if (vif->type == NL80211_IFTYPE_P2P_DEVICE)
+               if (vif->type == NL80211_IFTYPE_P2P_DEVICE) {
+                       iwl_mvm_cancel_session_protection(mvm, mvmvif);
                        set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
+               } else {
+                       iwl_mvm_remove_aux_roc_te(mvm, mvmvif,
+                                                 &mvmvif->time_event_data);
+               }
 
                iwl_mvm_roc_finished(mvm);
 
@@ -1126,10 +1156,15 @@ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm,
                        cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id,
                                                        mvmvif->color)),
                .action = cpu_to_le32(FW_CTXT_ACTION_ADD),
-               .conf_id = cpu_to_le32(SESSION_PROTECT_CONF_ASSOC),
                .duration_tu = cpu_to_le32(MSEC_TO_TU(duration)),
        };
 
+       /* The time_event_data.id field is reused to save session
+        * protection's configuration.
+        */
+       mvmvif->time_event_data.id = SESSION_PROTECT_CONF_ASSOC;
+       cmd.conf_id = cpu_to_le32(mvmvif->time_event_data.id);
+
        lockdep_assert_held(&mvm->mutex);
 
        spin_lock_bh(&mvm->time_event_lock);
index a0352fa..5512e3c 100644 (file)
@@ -252,6 +252,26 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
 
        iwl_set_bit(trans, CSR_CTXT_INFO_BOOT_CTRL,
                    CSR_AUTO_FUNC_BOOT_ENA);
+
+       if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_AX210) {
+               /*
+                * The firmware initializes this again later (to a smaller
+                * value), but for the boot process initialize the LTR to
+                * ~250 usec.
+                */
+               u32 val = CSR_LTR_LONG_VAL_AD_NO_SNOOP_REQ |
+                         u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC,
+                                         CSR_LTR_LONG_VAL_AD_NO_SNOOP_SCALE) |
+                         u32_encode_bits(250,
+                                         CSR_LTR_LONG_VAL_AD_NO_SNOOP_VAL) |
+                         CSR_LTR_LONG_VAL_AD_SNOOP_REQ |
+                         u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC,
+                                         CSR_LTR_LONG_VAL_AD_SNOOP_SCALE) |
+                         u32_encode_bits(250, CSR_LTR_LONG_VAL_AD_SNOOP_VAL);
+
+               iwl_write32(trans, CSR_LTR_LONG_VAL_AD, val);
+       }
+
        if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
                iwl_write_umac_prph(trans, UREG_CPU_INIT_RUN, 1);
        else
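
u32_encode_bits() shifts a value into the field described by a mask, so the LTR word above packs two (requirement, scale, value) triples, one for the snoop case and one for no-snoop. A userspace stand-in showing the same packing (encode_bits() here is a re-implementation for illustration, not the kernel helper):

    #include <stdio.h>
    #include <stdint.h>

    /* Equivalent of the kernel's u32_encode_bits()/FIELD_PREP(). */
    static uint32_t encode_bits(uint32_t val, uint32_t mask)
    {
            return (val << __builtin_ctz(mask)) & mask;
    }

    #define NO_SNOOP_REQ    0x80000000u
    #define NO_SNOOP_SCALE  0x1c000000u
    #define NO_SNOOP_VAL    0x03ff0000u
    #define SNOOP_REQ       0x00008000u
    #define SNOOP_SCALE     0x00001c00u
    #define SNOOP_VAL       0x000003ffu
    #define SCALE_USEC      2u

    int main(void)
    {
            /* Same packing as the AX210 boot-time init: both requirement
             * bits set, scale in usec, value 250 -> ~250 usec LTR. */
            uint32_t val = NO_SNOOP_REQ |
                           encode_bits(SCALE_USEC, NO_SNOOP_SCALE) |
                           encode_bits(250, NO_SNOOP_VAL) |
                           SNOOP_REQ |
                           encode_bits(SCALE_USEC, SNOOP_SCALE) |
                           encode_bits(250, SNOOP_VAL);

            printf("CSR_LTR_LONG_VAL_AD = 0x%08x\n", val);  /* 0x88fa88fa */
            return 0;
    }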
index 129021f..7b5ece3 100644 (file)
@@ -536,9 +536,15 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 
        {IWL_PCI_DEVICE(0x2725, 0x0090, iwlax211_2ax_cfg_so_gf_a0)},
        {IWL_PCI_DEVICE(0x2725, 0x0020, iwlax210_2ax_cfg_ty_gf_a0)},
+       {IWL_PCI_DEVICE(0x2725, 0x0024, iwlax210_2ax_cfg_ty_gf_a0)},
        {IWL_PCI_DEVICE(0x2725, 0x0310, iwlax210_2ax_cfg_ty_gf_a0)},
        {IWL_PCI_DEVICE(0x2725, 0x0510, iwlax210_2ax_cfg_ty_gf_a0)},
        {IWL_PCI_DEVICE(0x2725, 0x0A10, iwlax210_2ax_cfg_ty_gf_a0)},
+       {IWL_PCI_DEVICE(0x2725, 0xE020, iwlax210_2ax_cfg_ty_gf_a0)},
+       {IWL_PCI_DEVICE(0x2725, 0xE024, iwlax210_2ax_cfg_ty_gf_a0)},
+       {IWL_PCI_DEVICE(0x2725, 0x4020, iwlax210_2ax_cfg_ty_gf_a0)},
+       {IWL_PCI_DEVICE(0x2725, 0x6020, iwlax210_2ax_cfg_ty_gf_a0)},
+       {IWL_PCI_DEVICE(0x2725, 0x6024, iwlax210_2ax_cfg_ty_gf_a0)},
        {IWL_PCI_DEVICE(0x2725, 0x00B0, iwlax411_2ax_cfg_sosnj_gf4_a0)},
        {IWL_PCI_DEVICE(0x2726, 0x0070, iwlax201_cfg_snj_hr_b0)},
        {IWL_PCI_DEVICE(0x2726, 0x0074, iwlax201_cfg_snj_hr_b0)},
index d2e69ad..2fffbbc 100644 (file)
@@ -2156,18 +2156,36 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr,
                                   void *buf, int dwords)
 {
        unsigned long flags;
-       int offs, ret = 0;
+       int offs = 0;
        u32 *vals = buf;
 
-       if (iwl_trans_grab_nic_access(trans, &flags)) {
-               iwl_write32(trans, HBUS_TARG_MEM_RADDR, addr);
-               for (offs = 0; offs < dwords; offs++)
-                       vals[offs] = iwl_read32(trans, HBUS_TARG_MEM_RDAT);
-               iwl_trans_release_nic_access(trans, &flags);
-       } else {
-               ret = -EBUSY;
+       while (offs < dwords) {
+               /* limit the time we spin here under lock to 1/2s */
+               ktime_t timeout = ktime_add_us(ktime_get(), 500 * USEC_PER_MSEC);
+
+               if (iwl_trans_grab_nic_access(trans, &flags)) {
+                       iwl_write32(trans, HBUS_TARG_MEM_RADDR,
+                                   addr + 4 * offs);
+
+                       while (offs < dwords) {
+                               vals[offs] = iwl_read32(trans,
+                                                       HBUS_TARG_MEM_RDAT);
+                               offs++;
+
+                               /* calling ktime_get is expensive so
+                                * do it only once every 128 reads
+                                */
+                               if (offs % 128 == 0 && ktime_after(ktime_get(),
+                                                                  timeout))
+                                       break;
+                       }
+                       iwl_trans_release_nic_access(trans, &flags);
+               } else {
+                       return -EBUSY;
+               }
        }
-       return ret;
+
+       return 0;
 }
 
 static int iwl_trans_pcie_write_mem(struct iwl_trans *trans, u32 addr,
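
The rework bounds how long NIC access is held: read in bursts, check a deadline only every 128 words to keep ktime_get() off the hot path, then release and re-grab access for the next chunk. The same control flow in plain C (clock_gettime() in place of ktime_get(); the grab/release/read functions are stand-ins):

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    static long long now_us(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return ts.tv_sec * 1000000LL + ts.tv_nsec / 1000;
    }

    static bool grab_access(void)        { return true; }   /* stand-in */
    static void release_access(void)     {}
    static unsigned int read_word(void)  { return 0xdead; } /* stand-in */

    static int read_mem(unsigned int *vals, int dwords)
    {
            int offs = 0;

            while (offs < dwords) {
                    long long timeout = now_us() + 500000;  /* 1/2 s per burst */

                    if (!grab_access())
                            return -16;                     /* -EBUSY */

                    while (offs < dwords) {
                            vals[offs++] = read_word();
                            /* amortized deadline check: once per 128 reads */
                            if (offs % 128 == 0 && now_us() > timeout)
                                    break;
                    }
                    release_access();
            }
            return 0;
    }

    int main(void)
    {
            unsigned int buf[1000];

            printf("read_mem -> %d\n", read_mem(buf, 1000));
            return 0;
    }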
index 7d3f0a2..f1ae9ff 100644 (file)
@@ -1020,8 +1020,6 @@ void mt76u_stop_tx(struct mt76_dev *dev)
 {
        int ret;
 
-       mt76_worker_disable(&dev->tx_worker);
-
        ret = wait_event_timeout(dev->tx_wait, !mt76_has_tx_pending(&dev->phy),
                                 HZ / 5);
        if (!ret) {
@@ -1040,6 +1038,8 @@ void mt76u_stop_tx(struct mt76_dev *dev)
                                usb_kill_urb(q->entry[j].urb);
                }
 
+               mt76_worker_disable(&dev->tx_worker);
+
                /* On device removal we might queue skb's, but mt76u_tx_kick()
                 * will fail to submit the urb; clean up those skb's manually.
                 */
@@ -1048,18 +1048,19 @@ void mt76u_stop_tx(struct mt76_dev *dev)
                        if (!q)
                                continue;
 
-                       entry = q->entry[q->tail];
-                       q->entry[q->tail].done = false;
-
-                       mt76_queue_tx_complete(dev, q, &entry);
+                       while (q->queued > 0) {
+                               entry = q->entry[q->tail];
+                               q->entry[q->tail].done = false;
+                               mt76_queue_tx_complete(dev, q, &entry);
+                       }
                }
+
+               mt76_worker_enable(&dev->tx_worker);
        }
 
        cancel_work_sync(&dev->usb.stat_work);
        clear_bit(MT76_READING_STATS, &dev->phy.state);
 
-       mt76_worker_enable(&dev->tx_worker);
-
        mt76_tx_status_check(dev, NULL, true);
 }
 EXPORT_SYMBOL_GPL(mt76u_stop_tx);
index 3852c4f..efbba9c 100644 (file)
@@ -147,6 +147,8 @@ static int rtw_debugfs_copy_from_user(char tmp[], int size,
 {
        int tmp_len;
 
+       memset(tmp, 0, size);
+
        if (count < num)
                return -EFAULT;
 
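Zeroing tmp first matters because the copy from userspace may fill fewer than size bytes and leave no terminator, so later string handling would read whatever was on the stack. A small demonstration of the failure mode (plain memcpy() standing in for the userspace copy):

    #include <stdio.h>
    #include <string.h>

    static void parse(const char *tmp)
    {
            /* strlen() runs past the copied bytes if tmp wasn't zeroed */
            printf("parsed %zu bytes: \"%s\"\n", strlen(tmp), tmp);
    }

    int main(void)
    {
            char tmp[16];

            memset(tmp, 'X', sizeof(tmp) - 1);  /* simulate stale stack data */
            tmp[sizeof(tmp) - 1] = '\0';
            memcpy(tmp, "on", 2);               /* partial copy, no NUL */
            parse(tmp);                         /* leaks the stale 'X's */

            memset(tmp, 0, sizeof(tmp));        /* the fix */
            memcpy(tmp, "on", 2);
            parse(tmp);                         /* now cleanly "on" */
            return 0;
    }
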
index 042015b..b2fd878 100644 (file)
@@ -1482,7 +1482,7 @@ static bool rtw_fw_dump_check_size(struct rtw_dev *rtwdev,
 int rtw_fw_dump_fifo(struct rtw_dev *rtwdev, u8 fifo_sel, u32 addr, u32 size,
                     u32 *buffer)
 {
-       if (!rtwdev->chip->fw_fifo_addr) {
+       if (!rtwdev->chip->fw_fifo_addr[0]) {
                rtw_dbg(rtwdev, RTW_DBG_FW, "chip not support dump fw fifo\n");
                return -ENOTSUPP;
        }
index dc99528..d0a3bd9 100644 (file)
@@ -26,8 +26,8 @@ struct s3fwrn5_i2c_phy {
        struct i2c_client *i2c_dev;
        struct nci_dev *ndev;
 
-       unsigned int gpio_en;
-       unsigned int gpio_fw_wake;
+       int gpio_en;
+       int gpio_fw_wake;
 
        struct mutex mutex;
 
index 9b01afc..9a270e4 100644 (file)
@@ -2929,7 +2929,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
 static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
                                struct nvme_effects_log **log)
 {
-       struct nvme_cel *cel = xa_load(&ctrl->cels, csi);
+       struct nvme_effects_log *cel = xa_load(&ctrl->cels, csi);
        int ret;
 
        if (cel)
@@ -2940,16 +2940,15 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
                return -ENOMEM;
 
        ret = nvme_get_log(ctrl, 0x00, NVME_LOG_CMD_EFFECTS, 0, csi,
-                       &cel->log, sizeof(cel->log), 0);
+                       cel, sizeof(*cel), 0);
        if (ret) {
                kfree(cel);
                return ret;
        }
 
-       cel->csi = csi;
-       xa_store(&ctrl->cels, cel->csi, cel, GFP_KERNEL);
+       xa_store(&ctrl->cels, csi, cel, GFP_KERNEL);
 out:
-       *log = &cel->log;
+       *log = cel;
        return 0;
 }
 
@@ -4374,6 +4373,19 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
 
+static void nvme_free_cels(struct nvme_ctrl *ctrl)
+{
+       struct nvme_effects_log *cel;
+       unsigned long i;
+
+       xa_for_each (&ctrl->cels, i, cel) {
+               xa_erase(&ctrl->cels, i);
+               kfree(cel);
+       }
+
+       xa_destroy(&ctrl->cels);
+}
+
 static void nvme_free_ctrl(struct device *dev)
 {
        struct nvme_ctrl *ctrl =
@@ -4383,8 +4395,7 @@ static void nvme_free_ctrl(struct device *dev)
        if (!subsys || ctrl->instance != subsys->instance)
                ida_simple_remove(&nvme_instance_ida, ctrl->instance);
 
-       xa_destroy(&ctrl->cels);
-
+       nvme_free_cels(ctrl);
        nvme_mpath_uninit(ctrl);
        __free_page(ctrl->discard_page);
 
index bc330bf..567f7ad 100644 (file)
@@ -226,12 +226,6 @@ struct nvme_fault_inject {
 #endif
 };
 
-struct nvme_cel {
-       struct list_head        entry;
-       struct nvme_effects_log log;
-       u8                      csi;
-};
-
 struct nvme_ctrl {
        bool comp_seen;
        enum nvme_ctrl_state state;
index 0578ff2..3be3524 100644 (file)
@@ -292,9 +292,21 @@ static void nvme_dbbuf_init(struct nvme_dev *dev,
        nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)];
 }
 
+static void nvme_dbbuf_free(struct nvme_queue *nvmeq)
+{
+       if (!nvmeq->qid)
+               return;
+
+       nvmeq->dbbuf_sq_db = NULL;
+       nvmeq->dbbuf_cq_db = NULL;
+       nvmeq->dbbuf_sq_ei = NULL;
+       nvmeq->dbbuf_cq_ei = NULL;
+}
+
 static void nvme_dbbuf_set(struct nvme_dev *dev)
 {
        struct nvme_command c;
+       unsigned int i;
 
        if (!dev->dbbuf_dbs)
                return;
@@ -308,6 +320,9 @@ static void nvme_dbbuf_set(struct nvme_dev *dev)
                dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
                /* Free memory and continue on */
                nvme_dbbuf_dma_free(dev);
+
+               for (i = 1; i <= dev->online_queues; i++)
+                       nvme_dbbuf_free(&dev->queues[i]);
        }
 }
 
index 456dc4a..e63457e 100644 (file)
@@ -270,11 +270,6 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params)
        reg |= params->mode << USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT;
        brcm_usb_writel(reg, usb_phy + USB_PHY_UTMI_CTL_1);
 
-       /* Fix the incorrect default */
-       reg = brcm_usb_readl(ctrl + USB_CTRL_SETUP);
-       reg &= ~USB_CTRL_SETUP_tca_drv_sel_MASK;
-       brcm_usb_writel(reg, ctrl + USB_CTRL_SETUP);
-
        usb_init_common(params);
 
        /*
index 58ec695..62c2476 100644 (file)
@@ -4,7 +4,7 @@
 #
 config PHY_INTEL_KEEMBAY_EMMC
        tristate "Intel Keem Bay EMMC PHY driver"
-       depends on (OF && ARM64) || COMPILE_TEST
+       depends on ARCH_KEEMBAY || COMPILE_TEST
        depends on HAS_IOMEM
        select GENERIC_PHY
        select REGMAP_MMIO
index 50c5e93..c8126bd 100644 (file)
@@ -12,7 +12,7 @@ config PHY_MTK_TPHY
          it supports multiple usb2.0, usb3.0 ports, PCIe and
          SATA, and meanwhile supports two version T-PHY which have
          different banks layout, the T-PHY with shared banks between
-         multi-ports is first version, otherwise is second veriosn,
+         multi-ports is first version, otherwise is second version,
          so you can easily distinguish them by banks layout.
 
 config PHY_MTK_UFS
index 089db0d..442522b 100644 (file)
@@ -364,7 +364,8 @@ static int cpcap_usb_init_irq(struct platform_device *pdev,
 
        error = devm_request_threaded_irq(ddata->dev, irq, NULL,
                                          cpcap_phy_irq_thread,
-                                         IRQF_SHARED,
+                                         IRQF_SHARED |
+                                         IRQF_ONESHOT,
                                          name, ddata);
        if (error) {
                dev_err(ddata->dev, "could not get irq %s: %i\n",
index 928db51..7f6fcb8 100644 (file)
@@ -87,7 +87,7 @@ config PHY_QCOM_USB_HSIC
 
 config PHY_QCOM_USB_HS_28NM
        tristate "Qualcomm 28nm High-Speed PHY"
-       depends on ARCH_QCOM || COMPILE_TEST
+       depends on OF && (ARCH_QCOM || COMPILE_TEST)
        depends on EXTCON || !EXTCON # if EXTCON=m, this cannot be built-in
        select GENERIC_PHY
        help
@@ -98,7 +98,7 @@ config PHY_QCOM_USB_HS_28NM
 
 config PHY_QCOM_USB_SS
        tristate "Qualcomm USB Super-Speed PHY driver"
-       depends on ARCH_QCOM || COMPILE_TEST
+       depends on OF && (ARCH_QCOM || COMPILE_TEST)
        depends on EXTCON || !EXTCON # if EXTCON=m, this cannot be built-in
        select GENERIC_PHY
        help
index 5d33ad4..0cda168 100644 (file)
@@ -3926,7 +3926,7 @@ static int qcom_qmp_phy_probe(struct platform_device *pdev)
        struct phy_provider *phy_provider;
        void __iomem *serdes;
        void __iomem *usb_serdes;
-       void __iomem *dp_serdes;
+       void __iomem *dp_serdes = NULL;
        const struct qmp_phy_combo_cfg *combo_cfg = NULL;
        const struct qmp_phy_cfg *cfg = NULL;
        const struct qmp_phy_cfg *usb_cfg = NULL;
index de4a46f..ad88d74 100644 (file)
@@ -1242,6 +1242,7 @@ power_down:
 reset:
        reset_control_assert(padctl->rst);
 remove:
+       platform_set_drvdata(pdev, NULL);
        soc->ops->remove(padctl);
        return err;
 }
index d6b8495..9c65d56 100644 (file)
@@ -286,14 +286,76 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function,
 static bool aspeed_expr_is_gpio(const struct aspeed_sig_expr *expr)
 {
        /*
-        * The signal type is GPIO if the signal name has "GPI" as a prefix.
-        * strncmp (rather than strcmp) is used to implement the prefix
-        * requirement.
+        * We need to differentiate between GPIO and non-GPIO signals to
+        * implement the gpio_request_enable() interface. For better or worse
+        * the ASPEED pinctrl driver uses the expression names to determine
+        * whether an expression will mux a pin for GPIO.
         *
-        * expr->signal might look like "GPIOB1" in the GPIO case.
-        * expr->signal might look like "GPIT0" in the GPI case.
+        * Generally we have the following - A GPIO such as B1 has:
+        *
+        *    - expr->signal set to "GPIOB1"
+        *    - expr->function set to "GPIOB1"
+        *
+        * Using this fact we can determine whether the provided expression is
+        * a GPIO expression by testing the signal name for the string prefix
+        * "GPIO".
+        *
+        * However, some GPIOs are input-only, and the ASPEED datasheets name
+        * them differently. An input-only GPIO such as T0 has:
+        *
+        *    - expr->signal set to "GPIT0"
+        *    - expr->function set to "GPIT0"
+        *
+        * It's tempting to generalise the prefix test from "GPIO" to "GPI" to
+        * account for both GPIOs and GPIs, but in doing so we run aground on
+        * another feature:
+        *
+        * Some pins in the ASPEED BMC SoCs have a "pass-through" GPIO
+        * function where the input state of one pin is replicated as the
+        * output state of another (as if they were shorted together - a mux
+        * configuration that is typically enabled by hardware strapping).
+        * This feature allows the BMC to pass e.g. power button state through
+        * to the host while the BMC is yet to boot, but take control of the
+        * button state once the BMC has booted by muxing each pin as a
+        * separate, pin-specific GPIO.
+        *
+        * Conceptually this pass-through mode is a form of GPIO and is named
+        * as such in the datasheets, e.g. "GPID0". This naming similarity
+        * trips us up with the simple GPI-prefixed-signal-name scheme
+        * discussed above, as the pass-through configuration is not what we
+        * want when muxing a pin as GPIO for the GPIO subsystem.
+        *
+        * On e.g. the AST2400, a pass-through function "GPID0" is grouped on
+        * balls A18 and D16, where we have:
+        *
+        *    For ball A18:
+        *    - expr->signal set to "GPID0IN"
+        *    - expr->function set to "GPID0"
+        *
+        *    For ball D16:
+        *    - expr->signal set to "GPID0OUT"
+        *    - expr->function set to "GPID0"
+        *
+        * By contrast, the pin-specific GPIO expressions for the same pins are
+        * as follows:
+        *
+        *    For ball A18:
+        *    - expr->signal looks like "GPIOD0"
+        *    - expr->function looks like "GPIOD0"
+        *
+        *    For ball D16:
+        *    - expr->signal looks like "GPIOD1"
+        *    - expr->function looks like "GPIOD1"
+        *
+        * Testing both the signal _and_ function names gives us the means to
+        * differentiate the pass-through GPIO pinmux configuration from the
+        * pin-specific configuration that the GPIO subsystem is after: An
+        * expression is a pin-specific (non-pass-through) GPIO configuration
+        * if the signal prefix is "GPI" and the signal name matches the
+        * function name.
         */
-       return strncmp(expr->signal, "GPI", 3) == 0;
+       return !strncmp(expr->signal, "GPI", 3) &&
+                       !strcmp(expr->signal, expr->function);
 }
 
 static bool aspeed_gpio_in_exprs(const struct aspeed_sig_expr **exprs)
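
The resulting test is just the "GPI" prefix check plus an exact signal/function match. The worked examples from the comment behave as follows (standalone sketch; "SD1CLK"/"SD1" is a made-up non-GPIO case for contrast):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    struct expr { const char *signal, *function; };

    static bool expr_is_gpio(const struct expr *e)
    {
            return !strncmp(e->signal, "GPI", 3) &&
                   !strcmp(e->signal, e->function);
    }

    int main(void)
    {
            const struct expr cases[] = {
                    { "GPIOB1",  "GPIOB1" },  /* ordinary GPIO     -> GPIO */
                    { "GPIT0",   "GPIT0"  },  /* input-only GPI    -> GPIO */
                    { "GPID0IN", "GPID0"  },  /* pass-through ball -> not  */
                    { "SD1CLK",  "SD1"    },  /* non-GPIO signal   -> not  */
            };
            size_t i;

            for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
                    printf("%-8s %-7s -> %s\n", cases[i].signal,
                           cases[i].function,
                           expr_is_gpio(&cases[i]) ? "GPIO" : "not GPIO");
            return 0;
    }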
index f86739e..dba5875 100644 (file)
@@ -452,10 +452,11 @@ struct aspeed_sig_desc {
  * evaluation of the descriptors.
  *
  * @signal: The signal name for the priority level on the pin. If the signal
- *          type is GPIO, then the signal name must begin with the string
- *          "GPIO", e.g. GPIOA0, GPIOT4 etc.
+ *          type is GPIO, then the signal name must begin with the
+ *          prefix "GPI", e.g. GPIOA0, GPIT0 etc.
  * @function: The name of the function the signal participates in for the
- *            associated expression
+ *            associated expression. For pin-specific GPIO, the function
+ *            name must match the signal name.
  * @ndescs: The number of signal descriptors in the expression
  * @descs: Pointer to an array of signal descriptors that comprise the
  *         function expression
index d49aab3..394a421 100644 (file)
@@ -1049,7 +1049,6 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev,
                        break;
                case PIN_CONFIG_INPUT_DEBOUNCE:
                        debounce = readl(db_reg);
-                       debounce &= ~BYT_DEBOUNCE_PULSE_MASK;
 
                        if (arg)
                                conf |= BYT_DEBOUNCE_EN;
@@ -1058,24 +1057,31 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev,
 
                        switch (arg) {
                        case 375:
+                               debounce &= ~BYT_DEBOUNCE_PULSE_MASK;
                                debounce |= BYT_DEBOUNCE_PULSE_375US;
                                break;
                        case 750:
+                               debounce &= ~BYT_DEBOUNCE_PULSE_MASK;
                                debounce |= BYT_DEBOUNCE_PULSE_750US;
                                break;
                        case 1500:
+                               debounce &= ~BYT_DEBOUNCE_PULSE_MASK;
                                debounce |= BYT_DEBOUNCE_PULSE_1500US;
                                break;
                        case 3000:
+                               debounce &= ~BYT_DEBOUNCE_PULSE_MASK;
                                debounce |= BYT_DEBOUNCE_PULSE_3MS;
                                break;
                        case 6000:
+                               debounce &= ~BYT_DEBOUNCE_PULSE_MASK;
                                debounce |= BYT_DEBOUNCE_PULSE_6MS;
                                break;
                        case 12000:
+                               debounce &= ~BYT_DEBOUNCE_PULSE_MASK;
                                debounce |= BYT_DEBOUNCE_PULSE_12MS;
                                break;
                        case 24000:
+                               debounce &= ~BYT_DEBOUNCE_PULSE_MASK;
                                debounce |= BYT_DEBOUNCE_PULSE_24MS;
                                break;
                        default:
index 1c10ab1..b6ef191 100644 (file)
@@ -442,8 +442,8 @@ static void intel_gpio_set_gpio_mode(void __iomem *padcfg0)
        value |= PADCFG0_PMODE_GPIO;
 
        /* Disable input and output buffers */
-       value &= ~PADCFG0_GPIORXDIS;
-       value &= ~PADCFG0_GPIOTXDIS;
+       value |= PADCFG0_GPIORXDIS;
+       value |= PADCFG0_GPIOTXDIS;
 
        /* Disable SCI/SMI/NMI generation */
        value &= ~(PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIROUTSCI);
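
PADCFG0_GPIORXDIS/GPIOTXDIS are active-high disable bits, so the old "&= ~" was enabling both buffers while the comment said the opposite; setting the bits is what actually turns them off. A two-line polarity check (bit positions here are hypothetical, for illustration only):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RXDIS (1u << 9)     /* hypothetical "RX disable" bit */
    #define TXDIS (1u << 8)     /* hypothetical "TX disable" bit */

    int main(void)
    {
            uint32_t v = 0;

            v &= ~(RXDIS | TXDIS);  /* old code: clears disable bits = enables */
            assert(v == 0);

            v |= RXDIS | TXDIS;     /* fix: sets disable bits = buffers off */
            assert(v == (RXDIS | TXDIS));
            printf("buffers disabled: 0x%08x\n", v);
            return 0;
    }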
index 9bd0e8e..ec435b7 100644 (file)
@@ -16,7 +16,7 @@
 
 #define JSL_PAD_OWN    0x020
 #define JSL_PADCFGLOCK 0x080
-#define JSL_HOSTSW_OWN 0x0b0
+#define JSL_HOSTSW_OWN 0x0c0
 #define JSL_GPI_IS     0x100
 #define JSL_GPI_IE     0x120
 
@@ -65,252 +65,263 @@ static const struct pinctrl_pin_desc jsl_pins[] = {
        PINCTRL_PIN(17, "EMMC_CLK"),
        PINCTRL_PIN(18, "EMMC_RESETB"),
        PINCTRL_PIN(19, "A4WP_PRESENT"),
+       /* SPI */
+       PINCTRL_PIN(20, "SPI0_IO_2"),
+       PINCTRL_PIN(21, "SPI0_IO_3"),
+       PINCTRL_PIN(22, "SPI0_MOSI_IO_0"),
+       PINCTRL_PIN(23, "SPI0_MISO_IO_1"),
+       PINCTRL_PIN(24, "SPI0_TPM_CSB"),
+       PINCTRL_PIN(25, "SPI0_FLASH_0_CSB"),
+       PINCTRL_PIN(26, "SPI0_FLASH_1_CSB"),
+       PINCTRL_PIN(27, "SPI0_CLK"),
+       PINCTRL_PIN(28, "SPI0_CLK_LOOPBK"),
        /* GPP_B */
-       PINCTRL_PIN(20, "CORE_VID_0"),
-       PINCTRL_PIN(21, "CORE_VID_1"),
-       PINCTRL_PIN(22, "VRALERTB"),
-       PINCTRL_PIN(23, "CPU_GP_2"),
-       PINCTRL_PIN(24, "CPU_GP_3"),
-       PINCTRL_PIN(25, "SRCCLKREQB_0"),
-       PINCTRL_PIN(26, "SRCCLKREQB_1"),
-       PINCTRL_PIN(27, "SRCCLKREQB_2"),
-       PINCTRL_PIN(28, "SRCCLKREQB_3"),
-       PINCTRL_PIN(29, "SRCCLKREQB_4"),
-       PINCTRL_PIN(30, "SRCCLKREQB_5"),
-       PINCTRL_PIN(31, "PMCALERTB"),
-       PINCTRL_PIN(32, "SLP_S0B"),
-       PINCTRL_PIN(33, "PLTRSTB"),
-       PINCTRL_PIN(34, "SPKR"),
-       PINCTRL_PIN(35, "GSPI0_CS0B"),
-       PINCTRL_PIN(36, "GSPI0_CLK"),
-       PINCTRL_PIN(37, "GSPI0_MISO"),
-       PINCTRL_PIN(38, "GSPI0_MOSI"),
-       PINCTRL_PIN(39, "GSPI1_CS0B"),
-       PINCTRL_PIN(40, "GSPI1_CLK"),
-       PINCTRL_PIN(41, "GSPI1_MISO"),
-       PINCTRL_PIN(42, "GSPI1_MOSI"),
-       PINCTRL_PIN(43, "DDSP_HPD_A"),
-       PINCTRL_PIN(44, "GSPI0_CLK_LOOPBK"),
-       PINCTRL_PIN(45, "GSPI1_CLK_LOOPBK"),
+       PINCTRL_PIN(29, "CORE_VID_0"),
+       PINCTRL_PIN(30, "CORE_VID_1"),
+       PINCTRL_PIN(31, "VRALERTB"),
+       PINCTRL_PIN(32, "CPU_GP_2"),
+       PINCTRL_PIN(33, "CPU_GP_3"),
+       PINCTRL_PIN(34, "SRCCLKREQB_0"),
+       PINCTRL_PIN(35, "SRCCLKREQB_1"),
+       PINCTRL_PIN(36, "SRCCLKREQB_2"),
+       PINCTRL_PIN(37, "SRCCLKREQB_3"),
+       PINCTRL_PIN(38, "SRCCLKREQB_4"),
+       PINCTRL_PIN(39, "SRCCLKREQB_5"),
+       PINCTRL_PIN(40, "PMCALERTB"),
+       PINCTRL_PIN(41, "SLP_S0B"),
+       PINCTRL_PIN(42, "PLTRSTB"),
+       PINCTRL_PIN(43, "SPKR"),
+       PINCTRL_PIN(44, "GSPI0_CS0B"),
+       PINCTRL_PIN(45, "GSPI0_CLK"),
+       PINCTRL_PIN(46, "GSPI0_MISO"),
+       PINCTRL_PIN(47, "GSPI0_MOSI"),
+       PINCTRL_PIN(48, "GSPI1_CS0B"),
+       PINCTRL_PIN(49, "GSPI1_CLK"),
+       PINCTRL_PIN(50, "GSPI1_MISO"),
+       PINCTRL_PIN(51, "GSPI1_MOSI"),
+       PINCTRL_PIN(52, "DDSP_HPD_A"),
+       PINCTRL_PIN(53, "GSPI0_CLK_LOOPBK"),
+       PINCTRL_PIN(54, "GSPI1_CLK_LOOPBK"),
        /* GPP_A */
-       PINCTRL_PIN(46, "ESPI_IO_0"),
-       PINCTRL_PIN(47, "ESPI_IO_1"),
-       PINCTRL_PIN(48, "ESPI_IO_2"),
-       PINCTRL_PIN(49, "ESPI_IO_3"),
-       PINCTRL_PIN(50, "ESPI_CSB"),
-       PINCTRL_PIN(51, "ESPI_CLK"),
-       PINCTRL_PIN(52, "ESPI_RESETB"),
-       PINCTRL_PIN(53, "SMBCLK"),
-       PINCTRL_PIN(54, "SMBDATA"),
-       PINCTRL_PIN(55, "SMBALERTB"),
-       PINCTRL_PIN(56, "CPU_GP_0"),
-       PINCTRL_PIN(57, "CPU_GP_1"),
-       PINCTRL_PIN(58, "USB2_OCB_1"),
-       PINCTRL_PIN(59, "USB2_OCB_2"),
-       PINCTRL_PIN(60, "USB2_OCB_3"),
-       PINCTRL_PIN(61, "DDSP_HPD_A_TIME_SYNC_0"),
-       PINCTRL_PIN(62, "DDSP_HPD_B"),
-       PINCTRL_PIN(63, "DDSP_HPD_C"),
-       PINCTRL_PIN(64, "USB2_OCB_0"),
-       PINCTRL_PIN(65, "PCHHOTB"),
-       PINCTRL_PIN(66, "ESPI_CLK_LOOPBK"),
+       PINCTRL_PIN(55, "ESPI_IO_0"),
+       PINCTRL_PIN(56, "ESPI_IO_1"),
+       PINCTRL_PIN(57, "ESPI_IO_2"),
+       PINCTRL_PIN(58, "ESPI_IO_3"),
+       PINCTRL_PIN(59, "ESPI_CSB"),
+       PINCTRL_PIN(60, "ESPI_CLK"),
+       PINCTRL_PIN(61, "ESPI_RESETB"),
+       PINCTRL_PIN(62, "SMBCLK"),
+       PINCTRL_PIN(63, "SMBDATA"),
+       PINCTRL_PIN(64, "SMBALERTB"),
+       PINCTRL_PIN(65, "CPU_GP_0"),
+       PINCTRL_PIN(66, "CPU_GP_1"),
+       PINCTRL_PIN(67, "USB2_OCB_1"),
+       PINCTRL_PIN(68, "USB2_OCB_2"),
+       PINCTRL_PIN(69, "USB2_OCB_3"),
+       PINCTRL_PIN(70, "DDSP_HPD_A_TIME_SYNC_0"),
+       PINCTRL_PIN(71, "DDSP_HPD_B"),
+       PINCTRL_PIN(72, "DDSP_HPD_C"),
+       PINCTRL_PIN(73, "USB2_OCB_0"),
+       PINCTRL_PIN(74, "PCHHOTB"),
+       PINCTRL_PIN(75, "ESPI_CLK_LOOPBK"),
        /* GPP_S */
-       PINCTRL_PIN(67, "SNDW1_CLK"),
-       PINCTRL_PIN(68, "SNDW1_DATA"),
-       PINCTRL_PIN(69, "SNDW2_CLK"),
-       PINCTRL_PIN(70, "SNDW2_DATA"),
-       PINCTRL_PIN(71, "SNDW1_CLK"),
-       PINCTRL_PIN(72, "SNDW1_DATA"),
-       PINCTRL_PIN(73, "SNDW4_CLK_DMIC_CLK_0"),
-       PINCTRL_PIN(74, "SNDW4_DATA_DMIC_DATA_0"),
+       PINCTRL_PIN(76, "SNDW1_CLK"),
+       PINCTRL_PIN(77, "SNDW1_DATA"),
+       PINCTRL_PIN(78, "SNDW2_CLK"),
+       PINCTRL_PIN(79, "SNDW2_DATA"),
+       PINCTRL_PIN(80, "SNDW1_CLK"),
+       PINCTRL_PIN(81, "SNDW1_DATA"),
+       PINCTRL_PIN(82, "SNDW4_CLK_DMIC_CLK_0"),
+       PINCTRL_PIN(83, "SNDW4_DATA_DMIC_DATA_0"),
        /* GPP_R */
-       PINCTRL_PIN(75, "HDA_BCLK"),
-       PINCTRL_PIN(76, "HDA_SYNC"),
-       PINCTRL_PIN(77, "HDA_SDO"),
-       PINCTRL_PIN(78, "HDA_SDI_0"),
-       PINCTRL_PIN(79, "HDA_RSTB"),
-       PINCTRL_PIN(80, "HDA_SDI_1"),
-       PINCTRL_PIN(81, "I2S1_SFRM"),
-       PINCTRL_PIN(82, "I2S1_TXD"),
+       PINCTRL_PIN(84, "HDA_BCLK"),
+       PINCTRL_PIN(85, "HDA_SYNC"),
+       PINCTRL_PIN(86, "HDA_SDO"),
+       PINCTRL_PIN(87, "HDA_SDI_0"),
+       PINCTRL_PIN(88, "HDA_RSTB"),
+       PINCTRL_PIN(89, "HDA_SDI_1"),
+       PINCTRL_PIN(90, "I2S1_SFRM"),
+       PINCTRL_PIN(91, "I2S1_TXD"),
        /* GPP_H */
-       PINCTRL_PIN(83, "GPPC_H_0"),
-       PINCTRL_PIN(84, "SD_PWR_EN_B"),
-       PINCTRL_PIN(85, "MODEM_CLKREQ"),
-       PINCTRL_PIN(86, "SX_EXIT_HOLDOFFB"),
-       PINCTRL_PIN(87, "I2C2_SDA"),
-       PINCTRL_PIN(88, "I2C2_SCL"),
-       PINCTRL_PIN(89, "I2C3_SDA"),
-       PINCTRL_PIN(90, "I2C3_SCL"),
-       PINCTRL_PIN(91, "I2C4_SDA"),
-       PINCTRL_PIN(92, "I2C4_SCL"),
-       PINCTRL_PIN(93, "CPU_VCCIO_PWR_GATEB"),
-       PINCTRL_PIN(94, "I2S2_SCLK"),
-       PINCTRL_PIN(95, "I2S2_SFRM"),
-       PINCTRL_PIN(96, "I2S2_TXD"),
-       PINCTRL_PIN(97, "I2S2_RXD"),
-       PINCTRL_PIN(98, "I2S1_SCLK"),
-       PINCTRL_PIN(99, "GPPC_H_16"),
-       PINCTRL_PIN(100, "GPPC_H_17"),
-       PINCTRL_PIN(101, "GPPC_H_18"),
-       PINCTRL_PIN(102, "GPPC_H_19"),
-       PINCTRL_PIN(103, "GPPC_H_20"),
-       PINCTRL_PIN(104, "GPPC_H_21"),
-       PINCTRL_PIN(105, "GPPC_H_22"),
-       PINCTRL_PIN(106, "GPPC_H_23"),
+       PINCTRL_PIN(92, "GPPC_H_0"),
+       PINCTRL_PIN(93, "SD_PWR_EN_B"),
+       PINCTRL_PIN(94, "MODEM_CLKREQ"),
+       PINCTRL_PIN(95, "SX_EXIT_HOLDOFFB"),
+       PINCTRL_PIN(96, "I2C2_SDA"),
+       PINCTRL_PIN(97, "I2C2_SCL"),
+       PINCTRL_PIN(98, "I2C3_SDA"),
+       PINCTRL_PIN(99, "I2C3_SCL"),
+       PINCTRL_PIN(100, "I2C4_SDA"),
+       PINCTRL_PIN(101, "I2C4_SCL"),
+       PINCTRL_PIN(102, "CPU_VCCIO_PWR_GATEB"),
+       PINCTRL_PIN(103, "I2S2_SCLK"),
+       PINCTRL_PIN(104, "I2S2_SFRM"),
+       PINCTRL_PIN(105, "I2S2_TXD"),
+       PINCTRL_PIN(106, "I2S2_RXD"),
+       PINCTRL_PIN(107, "I2S1_SCLK"),
+       PINCTRL_PIN(108, "GPPC_H_16"),
+       PINCTRL_PIN(109, "GPPC_H_17"),
+       PINCTRL_PIN(110, "GPPC_H_18"),
+       PINCTRL_PIN(111, "GPPC_H_19"),
+       PINCTRL_PIN(112, "GPPC_H_20"),
+       PINCTRL_PIN(113, "GPPC_H_21"),
+       PINCTRL_PIN(114, "GPPC_H_22"),
+       PINCTRL_PIN(115, "GPPC_H_23"),
        /* GPP_D */
-       PINCTRL_PIN(107, "SPI1_CSB"),
-       PINCTRL_PIN(108, "SPI1_CLK"),
-       PINCTRL_PIN(109, "SPI1_MISO_IO_1"),
-       PINCTRL_PIN(110, "SPI1_MOSI_IO_0"),
-       PINCTRL_PIN(111, "ISH_I2C0_SDA"),
-       PINCTRL_PIN(112, "ISH_I2C0_SCL"),
-       PINCTRL_PIN(113, "ISH_I2C1_SDA"),
-       PINCTRL_PIN(114, "ISH_I2C1_SCL"),
-       PINCTRL_PIN(115, "ISH_SPI_CSB"),
-       PINCTRL_PIN(116, "ISH_SPI_CLK"),
-       PINCTRL_PIN(117, "ISH_SPI_MISO"),
-       PINCTRL_PIN(118, "ISH_SPI_MOSI"),
-       PINCTRL_PIN(119, "ISH_UART0_RXD"),
-       PINCTRL_PIN(120, "ISH_UART0_TXD"),
-       PINCTRL_PIN(121, "ISH_UART0_RTSB"),
-       PINCTRL_PIN(122, "ISH_UART0_CTSB"),
-       PINCTRL_PIN(123, "SPI1_IO_2"),
-       PINCTRL_PIN(124, "SPI1_IO_3"),
-       PINCTRL_PIN(125, "I2S_MCLK"),
-       PINCTRL_PIN(126, "CNV_MFUART2_RXD"),
-       PINCTRL_PIN(127, "CNV_MFUART2_TXD"),
-       PINCTRL_PIN(128, "CNV_PA_BLANKING"),
-       PINCTRL_PIN(129, "I2C5_SDA"),
-       PINCTRL_PIN(130, "I2C5_SCL"),
-       PINCTRL_PIN(131, "GSPI2_CLK_LOOPBK"),
-       PINCTRL_PIN(132, "SPI1_CLK_LOOPBK"),
+       PINCTRL_PIN(116, "SPI1_CSB"),
+       PINCTRL_PIN(117, "SPI1_CLK"),
+       PINCTRL_PIN(118, "SPI1_MISO_IO_1"),
+       PINCTRL_PIN(119, "SPI1_MOSI_IO_0"),
+       PINCTRL_PIN(120, "ISH_I2C0_SDA"),
+       PINCTRL_PIN(121, "ISH_I2C0_SCL"),
+       PINCTRL_PIN(122, "ISH_I2C1_SDA"),
+       PINCTRL_PIN(123, "ISH_I2C1_SCL"),
+       PINCTRL_PIN(124, "ISH_SPI_CSB"),
+       PINCTRL_PIN(125, "ISH_SPI_CLK"),
+       PINCTRL_PIN(126, "ISH_SPI_MISO"),
+       PINCTRL_PIN(127, "ISH_SPI_MOSI"),
+       PINCTRL_PIN(128, "ISH_UART0_RXD"),
+       PINCTRL_PIN(129, "ISH_UART0_TXD"),
+       PINCTRL_PIN(130, "ISH_UART0_RTSB"),
+       PINCTRL_PIN(131, "ISH_UART0_CTSB"),
+       PINCTRL_PIN(132, "SPI1_IO_2"),
+       PINCTRL_PIN(133, "SPI1_IO_3"),
+       PINCTRL_PIN(134, "I2S_MCLK"),
+       PINCTRL_PIN(135, "CNV_MFUART2_RXD"),
+       PINCTRL_PIN(136, "CNV_MFUART2_TXD"),
+       PINCTRL_PIN(137, "CNV_PA_BLANKING"),
+       PINCTRL_PIN(138, "I2C5_SDA"),
+       PINCTRL_PIN(139, "I2C5_SCL"),
+       PINCTRL_PIN(140, "GSPI2_CLK_LOOPBK"),
+       PINCTRL_PIN(141, "SPI1_CLK_LOOPBK"),
        /* vGPIO */
-       PINCTRL_PIN(133, "CNV_BTEN"),
-       PINCTRL_PIN(134, "CNV_WCEN"),
-       PINCTRL_PIN(135, "CNV_BT_HOST_WAKEB"),
-       PINCTRL_PIN(136, "CNV_BT_IF_SELECT"),
-       PINCTRL_PIN(137, "vCNV_BT_UART_TXD"),
-       PINCTRL_PIN(138, "vCNV_BT_UART_RXD"),
-       PINCTRL_PIN(139, "vCNV_BT_UART_CTS_B"),
-       PINCTRL_PIN(140, "vCNV_BT_UART_RTS_B"),
-       PINCTRL_PIN(141, "vCNV_MFUART1_TXD"),
-       PINCTRL_PIN(142, "vCNV_MFUART1_RXD"),
-       PINCTRL_PIN(143, "vCNV_MFUART1_CTS_B"),
-       PINCTRL_PIN(144, "vCNV_MFUART1_RTS_B"),
-       PINCTRL_PIN(145, "vUART0_TXD"),
-       PINCTRL_PIN(146, "vUART0_RXD"),
-       PINCTRL_PIN(147, "vUART0_CTS_B"),
-       PINCTRL_PIN(148, "vUART0_RTS_B"),
-       PINCTRL_PIN(149, "vISH_UART0_TXD"),
-       PINCTRL_PIN(150, "vISH_UART0_RXD"),
-       PINCTRL_PIN(151, "vISH_UART0_CTS_B"),
-       PINCTRL_PIN(152, "vISH_UART0_RTS_B"),
-       PINCTRL_PIN(153, "vCNV_BT_I2S_BCLK"),
-       PINCTRL_PIN(154, "vCNV_BT_I2S_WS_SYNC"),
-       PINCTRL_PIN(155, "vCNV_BT_I2S_SDO"),
-       PINCTRL_PIN(156, "vCNV_BT_I2S_SDI"),
-       PINCTRL_PIN(157, "vI2S2_SCLK"),
-       PINCTRL_PIN(158, "vI2S2_SFRM"),
-       PINCTRL_PIN(159, "vI2S2_TXD"),
-       PINCTRL_PIN(160, "vI2S2_RXD"),
-       PINCTRL_PIN(161, "vSD3_CD_B"),
+       PINCTRL_PIN(142, "CNV_BTEN"),
+       PINCTRL_PIN(143, "CNV_WCEN"),
+       PINCTRL_PIN(144, "CNV_BT_HOST_WAKEB"),
+       PINCTRL_PIN(145, "CNV_BT_IF_SELECT"),
+       PINCTRL_PIN(146, "vCNV_BT_UART_TXD"),
+       PINCTRL_PIN(147, "vCNV_BT_UART_RXD"),
+       PINCTRL_PIN(148, "vCNV_BT_UART_CTS_B"),
+       PINCTRL_PIN(149, "vCNV_BT_UART_RTS_B"),
+       PINCTRL_PIN(150, "vCNV_MFUART1_TXD"),
+       PINCTRL_PIN(151, "vCNV_MFUART1_RXD"),
+       PINCTRL_PIN(152, "vCNV_MFUART1_CTS_B"),
+       PINCTRL_PIN(153, "vCNV_MFUART1_RTS_B"),
+       PINCTRL_PIN(154, "vUART0_TXD"),
+       PINCTRL_PIN(155, "vUART0_RXD"),
+       PINCTRL_PIN(156, "vUART0_CTS_B"),
+       PINCTRL_PIN(157, "vUART0_RTS_B"),
+       PINCTRL_PIN(158, "vISH_UART0_TXD"),
+       PINCTRL_PIN(159, "vISH_UART0_RXD"),
+       PINCTRL_PIN(160, "vISH_UART0_CTS_B"),
+       PINCTRL_PIN(161, "vISH_UART0_RTS_B"),
+       PINCTRL_PIN(162, "vCNV_BT_I2S_BCLK"),
+       PINCTRL_PIN(163, "vCNV_BT_I2S_WS_SYNC"),
+       PINCTRL_PIN(164, "vCNV_BT_I2S_SDO"),
+       PINCTRL_PIN(165, "vCNV_BT_I2S_SDI"),
+       PINCTRL_PIN(166, "vI2S2_SCLK"),
+       PINCTRL_PIN(167, "vI2S2_SFRM"),
+       PINCTRL_PIN(168, "vI2S2_TXD"),
+       PINCTRL_PIN(169, "vI2S2_RXD"),
+       PINCTRL_PIN(170, "vSD3_CD_B"),
        /* GPP_C */
-       PINCTRL_PIN(162, "GPPC_C_0"),
-       PINCTRL_PIN(163, "GPPC_C_1"),
-       PINCTRL_PIN(164, "GPPC_C_2"),
-       PINCTRL_PIN(165, "GPPC_C_3"),
-       PINCTRL_PIN(166, "GPPC_C_4"),
-       PINCTRL_PIN(167, "GPPC_C_5"),
-       PINCTRL_PIN(168, "SUSWARNB_SUSPWRDNACK"),
-       PINCTRL_PIN(169, "SUSACKB"),
-       PINCTRL_PIN(170, "UART0_RXD"),
-       PINCTRL_PIN(171, "UART0_TXD"),
-       PINCTRL_PIN(172, "UART0_RTSB"),
-       PINCTRL_PIN(173, "UART0_CTSB"),
-       PINCTRL_PIN(174, "UART1_RXD"),
-       PINCTRL_PIN(175, "UART1_TXD"),
-       PINCTRL_PIN(176, "UART1_RTSB"),
-       PINCTRL_PIN(177, "UART1_CTSB"),
-       PINCTRL_PIN(178, "I2C0_SDA"),
-       PINCTRL_PIN(179, "I2C0_SCL"),
-       PINCTRL_PIN(180, "I2C1_SDA"),
-       PINCTRL_PIN(181, "I2C1_SCL"),
-       PINCTRL_PIN(182, "UART2_RXD"),
-       PINCTRL_PIN(183, "UART2_TXD"),
-       PINCTRL_PIN(184, "UART2_RTSB"),
-       PINCTRL_PIN(185, "UART2_CTSB"),
+       PINCTRL_PIN(171, "GPPC_C_0"),
+       PINCTRL_PIN(172, "GPPC_C_1"),
+       PINCTRL_PIN(173, "GPPC_C_2"),
+       PINCTRL_PIN(174, "GPPC_C_3"),
+       PINCTRL_PIN(175, "GPPC_C_4"),
+       PINCTRL_PIN(176, "GPPC_C_5"),
+       PINCTRL_PIN(177, "SUSWARNB_SUSPWRDNACK"),
+       PINCTRL_PIN(178, "SUSACKB"),
+       PINCTRL_PIN(179, "UART0_RXD"),
+       PINCTRL_PIN(180, "UART0_TXD"),
+       PINCTRL_PIN(181, "UART0_RTSB"),
+       PINCTRL_PIN(182, "UART0_CTSB"),
+       PINCTRL_PIN(183, "UART1_RXD"),
+       PINCTRL_PIN(184, "UART1_TXD"),
+       PINCTRL_PIN(185, "UART1_RTSB"),
+       PINCTRL_PIN(186, "UART1_CTSB"),
+       PINCTRL_PIN(187, "I2C0_SDA"),
+       PINCTRL_PIN(188, "I2C0_SCL"),
+       PINCTRL_PIN(189, "I2C1_SDA"),
+       PINCTRL_PIN(190, "I2C1_SCL"),
+       PINCTRL_PIN(191, "UART2_RXD"),
+       PINCTRL_PIN(192, "UART2_TXD"),
+       PINCTRL_PIN(193, "UART2_RTSB"),
+       PINCTRL_PIN(194, "UART2_CTSB"),
        /* HVCMOS */
-       PINCTRL_PIN(186, "L_BKLTEN"),
-       PINCTRL_PIN(187, "L_BKLTCTL"),
-       PINCTRL_PIN(188, "L_VDDEN"),
-       PINCTRL_PIN(189, "SYS_PWROK"),
-       PINCTRL_PIN(190, "SYS_RESETB"),
-       PINCTRL_PIN(191, "MLK_RSTB"),
+       PINCTRL_PIN(195, "L_BKLTEN"),
+       PINCTRL_PIN(196, "L_BKLTCTL"),
+       PINCTRL_PIN(197, "L_VDDEN"),
+       PINCTRL_PIN(198, "SYS_PWROK"),
+       PINCTRL_PIN(199, "SYS_RESETB"),
+       PINCTRL_PIN(200, "MLK_RSTB"),
        /* GPP_E */
-       PINCTRL_PIN(192, "ISH_GP_0"),
-       PINCTRL_PIN(193, "ISH_GP_1"),
-       PINCTRL_PIN(194, "IMGCLKOUT_1"),
-       PINCTRL_PIN(195, "ISH_GP_2"),
-       PINCTRL_PIN(196, "IMGCLKOUT_2"),
-       PINCTRL_PIN(197, "SATA_LEDB"),
-       PINCTRL_PIN(198, "IMGCLKOUT_3"),
-       PINCTRL_PIN(199, "ISH_GP_3"),
-       PINCTRL_PIN(200, "ISH_GP_4"),
-       PINCTRL_PIN(201, "ISH_GP_5"),
-       PINCTRL_PIN(202, "ISH_GP_6"),
-       PINCTRL_PIN(203, "ISH_GP_7"),
-       PINCTRL_PIN(204, "IMGCLKOUT_4"),
-       PINCTRL_PIN(205, "DDPA_CTRLCLK"),
-       PINCTRL_PIN(206, "DDPA_CTRLDATA"),
-       PINCTRL_PIN(207, "DDPB_CTRLCLK"),
-       PINCTRL_PIN(208, "DDPB_CTRLDATA"),
-       PINCTRL_PIN(209, "DDPC_CTRLCLK"),
-       PINCTRL_PIN(210, "DDPC_CTRLDATA"),
-       PINCTRL_PIN(211, "IMGCLKOUT_5"),
-       PINCTRL_PIN(212, "CNV_BRI_DT"),
-       PINCTRL_PIN(213, "CNV_BRI_RSP"),
-       PINCTRL_PIN(214, "CNV_RGI_DT"),
-       PINCTRL_PIN(215, "CNV_RGI_RSP"),
+       PINCTRL_PIN(201, "ISH_GP_0"),
+       PINCTRL_PIN(202, "ISH_GP_1"),
+       PINCTRL_PIN(203, "IMGCLKOUT_1"),
+       PINCTRL_PIN(204, "ISH_GP_2"),
+       PINCTRL_PIN(205, "IMGCLKOUT_2"),
+       PINCTRL_PIN(206, "SATA_LEDB"),
+       PINCTRL_PIN(207, "IMGCLKOUT_3"),
+       PINCTRL_PIN(208, "ISH_GP_3"),
+       PINCTRL_PIN(209, "ISH_GP_4"),
+       PINCTRL_PIN(210, "ISH_GP_5"),
+       PINCTRL_PIN(211, "ISH_GP_6"),
+       PINCTRL_PIN(212, "ISH_GP_7"),
+       PINCTRL_PIN(213, "IMGCLKOUT_4"),
+       PINCTRL_PIN(214, "DDPA_CTRLCLK"),
+       PINCTRL_PIN(215, "DDPA_CTRLDATA"),
+       PINCTRL_PIN(216, "DDPB_CTRLCLK"),
+       PINCTRL_PIN(217, "DDPB_CTRLDATA"),
+       PINCTRL_PIN(218, "DDPC_CTRLCLK"),
+       PINCTRL_PIN(219, "DDPC_CTRLDATA"),
+       PINCTRL_PIN(220, "IMGCLKOUT_5"),
+       PINCTRL_PIN(221, "CNV_BRI_DT"),
+       PINCTRL_PIN(222, "CNV_BRI_RSP"),
+       PINCTRL_PIN(223, "CNV_RGI_DT"),
+       PINCTRL_PIN(224, "CNV_RGI_RSP"),
        /* GPP_G */
-       PINCTRL_PIN(216, "SD3_CMD"),
-       PINCTRL_PIN(217, "SD3_D0"),
-       PINCTRL_PIN(218, "SD3_D1"),
-       PINCTRL_PIN(219, "SD3_D2"),
-       PINCTRL_PIN(220, "SD3_D3"),
-       PINCTRL_PIN(221, "SD3_CDB"),
-       PINCTRL_PIN(222, "SD3_CLK"),
-       PINCTRL_PIN(223, "SD3_WP"),
+       PINCTRL_PIN(225, "SD3_CMD"),
+       PINCTRL_PIN(226, "SD3_D0"),
+       PINCTRL_PIN(227, "SD3_D1"),
+       PINCTRL_PIN(228, "SD3_D2"),
+       PINCTRL_PIN(229, "SD3_D3"),
+       PINCTRL_PIN(230, "SD3_CDB"),
+       PINCTRL_PIN(231, "SD3_CLK"),
+       PINCTRL_PIN(232, "SD3_WP"),
 };
 
 static const struct intel_padgroup jsl_community0_gpps[] = {
        JSL_GPP(0, 0, 19, 320),                         /* GPP_F */
-       JSL_GPP(1, 20, 45, 32),                         /* GPP_B */
-       JSL_GPP(2, 46, 66, 64),                         /* GPP_A */
-       JSL_GPP(3, 67, 74, 96),                         /* GPP_S */
-       JSL_GPP(4, 75, 82, 128),                        /* GPP_R */
+       JSL_GPP(1, 20, 28, INTEL_GPIO_BASE_NOMAP),      /* SPI */
+       JSL_GPP(2, 29, 54, 32),                         /* GPP_B */
+       JSL_GPP(3, 55, 75, 64),                         /* GPP_A */
+       JSL_GPP(4, 76, 83, 96),                         /* GPP_S */
+       JSL_GPP(5, 84, 91, 128),                        /* GPP_R */
 };
 
 static const struct intel_padgroup jsl_community1_gpps[] = {
-       JSL_GPP(0, 83, 106, 160),                       /* GPP_H */
-       JSL_GPP(1, 107, 132, 192),                      /* GPP_D */
-       JSL_GPP(2, 133, 161, 224),                      /* vGPIO */
-       JSL_GPP(3, 162, 185, 256),                      /* GPP_C */
+       JSL_GPP(0, 92, 115, 160),                       /* GPP_H */
+       JSL_GPP(1, 116, 141, 192),                      /* GPP_D */
+       JSL_GPP(2, 142, 170, 224),                      /* vGPIO */
+       JSL_GPP(3, 171, 194, 256),                      /* GPP_C */
 };
 
 static const struct intel_padgroup jsl_community4_gpps[] = {
-       JSL_GPP(0, 186, 191, INTEL_GPIO_BASE_NOMAP),    /* HVCMOS */
-       JSL_GPP(1, 192, 215, 288),                      /* GPP_E */
+       JSL_GPP(0, 195, 200, INTEL_GPIO_BASE_NOMAP),    /* HVCMOS */
+       JSL_GPP(1, 201, 224, 288),                      /* GPP_E */
 };
 
 static const struct intel_padgroup jsl_community5_gpps[] = {
-       JSL_GPP(0, 216, 223, INTEL_GPIO_BASE_ZERO),     /* GPP_G */
+       JSL_GPP(0, 225, 232, INTEL_GPIO_BASE_ZERO),     /* GPP_G */
 };
 
 static const struct intel_community jsl_communities[] = {
-       JSL_COMMUNITY(0, 0, 82, jsl_community0_gpps),
-       JSL_COMMUNITY(1, 83, 185, jsl_community1_gpps),
-       JSL_COMMUNITY(2, 186, 215, jsl_community4_gpps),
-       JSL_COMMUNITY(3, 216, 223, jsl_community5_gpps),
+       JSL_COMMUNITY(0, 0, 91, jsl_community0_gpps),
+       JSL_COMMUNITY(1, 92, 194, jsl_community1_gpps),
+       JSL_COMMUNITY(2, 195, 224, jsl_community4_gpps),
+       JSL_COMMUNITY(3, 225, 232, jsl_community5_gpps),
 };
 
 static const struct intel_pinctrl_soc_data jsl_soc_data = {
@@ -336,7 +347,6 @@ static struct platform_driver jsl_pinctrl_driver = {
                .pm = &jsl_pinctrl_pm_ops,
        },
 };
-
 module_platform_driver(jsl_pinctrl_driver);
 
 MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
index e4ff8da..3ae141e 100644 (file)
@@ -745,6 +745,10 @@ static int mrfld_config_set_pin(struct mrfld_pinctrl *mp, unsigned int pin,
                mask |= BUFCFG_Px_EN_MASK | BUFCFG_PUPD_VAL_MASK;
                bits |= BUFCFG_PU_EN;
 
+               /* Set default strength value in case none is given */
+               if (arg == 1)
+                       arg = 20000;
+
                switch (arg) {
                case 50000:
                        bits |= BUFCFG_PUPD_VAL_50K << BUFCFG_PUPD_VAL_SHIFT;
@@ -765,6 +769,10 @@ static int mrfld_config_set_pin(struct mrfld_pinctrl *mp, unsigned int pin,
                mask |= BUFCFG_Px_EN_MASK | BUFCFG_PUPD_VAL_MASK;
                bits |= BUFCFG_PD_EN;
 
+               /* Set default strength value in case none is given */
+               if (arg == 1)
+                       arg = 20000;
+
                switch (arg) {
                case 50000:
                        bits |= BUFCFG_PUPD_VAL_50K << BUFCFG_PUPD_VAL_SHIFT;
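
Both hunks above normalize the generic pinconf argument: the core passes arg == 1 when a pull is requested with no explicit strength, so the driver substitutes a 20 kOhm default before mapping ohms to register bits. The same normalize-then-map idiom in a runnable sketch (the encodings are illustrative, not the BUFCFG values):

    #include <stdio.h>

    /* Illustrative encodings; the real BUFCFG_PUPD_VAL_* values differ. */
    enum pull_bits { PULL_20K = 0, PULL_50K = 1, PULL_910 = 2 };

    static int pull_arg_to_bits(unsigned int arg)
    {
            /* pinconf convention: arg == 1 means "enable with default strength" */
            if (arg == 1)
                    arg = 20000;

            switch (arg) {
            case 50000: return PULL_50K;
            case 20000: return PULL_20K;
            case 910:   return PULL_910;
            default:    return -1;      /* -ENOTSUPP in the kernel */
            }
    }

    int main(void)
    {
            printf("%d\n", pull_arg_to_bits(1));      /* 0: treated as 20k */
            printf("%d\n", pull_arg_to_bits(50000));  /* 1 */
            return 0;
    }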
index 4aea3e0..899c16c 100644 (file)
@@ -429,7 +429,6 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type)
                pin_reg &= ~BIT(LEVEL_TRIG_OFF);
                pin_reg &= ~(ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF);
                pin_reg |= ACTIVE_HIGH << ACTIVE_LEVEL_OFF;
-               pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF;
                irq_set_handler_locked(d, handle_edge_irq);
                break;
 
@@ -437,7 +436,6 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type)
                pin_reg &= ~BIT(LEVEL_TRIG_OFF);
                pin_reg &= ~(ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF);
                pin_reg |= ACTIVE_LOW << ACTIVE_LEVEL_OFF;
-               pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF;
                irq_set_handler_locked(d, handle_edge_irq);
                break;
 
@@ -445,7 +443,6 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type)
                pin_reg &= ~BIT(LEVEL_TRIG_OFF);
                pin_reg &= ~(ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF);
                pin_reg |= BOTH_EADGE << ACTIVE_LEVEL_OFF;
-               pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF;
                irq_set_handler_locked(d, handle_edge_irq);
                break;
 
@@ -453,8 +450,6 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type)
                pin_reg |= LEVEL_TRIGGER << LEVEL_TRIG_OFF;
                pin_reg &= ~(ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF);
                pin_reg |= ACTIVE_HIGH << ACTIVE_LEVEL_OFF;
-               pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF);
-               pin_reg |= DB_TYPE_PRESERVE_LOW_GLITCH << DB_CNTRL_OFF;
                irq_set_handler_locked(d, handle_level_irq);
                break;
 
@@ -462,8 +457,6 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type)
                pin_reg |= LEVEL_TRIGGER << LEVEL_TRIG_OFF;
                pin_reg &= ~(ACTIVE_LEVEL_MASK << ACTIVE_LEVEL_OFF);
                pin_reg |= ACTIVE_LOW << ACTIVE_LEVEL_OFF;
-               pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF);
-               pin_reg |= DB_TYPE_PRESERVE_HIGH_GLITCH << DB_CNTRL_OFF;
                irq_set_handler_locked(d, handle_level_irq);
                break;
 
index 49f4b73..5592a92 100644 (file)
@@ -111,6 +111,7 @@ static const struct key_entry acer_wmi_keymap[] __initconst = {
        {KE_KEY, 0x64, {KEY_SWITCHVIDEOMODE} }, /* Display Switch */
        {KE_IGNORE, 0x81, {KEY_SLEEP} },
        {KE_KEY, 0x82, {KEY_TOUCHPAD_TOGGLE} }, /* Touch Pad Toggle */
+       {KE_IGNORE, 0x84, {KEY_KBDILLUMTOGGLE} }, /* Automatic keyboard backlight toggle */
        {KE_KEY, KEY_TOUCHPAD_ON, {KEY_TOUCHPAD_ON} },
        {KE_KEY, KEY_TOUCHPAD_OFF, {KEY_TOUCHPAD_OFF} },
        {KE_IGNORE, 0x83, {KEY_TOUCHPAD_TOGGLE} },
index f5901b0..0419c80 100644 (file)
@@ -206,6 +206,12 @@ static const struct dmi_system_id dmi_switches_allow_list[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "HP Stream x360 Convertible PC 11"),
                },
        },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion 13 x360 PC"),
+               },
+       },
        {} /* Array terminator */
 };
 
index e381067..c404706 100644 (file)
@@ -3218,7 +3218,14 @@ static int hotkey_init_tablet_mode(void)
 
                in_tablet_mode = hotkey_gmms_get_tablet_mode(res,
                                                             &has_tablet_mode);
-               if (has_tablet_mode)
+               /*
+                * The Yoga 11e series has 2 accelerometers described by a
+                * BOSC0200 ACPI node. This setup relies on a Windows service
+                * which calls special ACPI methods on this node to report
+                * the laptop/tent/tablet mode to the EC. The bmc150 iio driver
+                * does not support this, so skip the hotkey on these models.
+                */
+               if (has_tablet_mode && !acpi_dev_present("BOSC0200", "1", -1))
                        tp_features.hotkey_tablet = TP_HOTKEY_TABLET_USES_GMMS;
                type = "GMMS";
        } else if (acpi_evalf(hkey_handle, &res, "MHKG", "qd")) {
@@ -4228,6 +4235,7 @@ static void hotkey_resume(void)
                pr_err("error while attempting to reset the event firmware interface\n");
 
        tpacpi_send_radiosw_update();
+       tpacpi_input_send_tabletsw();
        hotkey_tablet_mode_notify_change();
        hotkey_wakeup_reason_notify_change();
        hotkey_wakeup_hotunplug_complete_notify_change();
@@ -8776,6 +8784,8 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
        TPACPI_Q_LNV3('N', '2', 'C', TPACPI_FAN_2CTL),  /* P52 / P72 */
        TPACPI_Q_LNV3('N', '2', 'E', TPACPI_FAN_2CTL),  /* P1 / X1 Extreme (1st gen) */
        TPACPI_Q_LNV3('N', '2', 'O', TPACPI_FAN_2CTL),  /* P1 / X1 Extreme (2nd gen) */
+       TPACPI_Q_LNV3('N', '2', 'V', TPACPI_FAN_2CTL),  /* P1 / X1 Extreme (3rd gen) */
+       TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL),  /* P15 (1st gen) / P15v (1st gen) */
 };
 
 static int __init fan_init(struct ibm_init_struct *iibm)
@@ -9703,6 +9713,7 @@ static const struct tpacpi_quirk battery_quirk_table[] __initconst = {
        TPACPI_Q_LNV3('R', '0', 'B', true), /* Thinkpad 11e gen 3 */
        TPACPI_Q_LNV3('R', '0', 'C', true), /* Thinkpad 13 */
        TPACPI_Q_LNV3('R', '0', 'J', true), /* Thinkpad 13 gen 2 */
+       TPACPI_Q_LNV3('R', '0', 'K', true), /* Thinkpad 11e gen 4 celeron BIOS */
 };
 
 static int __init tpacpi_battery_init(struct ibm_init_struct *ibm)
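
The tablet-mode hunk above gates the GMMS hotkey on acpi_dev_present(), which checks the ACPI namespace for a device with the given HID, optional UID, and minimum hardware revision (-1 matches any). A short sketch of that guard in isolation; the surrounding function is hypothetical:

    #include <linux/acpi.h>
    #include <linux/types.h>

    /* Returns true when the quirky dual-accelerometer node is present,
     * i.e. when tablet-mode reporting via GMMS should be skipped.
     */
    static bool has_dual_bosch_accel(void)
    {
            /* HID "BOSC0200", UID "1", any hardware revision */
            return acpi_dev_present("BOSC0200", "1", -1);
    }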
index e557d75..fa7232a 100644 (file)
@@ -1478,7 +1478,7 @@ static ssize_t video_proc_write(struct file *file, const char __user *buf,
        struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file));
        char *buffer;
        char *cmd;
-       int lcd_out, crt_out, tv_out;
+       int lcd_out = -1, crt_out = -1, tv_out = -1;
        int remain = count;
        int value;
        int ret;
@@ -1510,7 +1510,6 @@ static ssize_t video_proc_write(struct file *file, const char __user *buf,
 
        kfree(cmd);
 
-       lcd_out = crt_out = tv_out = -1;
        ret = get_video_status(dev, &video_out);
        if (!ret) {
                unsigned int new_video_out = video_out;
index dda60f8..5783139 100644 (file)
@@ -295,6 +295,21 @@ static const struct ts_dmi_data irbis_tw90_data = {
        .properties     = irbis_tw90_props,
 };
 
+static const struct property_entry irbis_tw118_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-min-x", 20),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 30),
+       PROPERTY_ENTRY_U32("touchscreen-size-x", 1960),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 1510),
+       PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-irbis-tw118.fw"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+       { }
+};
+
+static const struct ts_dmi_data irbis_tw118_data = {
+       .acpi_name      = "MSSL1680:00",
+       .properties     = irbis_tw118_props,
+};
+
 static const struct property_entry itworks_tw891_props[] = {
        PROPERTY_ENTRY_U32("touchscreen-min-x", 1),
        PROPERTY_ENTRY_U32("touchscreen-min-y", 5),
@@ -623,6 +638,23 @@ static const struct ts_dmi_data pov_mobii_wintab_p1006w_v10_data = {
        .properties     = pov_mobii_wintab_p1006w_v10_props,
 };
 
+static const struct property_entry predia_basic_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-min-x", 3),
+       PROPERTY_ENTRY_U32("touchscreen-min-y", 10),
+       PROPERTY_ENTRY_U32("touchscreen-size-x", 1728),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 1144),
+       PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+       PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-predia-basic.fw"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+       PROPERTY_ENTRY_BOOL("silead,home-button"),
+       { }
+};
+
+static const struct ts_dmi_data predia_basic_data = {
+       .acpi_name      = "MSSL1680:00",
+       .properties     = predia_basic_props,
+};
+
 static const struct property_entry schneider_sct101ctm_props[] = {
        PROPERTY_ENTRY_U32("touchscreen-size-x", 1715),
        PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
@@ -937,6 +969,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
                },
        },
        {
+               /* Irbis TW118 */
+               .driver_data = (void *)&irbis_tw118_data,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "IRBIS"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "TW118"),
+               },
+       },
+       {
                /* I.T.Works TW891 */
                .driver_data = (void *)&itworks_tw891_data,
                .matches = {
@@ -1110,6 +1150,16 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
                },
        },
        {
+               /* Predia Basic tablet */
+               .driver_data = (void *)&predia_basic_data,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "CherryTrail"),
+                       /* Above matches are too generic, add a BIOS-version match */
+                       DMI_MATCH(DMI_BIOS_VERSION, "Mx.WT107.KUBNGEA"),
+               },
+       },
+       {
                /* Point of View mobii wintab p800w (v2.1) */
                .driver_data = (void *)&pov_mobii_wintab_p800w_v21_data,
                .matches = {
index e020faf..6632557 100644 (file)
@@ -103,43 +103,26 @@ static int timespec_to_char_array(struct timespec64 const *ts,
        return 0;
 }
 
-static int idtcm_strverscmp(const char *ver1, const char *ver2)
+static int idtcm_strverscmp(const char *version1, const char *version2)
 {
-       u8 num1;
-       u8 num2;
-       int result = 0;
-
-       /* loop through each level of the version string */
-       while (result == 0) {
-               /* extract leading version numbers */
-               if (kstrtou8(ver1, 10, &num1) < 0)
-                       return -1;
+       u8 ver1[3], ver2[3];
+       int i;
 
-               if (kstrtou8(ver2, 10, &num2) < 0)
-                       return -1;
+       if (sscanf(version1, "%hhu.%hhu.%hhu",
+                  &ver1[0], &ver1[1], &ver1[2]) != 3)
+               return -1;
+       if (sscanf(version2, "%hhu.%hhu.%hhu",
+                  &ver2[0], &ver2[1], &ver2[2]) != 3)
+               return -1;
 
-               /* if numbers differ, then set the result */
-               if (num1 < num2)
-                       result = -1;
-               else if (num1 > num2)
-                       result = 1;
-               else {
-                       /* if numbers are the same, go to next level */
-                       ver1 = strchr(ver1, '.');
-                       ver2 = strchr(ver2, '.');
-                       if (!ver1 && !ver2)
-                               break;
-                       else if (!ver1)
-                               result = -1;
-                       else if (!ver2)
-                               result = 1;
-                       else {
-                               ver1++;
-                               ver2++;
-                       }
-               }
+       for (i = 0; i < 3; i++) {
+               if (ver1[i] > ver2[i])
+                       return 1;
+               if (ver1[i] < ver2[i])
+                       return -1;
        }
-       return result;
+
+       return 0;
 }
 
 static int idtcm_xfer_read(struct idtcm *idtcm,
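
The rewritten comparator requires exactly three dot-separated numeric fields and compares them most-significant first; note that a malformed string and a genuinely smaller version both yield -1. Its semantics, reproduced as a runnable userspace sketch:

    #include <stdio.h>

    /* Mirrors the new idtcm_strverscmp(): -1 on parse error or v1 < v2,
     * 1 on v1 > v2, 0 on equality.
     */
    static int vercmp(const char *v1, const char *v2)
    {
            unsigned char a[3], b[3];
            int i;

            if (sscanf(v1, "%hhu.%hhu.%hhu", &a[0], &a[1], &a[2]) != 3)
                    return -1;
            if (sscanf(v2, "%hhu.%hhu.%hhu", &b[0], &b[1], &b[2]) != 3)
                    return -1;

            for (i = 0; i < 3; i++) {
                    if (a[i] > b[i])
                            return 1;
                    if (a[i] < b[i])
                            return -1;
            }
            return 0;
    }

    int main(void)
    {
            printf("%d\n", vercmp("4.8.7", "4.8.7"));   /* 0 */
            printf("%d\n", vercmp("4.10.0", "4.9.9"));  /* 1 */
            return 0;
    }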
index 5046b6b..b4c651f 100644 (file)
@@ -84,12 +84,14 @@ struct sl28cpld_pwm {
        struct regmap *regmap;
        u32 offset;
 };
+#define sl28cpld_pwm_from_chip(_chip) \
+       container_of(_chip, struct sl28cpld_pwm, pwm_chip)
 
 static void sl28cpld_pwm_get_state(struct pwm_chip *chip,
                                   struct pwm_device *pwm,
                                   struct pwm_state *state)
 {
-       struct sl28cpld_pwm *priv = dev_get_drvdata(chip->dev);
+       struct sl28cpld_pwm *priv = sl28cpld_pwm_from_chip(chip);
        unsigned int reg;
        int prescaler;
 
@@ -118,7 +120,7 @@ static void sl28cpld_pwm_get_state(struct pwm_chip *chip,
 static int sl28cpld_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                              const struct pwm_state *state)
 {
-       struct sl28cpld_pwm *priv = dev_get_drvdata(chip->dev);
+       struct sl28cpld_pwm *priv = sl28cpld_pwm_from_chip(chip);
        unsigned int cycle, prescaler;
        bool write_duty_cycle_first;
        int ret;
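
The sl28cpld hunks replace dev_get_drvdata() with a container_of() helper: since the pwm_chip is embedded in the driver's private struct, its address alone recovers the enclosing object, with no reliance on drvdata having been set on the parent device. A generic sketch of the idiom with illustrative types:

    #include <stddef.h>

    /* container_of() as defined (modulo type-checking) in the kernel. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct pwm_chip { int npwm; };

    struct my_pwm {                     /* illustrative stand-in for sl28cpld_pwm */
            struct pwm_chip chip;
            unsigned int offset;
    };

    static struct my_pwm *from_chip(struct pwm_chip *chip)
    {
            /* Valid only because chip is embedded inside struct my_pwm. */
            return container_of(chip, struct my_pwm, chip);
    }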
index a5ad553..42bbd99 100644 (file)
@@ -1315,7 +1315,6 @@ static int _regulator_do_enable(struct regulator_dev *rdev);
 /**
  * set_machine_constraints - sets regulator constraints
  * @rdev: regulator source
- * @constraints: constraints to apply
  *
  * Allows platform initialisation code to define and constrain
  * regulator circuits e.g. valid voltage/current ranges, etc.  NOTE:
@@ -1323,21 +1322,11 @@ static int _regulator_do_enable(struct regulator_dev *rdev);
  * regulator operations to proceed i.e. set_voltage, set_current_limit,
  * set_mode.
  */
-static int set_machine_constraints(struct regulator_dev *rdev,
-       const struct regulation_constraints *constraints)
+static int set_machine_constraints(struct regulator_dev *rdev)
 {
        int ret = 0;
        const struct regulator_ops *ops = rdev->desc->ops;
 
-       if (constraints)
-               rdev->constraints = kmemdup(constraints, sizeof(*constraints),
-                                           GFP_KERNEL);
-       else
-               rdev->constraints = kzalloc(sizeof(*constraints),
-                                           GFP_KERNEL);
-       if (!rdev->constraints)
-               return -ENOMEM;
-
        ret = machine_constraints_voltage(rdev, rdev->constraints);
        if (ret != 0)
                return ret;
@@ -1852,6 +1841,15 @@ static int regulator_resolve_supply(struct regulator_dev *rdev)
                }
        }
 
+       if (r == rdev) {
+               dev_err(dev, "Supply for %s (%s) resolved to itself\n",
+                       rdev->desc->name, rdev->supply_name);
+               if (!have_full_constraints())
+                       return -EINVAL;
+               r = dummy_regulator_rdev;
+               get_device(&r->dev);
+       }
+
        /*
         * If the supply's parent device is not the same as the
         * regulator's parent device, then ensure the parent device
@@ -5146,7 +5144,6 @@ struct regulator_dev *
 regulator_register(const struct regulator_desc *regulator_desc,
                   const struct regulator_config *cfg)
 {
-       const struct regulation_constraints *constraints = NULL;
        const struct regulator_init_data *init_data;
        struct regulator_config *config = NULL;
        static atomic_t regulator_no = ATOMIC_INIT(-1);
@@ -5285,14 +5282,23 @@ regulator_register(const struct regulator_desc *regulator_desc,
 
        /* set regulator constraints */
        if (init_data)
-               constraints = &init_data->constraints;
+               rdev->constraints = kmemdup(&init_data->constraints,
+                                           sizeof(*rdev->constraints),
+                                           GFP_KERNEL);
+       else
+               rdev->constraints = kzalloc(sizeof(*rdev->constraints),
+                                           GFP_KERNEL);
+       if (!rdev->constraints) {
+               ret = -ENOMEM;
+               goto wash;
+       }
 
        if (init_data && init_data->supply_regulator)
                rdev->supply_name = init_data->supply_regulator;
        else if (regulator_desc->supply_name)
                rdev->supply_name = regulator_desc->supply_name;
 
-       ret = set_machine_constraints(rdev, constraints);
+       ret = set_machine_constraints(rdev);
        if (ret == -EPROBE_DEFER) {
                /* Regulator might be in bypass mode and so needs its supply
                 * to set the constraints */
@@ -5301,7 +5307,7 @@ regulator_register(const struct regulator_desc *regulator_desc,
                 * that is just being created */
                ret = regulator_resolve_supply(rdev);
                if (!ret)
-                       ret = set_machine_constraints(rdev, constraints);
+                       ret = set_machine_constraints(rdev);
                else
                        rdev_dbg(rdev, "unable to resolve supply early: %pe\n",
                                 ERR_PTR(ret));
@@ -5843,13 +5849,14 @@ static int regulator_late_cleanup(struct device *dev, void *data)
        if (rdev->use_count)
                goto unlock;
 
-       /* If we can't read the status assume it's on. */
+       /* If we can't read the status assume it's always on. */
        if (ops->is_enabled)
                enabled = ops->is_enabled(rdev);
        else
                enabled = 1;
 
-       if (!enabled)
+       /* But if reading the status failed, assume that it's off. */
+       if (enabled <= 0)
                goto unlock;
 
        if (have_full_constraints()) {
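
The constraint handling moved into regulator_register() above follows a common allocation idiom: kmemdup() a caller-supplied template when there is one, otherwise kzalloc() a zeroed default, so everything downstream can assume the pointer is valid. A minimal sketch, assuming a simplified constraints struct:

    #include <linux/slab.h>

    struct constraints { int min_uV, max_uV; };

    /* Returns a heap copy the caller owns, or NULL on allocation failure. */
    static struct constraints *dup_or_default(const struct constraints *tmpl)
    {
            if (tmpl)
                    return kmemdup(tmpl, sizeof(*tmpl), GFP_KERNEL);
            /* No template: all-zero constraints mean "no restrictions". */
            return kzalloc(sizeof(*tmpl), GFP_KERNEL);
    }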
index 7e8ba92..01a12cf 100644 (file)
@@ -836,11 +836,14 @@ static int pfuze100_regulator_probe(struct i2c_client *client,
                 * the switched regulator till yet.
                 */
                if (pfuze_chip->flags & PFUZE_FLAG_DISABLE_SW) {
-                       if (pfuze_chip->regulator_descs[i].sw_reg) {
-                               desc->ops = &pfuze100_sw_disable_regulator_ops;
-                               desc->enable_val = 0x8;
-                               desc->disable_val = 0x0;
-                               desc->enable_time = 500;
+                       if (pfuze_chip->chip_id == PFUZE100 ||
+                               pfuze_chip->chip_id == PFUZE200) {
+                               if (pfuze_chip->regulator_descs[i].sw_reg) {
+                                       desc->ops = &pfuze100_sw_disable_regulator_ops;
+                                       desc->enable_val = 0x8;
+                                       desc->disable_val = 0x0;
+                                       desc->enable_time = 500;
+                               }
                        }
                }
 
index 3e60bff..9f0a4d5 100644 (file)
@@ -342,8 +342,17 @@ static int ti_abb_set_voltage_sel(struct regulator_dev *rdev, unsigned sel)
                return ret;
        }
 
-       /* If data is exactly the same, then just update index, no change */
        info = &abb->info[sel];
+       /*
+        * When the Linux kernel is starting up, we aren't sure of the
+        * bias configuration that the bootloader has programmed.
+        * So we learn the actual setting the first time we are
+        * asked to transition.
+        */
+       if (abb->current_info_idx == -EINVAL)
+               goto just_set_abb;
+
+       /* If data is exactly the same, then just update index, no change */
        oinfo = &abb->info[abb->current_info_idx];
        if (!memcmp(info, oinfo, sizeof(*info))) {
                dev_dbg(dev, "%s: Same data new idx=%d, old idx=%d\n", __func__,
@@ -351,6 +360,7 @@ static int ti_abb_set_voltage_sel(struct regulator_dev *rdev, unsigned sel)
                goto out;
        }
 
+just_set_abb:
        ret = ti_abb_set_opp(rdev, abb, info);
 
 out:
index eb17fea..217a7b8 100644 (file)
@@ -2980,6 +2980,12 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
 
        if (!block)
                return -EINVAL;
+       /*
+        * If the request is an ERP request there is nothing to requeue.
+        * This will be done with the remaining original request.
+        */
+       if (cqr->refers)
+               return 0;
        spin_lock_irq(&cqr->dq->lock);
        req = (struct request *) cqr->callback_data;
        blk_mq_requeue_request(req, false);
index f73b475..b235393 100644 (file)
@@ -417,10 +417,13 @@ enum qeth_qdio_out_buffer_state {
        QETH_QDIO_BUF_EMPTY,
        /* Filled by driver; owned by hardware in order to be sent. */
        QETH_QDIO_BUF_PRIMED,
-       /* Identified to be pending in TPQ. */
+       /* Discovered by the TX completion code: */
        QETH_QDIO_BUF_PENDING,
-       /* Found in completion queue. */
-       QETH_QDIO_BUF_IN_CQ,
+       /* Finished by the TX completion code: */
+       QETH_QDIO_BUF_NEED_QAOB,
+       /* Received QAOB notification on CQ: */
+       QETH_QDIO_BUF_QAOB_OK,
+       QETH_QDIO_BUF_QAOB_ERROR,
        /* Handled via transfer pending / completion queue. */
        QETH_QDIO_BUF_HANDLED_DELAYED,
 };
index 93c9b30..e27319d 100644 (file)
@@ -33,6 +33,7 @@
 
 #include <net/iucv/af_iucv.h>
 #include <net/dsfield.h>
+#include <net/sock.h>
 
 #include <asm/ebcdic.h>
 #include <asm/chpid.h>
@@ -499,17 +500,12 @@ static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx,
 
                }
        }
-       if (forced_cleanup && (atomic_read(&(q->bufs[bidx]->state)) ==
-                                       QETH_QDIO_BUF_HANDLED_DELAYED)) {
-               /* for recovery situations */
-               qeth_init_qdio_out_buf(q, bidx);
-               QETH_CARD_TEXT(q->card, 2, "clprecov");
-       }
 }
 
 static void qeth_qdio_handle_aob(struct qeth_card *card,
                                 unsigned long phys_aob_addr)
 {
+       enum qeth_qdio_out_buffer_state new_state = QETH_QDIO_BUF_QAOB_OK;
        struct qaob *aob;
        struct qeth_qdio_out_buffer *buffer;
        enum iucv_tx_notify notification;
@@ -521,22 +517,6 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
        buffer = (struct qeth_qdio_out_buffer *) aob->user1;
        QETH_CARD_TEXT_(card, 5, "%lx", aob->user1);
 
-       if (atomic_cmpxchg(&buffer->state, QETH_QDIO_BUF_PRIMED,
-                          QETH_QDIO_BUF_IN_CQ) == QETH_QDIO_BUF_PRIMED) {
-               notification = TX_NOTIFY_OK;
-       } else {
-               WARN_ON_ONCE(atomic_read(&buffer->state) !=
-                                                       QETH_QDIO_BUF_PENDING);
-               atomic_set(&buffer->state, QETH_QDIO_BUF_IN_CQ);
-               notification = TX_NOTIFY_DELAYED_OK;
-       }
-
-       if (aob->aorc != 0)  {
-               QETH_CARD_TEXT_(card, 2, "aorc%02X", aob->aorc);
-               notification = qeth_compute_cq_notification(aob->aorc, 1);
-       }
-       qeth_notify_skbs(buffer->q, buffer, notification);
-
        /* Free dangling allocations. The attached skbs are handled by
         * qeth_cleanup_handled_pending().
         */
@@ -548,7 +528,33 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
                if (data && buffer->is_header[i])
                        kmem_cache_free(qeth_core_header_cache, data);
        }
-       atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED);
+
+       if (aob->aorc) {
+               QETH_CARD_TEXT_(card, 2, "aorc%02X", aob->aorc);
+               new_state = QETH_QDIO_BUF_QAOB_ERROR;
+       }
+
+       switch (atomic_xchg(&buffer->state, new_state)) {
+       case QETH_QDIO_BUF_PRIMED:
+               /* Faster than TX completion code. */
+               notification = qeth_compute_cq_notification(aob->aorc, 0);
+               qeth_notify_skbs(buffer->q, buffer, notification);
+               atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED);
+               break;
+       case QETH_QDIO_BUF_PENDING:
+               /* TX completion code is active and will handle the async
+                * completion for us.
+                */
+               break;
+       case QETH_QDIO_BUF_NEED_QAOB:
+               /* TX completion code is already finished. */
+               notification = qeth_compute_cq_notification(aob->aorc, 1);
+               qeth_notify_skbs(buffer->q, buffer, notification);
+               atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+       }
 
        qdio_release_aob(aob);
 }
@@ -1405,7 +1411,7 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *q,
        skb_queue_walk(&buf->skb_list, skb) {
                QETH_CARD_TEXT_(q->card, 5, "skbn%d", notification);
                QETH_CARD_TEXT_(q->card, 5, "%lx", (long) skb);
-               if (skb->protocol == htons(ETH_P_AF_IUCV) && skb->sk)
+               if (skb->sk && skb->sk->sk_family == PF_IUCV)
                        iucv_sk(skb->sk)->sk_txnotify(skb, notification);
        }
 }
@@ -1416,9 +1422,6 @@ static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error,
        struct qeth_qdio_out_q *queue = buf->q;
        struct sk_buff *skb;
 
-       /* release may never happen from within CQ tasklet scope */
-       WARN_ON_ONCE(atomic_read(&buf->state) == QETH_QDIO_BUF_IN_CQ);
-
        if (atomic_read(&buf->state) == QETH_QDIO_BUF_PENDING)
                qeth_notify_skbs(queue, buf, TX_NOTIFY_GENERALERROR);
 
@@ -5869,9 +5872,32 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue,
 
                if (atomic_cmpxchg(&buffer->state, QETH_QDIO_BUF_PRIMED,
                                                   QETH_QDIO_BUF_PENDING) ==
-                   QETH_QDIO_BUF_PRIMED)
+                   QETH_QDIO_BUF_PRIMED) {
                        qeth_notify_skbs(queue, buffer, TX_NOTIFY_PENDING);
 
+                       /* Handle race with qeth_qdio_handle_aob(): */
+                       switch (atomic_xchg(&buffer->state,
+                                           QETH_QDIO_BUF_NEED_QAOB)) {
+                       case QETH_QDIO_BUF_PENDING:
+                               /* No concurrent QAOB notification. */
+                               break;
+                       case QETH_QDIO_BUF_QAOB_OK:
+                               qeth_notify_skbs(queue, buffer,
+                                                TX_NOTIFY_DELAYED_OK);
+                               atomic_set(&buffer->state,
+                                          QETH_QDIO_BUF_HANDLED_DELAYED);
+                               break;
+                       case QETH_QDIO_BUF_QAOB_ERROR:
+                               qeth_notify_skbs(queue, buffer,
+                                                TX_NOTIFY_DELAYED_GENERALERROR);
+                               atomic_set(&buffer->state,
+                                          QETH_QDIO_BUF_HANDLED_DELAYED);
+                               break;
+                       default:
+                               WARN_ON_ONCE(1);
+                       }
+               }
+
                QETH_CARD_TEXT_(card, 5, "pel%u", bidx);
 
                /* prepare the queue slot for re-use: */
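
This hunk and its counterpart in qeth_iqd_tx_complete() above replace a single flag update with a lock-free handshake: each of the two completion paths (TX completion and async QAOB delivery) atomically exchanges the buffer state and reacts to what the other side had stored, so exactly one of them sends the notification. A compressed sketch with simplified states:

    #include <linux/atomic.h>

    enum { BUF_PRIMED, BUF_PENDING, BUF_NEED_QAOB, BUF_QAOB_DONE };

    static atomic_t state = ATOMIC_INIT(BUF_PRIMED);

    static void notify(void) { /* deliver the TX notification exactly once */ }

    static void qaob_side(void)             /* async completion path */
    {
            switch (atomic_xchg(&state, BUF_QAOB_DONE)) {
            case BUF_PRIMED:                /* ran before the TX path */
            case BUF_NEED_QAOB:             /* TX path finished, deferred to us */
                    notify();
                    break;
            case BUF_PENDING:
                    /* TX path is mid-flight; it will observe BUF_QAOB_DONE */
                    break;
            }
    }

    static void tx_side(void)               /* TX completion path */
    {
            if (atomic_cmpxchg(&state, BUF_PRIMED, BUF_PENDING) != BUF_PRIMED)
                    return;                 /* QAOB side already handled it */

            /* ... per-buffer work ... */

            if (atomic_xchg(&state, BUF_NEED_QAOB) == BUF_QAOB_DONE)
                    notify();               /* QAOB landed while we worked */
    }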
index 28f6dda..79939ba 100644 (file)
@@ -985,32 +985,19 @@ static void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card)
  *     change notification' and thus can support the learning_sync bridgeport
  *     attribute
  *     @card: qeth_card structure pointer
- *
- *     This is a destructive test and must be called before dev2br or
- *     bridgeport address notification is enabled!
  */
 static void qeth_l2_detect_dev2br_support(struct qeth_card *card)
 {
        struct qeth_priv *priv = netdev_priv(card->dev);
        bool dev2br_supported;
-       int rc;
 
        QETH_CARD_TEXT(card, 2, "d2brsup");
        if (!IS_IQD(card))
                return;
 
        /* dev2br requires valid cssid,iid,chid */
-       if (!card->info.ids_valid) {
-               dev2br_supported = false;
-       } else if (css_general_characteristics.enarf) {
-               dev2br_supported = true;
-       } else {
-               /* Old machines don't have the feature bit:
-                * Probe by testing whether a disable succeeds
-                */
-               rc = qeth_l2_pnso(card, PNSO_OC_NET_ADDR_INFO, 0, NULL, NULL);
-               dev2br_supported = !rc;
-       }
+       dev2br_supported = card->info.ids_valid &&
+                          css_general_characteristics.enarf;
        QETH_CARD_TEXT_(card, 2, "D2Bsup%02x", dev2br_supported);
 
        if (dev2br_supported)
@@ -2233,7 +2220,6 @@ static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
        struct net_device *dev = card->dev;
        int rc = 0;
 
-       /* query before bridgeport_notification may be enabled */
        qeth_l2_detect_dev2br_support(card);
 
        mutex_lock(&card->sbp_lock);
index 202ba92..5c3513a 100644 (file)
@@ -3020,7 +3020,6 @@ static int beiscsi_create_eqs(struct beiscsi_hba *phba,
                        goto create_eq_error;
                }
 
-               mem->dma = paddr;
                mem->va = eq_vaddress;
                ret = be_fill_queue(eq, phba->params.num_eq_entries,
                                    sizeof(struct be_eq_entry), eq_vaddress);
@@ -3030,6 +3029,7 @@ static int beiscsi_create_eqs(struct beiscsi_hba *phba,
                        goto create_eq_error;
                }
 
+               mem->dma = paddr;
                ret = beiscsi_cmd_eq_create(&phba->ctrl, eq,
                                            BEISCSI_EQ_DELAY_DEF);
                if (ret) {
@@ -3086,7 +3086,6 @@ static int beiscsi_create_cqs(struct beiscsi_hba *phba,
                        goto create_cq_error;
                }
 
-               mem->dma = paddr;
                ret = be_fill_queue(cq, phba->params.num_cq_entries,
                                    sizeof(struct sol_cqe), cq_vaddress);
                if (ret) {
@@ -3096,6 +3095,7 @@ static int beiscsi_create_cqs(struct beiscsi_hba *phba,
                        goto create_cq_error;
                }
 
+               mem->dma = paddr;
                ret = beiscsi_cmd_cq_create(&phba->ctrl, cq, eq, false,
                                            false, 0);
                if (ret) {
index 75ace23..0cc06c2 100644 (file)
@@ -4,6 +4,7 @@ config SCSI_BNX2_ISCSI
        depends on NET
        depends on PCI
        depends on (IPV6 || IPV6=n)
+       depends on MMU
        select SCSI_ISCSI_ATTRS
        select NETDEVICES
        select ETHERNET
index c8dd858..274ccf1 100644 (file)
@@ -452,6 +452,12 @@ static int hisi_sas_task_prep(struct sas_task *task,
                blk_tag = blk_mq_unique_tag(scmd->request);
                dq_index = blk_mq_unique_tag_to_hwq(blk_tag);
                *dq_pointer = dq = &hisi_hba->dq[dq_index];
+       } else if (hisi_hba->shost->nr_hw_queues)  {
+               struct Scsi_Host *shost = hisi_hba->shost;
+               struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
+               int queue = qmap->mq_map[raw_smp_processor_id()];
+
+               *dq_pointer = dq = &hisi_hba->dq[queue];
        } else {
                *dq_pointer = dq = sas_dev->dq;
        }
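
The hisi_sas hunk above picks a delivery queue for internal commands by looking up which hardware queue the current CPU maps to, so internal I/O follows the same CPU-to-queue spreading as block-layer traffic. A sketch of that lookup, assuming managed-interrupt mappings are already set up:

    #include <linux/blk-mq.h>
    #include <linux/smp.h>
    #include <scsi/scsi_host.h>

    static unsigned int queue_for_this_cpu(struct Scsi_Host *shost)
    {
            struct blk_mq_queue_map *qmap =
                    &shost->tag_set.map[HCTX_TYPE_DEFAULT];

            /* mq_map[] is a per-possible-CPU array of hw queue indices. */
            return qmap->mq_map[raw_smp_processor_id()];
    }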
index 7133ca8..960de37 100644 (file)
@@ -2452,6 +2452,11 @@ static int interrupt_init_v3_hw(struct hisi_hba *hisi_hba)
                        rc = -ENOENT;
                        goto free_irq_vectors;
                }
+               cq->irq_mask = pci_irq_get_affinity(pdev, i + BASE_VECTORS_V3_HW);
+               if (!cq->irq_mask) {
+                       dev_err(dev, "could not get cq%d irq affinity!\n", i);
+                       return -ENOENT;
+               }
        }
 
        return 0;
index 1e9c317..f9314f1 100644 (file)
@@ -533,8 +533,8 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)
        if (conn->task == task)
                conn->task = NULL;
 
-       if (conn->ping_task == task)
-               conn->ping_task = NULL;
+       if (READ_ONCE(conn->ping_task) == task)
+               WRITE_ONCE(conn->ping_task, NULL);
 
        /* release get from queueing */
        __iscsi_put_task(task);
@@ -738,6 +738,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                                                   task->conn->session->age);
        }
 
+       if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK))
+               WRITE_ONCE(conn->ping_task, task);
+
        if (!ihost->workq) {
                if (iscsi_prep_mgmt_task(conn, task))
                        goto free_task;
@@ -941,8 +944,11 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
         struct iscsi_nopout hdr;
        struct iscsi_task *task;
 
-       if (!rhdr && conn->ping_task)
-               return -EINVAL;
+       if (!rhdr) {
+               if (READ_ONCE(conn->ping_task))
+                       return -EINVAL;
+               WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK);
+       }
 
        memset(&hdr, 0, sizeof(struct iscsi_nopout));
        hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE;
@@ -957,11 +963,12 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
 
        task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0);
        if (!task) {
+               if (!rhdr)
+                       WRITE_ONCE(conn->ping_task, NULL);
                iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n");
                return -EIO;
        } else if (!rhdr) {
                /* only track our nops */
-               conn->ping_task = task;
                conn->last_ping = jiffies;
        }
 
@@ -984,7 +991,7 @@ static int iscsi_nop_out_rsp(struct iscsi_task *task,
        struct iscsi_conn *conn = task->conn;
        int rc = 0;
 
-       if (conn->ping_task != task) {
+       if (READ_ONCE(conn->ping_task) != task) {
                /*
                 * If this is not in response to one of our
                 * nops then it must be from userspace.
@@ -1923,7 +1930,7 @@ static void iscsi_start_tx(struct iscsi_conn *conn)
  */
 static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
 {
-       if (conn->ping_task &&
+       if (READ_ONCE(conn->ping_task) &&
            time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +
                           (conn->ping_timeout * HZ), jiffies))
                return 1;
@@ -2058,7 +2065,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
         * Checking the transport already or nop from a cmd timeout still
         * running
         */
-       if (conn->ping_task) {
+       if (READ_ONCE(conn->ping_task)) {
                task->have_checked_conn = true;
                rc = BLK_EH_RESET_TIMER;
                goto done;
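
Every lockless access to conn->ping_task above is now wrapped in READ_ONCE()/WRITE_ONCE(), which stops the compiler from tearing, fusing, or re-reading the pointer, and the slot is claimed early with a sentinel (INVALID_SCSI_TASK) so no second nopout can be queued before the task is allocated. A condensed sketch of the pattern; it assumes, as the driver does, that would-be claimers are serialized against each other, with *_ONCE protecting the concurrent readers:

    #include <linux/compiler.h>
    #include <linux/errno.h>

    struct task;
    #define INVALID_TASK ((struct task *)-1L)    /* sentinel a la INVALID_SCSI_TASK */

    static struct task *ping_task;

    static int claim_ping_slot(void)
    {
            if (READ_ONCE(ping_task))            /* someone else owns the slot */
                    return -EINVAL;
            WRITE_ONCE(ping_task, INVALID_TASK); /* reserve before allocating */
            return 0;
    }

    static void publish_ping_task(struct task *t)
    {
            /* Swap in the real task, or NULL again if allocation failed. */
            WRITE_ONCE(ping_task, t);
    }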
index 41cd66f..e158d3d 100644 (file)
@@ -37,7 +37,6 @@
 #include <linux/poll.h>
 #include <linux/vmalloc.h>
 #include <linux/irq_poll.h>
-#include <linux/blk-mq-pci.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -114,10 +113,6 @@ unsigned int enable_sdev_max_qd;
 module_param(enable_sdev_max_qd, int, 0444);
 MODULE_PARM_DESC(enable_sdev_max_qd, "Enable sdev max qd as can_queue. Default: 0");
 
-int host_tagset_enable = 1;
-module_param(host_tagset_enable, int, 0444);
-MODULE_PARM_DESC(host_tagset_enable, "Shared host tagset enable/disable Default: enable(1)");
-
 MODULE_LICENSE("GPL");
 MODULE_VERSION(MEGASAS_VERSION);
 MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com");
@@ -3124,19 +3119,6 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev,
        return 0;
 }
 
-static int megasas_map_queues(struct Scsi_Host *shost)
-{
-       struct megasas_instance *instance;
-
-       instance = (struct megasas_instance *)shost->hostdata;
-
-       if (shost->nr_hw_queues == 1)
-               return 0;
-
-       return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
-                       instance->pdev, instance->low_latency_index_start);
-}
-
 static void megasas_aen_polling(struct work_struct *work);
 
 /**
@@ -3445,7 +3427,6 @@ static struct scsi_host_template megasas_template = {
        .eh_timed_out = megasas_reset_timer,
        .shost_attrs = megaraid_host_attrs,
        .bios_param = megasas_bios_param,
-       .map_queues = megasas_map_queues,
        .change_queue_depth = scsi_change_queue_depth,
        .max_segment_size = 0xffffffff,
 };
@@ -6827,26 +6808,6 @@ static int megasas_io_attach(struct megasas_instance *instance)
        host->max_lun = MEGASAS_MAX_LUN;
        host->max_cmd_len = 16;
 
-       /* Use shared host tagset only for fusion adaptors
-        * if there are managed interrupts (smp affinity enabled case).
-        * Single msix_vectors in kdump, so shared host tag is also disabled.
-        */
-
-       host->host_tagset = 0;
-       host->nr_hw_queues = 1;
-
-       if ((instance->adapter_type != MFI_SERIES) &&
-               (instance->msix_vectors > instance->low_latency_index_start) &&
-               host_tagset_enable &&
-               instance->smp_affinity_enable) {
-               host->host_tagset = 1;
-               host->nr_hw_queues = instance->msix_vectors -
-                       instance->low_latency_index_start;
-       }
-
-       dev_info(&instance->pdev->dev,
-               "Max firmware commands: %d shared with nr_hw_queues = %d\n",
-               instance->max_fw_cmds, host->nr_hw_queues);
        /*
         * Notify the mid-layer about the new controller
         */
index fd60728..b0c01cf 100644 (file)
@@ -359,29 +359,24 @@ megasas_get_msix_index(struct megasas_instance *instance,
 {
        int sdev_busy;
 
-       /* TBD - if sml remove device_busy in future, driver
-        * should track counter in internal structure.
-        */
-       sdev_busy = atomic_read(&scmd->device->device_busy);
+       /* nr_hw_queue = 1 for MegaRAID */
+       struct blk_mq_hw_ctx *hctx =
+               scmd->device->request_queue->queue_hw_ctx[0];
+
+       sdev_busy = atomic_read(&hctx->nr_active);
 
        if (instance->perf_mode == MR_BALANCED_PERF_MODE &&
-           sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) {
+           sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH))
                cmd->request_desc->SCSIIO.MSIxIndex =
                        mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) /
                                        MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start);
-       } else if (instance->msix_load_balance) {
+       else if (instance->msix_load_balance)
                cmd->request_desc->SCSIIO.MSIxIndex =
                        (mega_mod64(atomic64_add_return(1, &instance->total_io_count),
                                instance->msix_vectors));
-       } else if (instance->host->nr_hw_queues > 1) {
-               u32 tag = blk_mq_unique_tag(scmd->request);
-
-               cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) +
-                       instance->low_latency_index_start;
-       } else {
+       else
                cmd->request_desc->SCSIIO.MSIxIndex =
                        instance->reply_map[raw_smp_processor_id()];
-       }
 }
 
 /**
@@ -961,6 +956,9 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance)
        if (megasas_alloc_cmdlist_fusion(instance))
                goto fail_exit;
 
+       dev_info(&instance->pdev->dev, "Configured max firmware commands: %d\n",
+                instance->max_fw_cmds);
+
        /* The first 256 bytes (SMID 0) is not used. Don't add to the cmd list */
        io_req_base = fusion->io_request_frames + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE;
        io_req_base_phys = fusion->io_request_frames_phys + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE;
@@ -1104,9 +1102,8 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
                MR_HIGH_IOPS_QUEUE_COUNT) && cur_intr_coalescing)
                instance->perf_mode = MR_BALANCED_PERF_MODE;
 
-       dev_info(&instance->pdev->dev, "Performance mode :%s (latency index = %d)\n",
-               MEGASAS_PERF_MODE_2STR(instance->perf_mode),
-               instance->low_latency_index_start);
+       dev_info(&instance->pdev->dev, "Performance mode :%s\n",
+               MEGASAS_PERF_MODE_2STR(instance->perf_mode));
 
        instance->fw_sync_cache_support = (scratch_pad_1 &
                MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0;
index e4cc92b..bb940cb 100644 (file)
@@ -6459,7 +6459,7 @@ _base_send_ioc_init(struct MPT3SAS_ADAPTER *ioc)
 
        r = _base_handshake_req_reply_wait(ioc,
            sizeof(Mpi2IOCInitRequest_t), (u32 *)&mpi_request,
-           sizeof(Mpi2IOCInitReply_t), (u16 *)&mpi_reply, 10);
+           sizeof(Mpi2IOCInitReply_t), (u16 *)&mpi_reply, 30);
 
        if (r != 0) {
                ioc_err(ioc, "%s: handshake failed (r=%d)\n", __func__, r);
index 0f2b681..edd26a2 100644 (file)
@@ -664,7 +664,7 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg,
        Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request = NULL;
        struct _pcie_device *pcie_device = NULL;
        u16 smid;
-       u8 timeout;
+       unsigned long timeout;
        u8 issue_reset;
        u32 sz, sz_arg;
        void *psge;
index 60c7a7d..03c6d06 100644 (file)
@@ -1703,8 +1703,7 @@ out_put_budget:
                break;
        case BLK_STS_RESOURCE:
        case BLK_STS_ZONE_RESOURCE:
-               if (atomic_read(&sdev->device_busy) ||
-                   scsi_device_blocked(sdev))
+               if (scsi_device_blocked(sdev))
                        ret = BLK_STS_DEV_RESOURCE;
                break;
        default:
index 0c65fbd..ded00a8 100644 (file)
@@ -1994,8 +1994,10 @@ static int storvsc_probe(struct hv_device *device,
                        alloc_ordered_workqueue("storvsc_error_wq_%d",
                                                WQ_MEM_RECLAIM,
                                                host->host_no);
-       if (!host_dev->handle_error_wq)
+       if (!host_dev->handle_error_wq) {
+               ret = -ENOMEM;
                goto err_out2;
+       }
        INIT_WORK(&host_dev->host_scan_work, storvsc_host_scan);
        /* Register the HBA and start the scsi bus scan */
        ret = scsi_add_host(host, &device->device);
index 7a160b8..0c148fc 100644 (file)
@@ -1294,8 +1294,15 @@ static int ufshcd_devfreq_target(struct device *dev,
        }
        spin_unlock_irqrestore(hba->host->host_lock, irq_flags);
 
+       pm_runtime_get_noresume(hba->dev);
+       if (!pm_runtime_active(hba->dev)) {
+               pm_runtime_put_noidle(hba->dev);
+               ret = -EAGAIN;
+               goto out;
+       }
        start = ktime_get();
        ret = ufshcd_devfreq_scale(hba, scale_up);
+       pm_runtime_put(hba->dev);
 
        trace_ufshcd_profile_clk_scaling(dev_name(hba->dev),
                (scale_up ? "up" : "down"),
@@ -3192,13 +3199,19 @@ int ufshcd_read_desc_param(struct ufs_hba *hba,
        /* Get the length of descriptor */
        ufshcd_map_desc_id_to_length(hba, desc_id, &buff_len);
        if (!buff_len) {
-               dev_err(hba->dev, "%s: Failed to get desc length", __func__);
+               dev_err(hba->dev, "%s: Failed to get desc length\n", __func__);
+               return -EINVAL;
+       }
+
+       if (param_offset >= buff_len) {
+               dev_err(hba->dev, "%s: Invalid offset 0x%x in descriptor IDN 0x%x, length 0x%x\n",
+                       __func__, param_offset, desc_id, buff_len);
                return -EINVAL;
        }
 
        /* Check whether we need temp memory */
        if (param_offset != 0 || param_size < buff_len) {
-               desc_buf = kmalloc(buff_len, GFP_KERNEL);
+               desc_buf = kzalloc(buff_len, GFP_KERNEL);
                if (!desc_buf)
                        return -ENOMEM;
        } else {
@@ -3212,14 +3225,14 @@ int ufshcd_read_desc_param(struct ufs_hba *hba,
                                        desc_buf, &buff_len);
 
        if (ret) {
-               dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d",
+               dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d\n",
                        __func__, desc_id, desc_index, param_offset, ret);
                goto out;
        }
 
        /* Sanity check */
        if (desc_buf[QUERY_DESC_DESC_TYPE_OFFSET] != desc_id) {
-               dev_err(hba->dev, "%s: invalid desc_id %d in descriptor header",
+               dev_err(hba->dev, "%s: invalid desc_id %d in descriptor header\n",
                        __func__, desc_buf[QUERY_DESC_DESC_TYPE_OFFSET]);
                ret = -EINVAL;
                goto out;
@@ -3229,12 +3242,12 @@ int ufshcd_read_desc_param(struct ufs_hba *hba,
        buff_len = desc_buf[QUERY_DESC_LENGTH_OFFSET];
        ufshcd_update_desc_length(hba, desc_id, desc_index, buff_len);
 
-       /* Check wherher we will not copy more data, than available */
-       if (is_kmalloc && (param_offset + param_size) > buff_len)
-               param_size = buff_len - param_offset;
-
-       if (is_kmalloc)
+       if (is_kmalloc) {
+               /* Make sure we don't copy more data than available */
+               if (param_offset + param_size > buff_len)
+                       param_size = buff_len - param_offset;
                memcpy(param_read_buf, &desc_buf[param_offset], param_size);
+       }
 out:
        if (is_kmalloc)
                kfree(desc_buf);
@@ -8900,11 +8913,7 @@ int ufshcd_shutdown(struct ufs_hba *hba)
        if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba))
                goto out;
 
-       if (pm_runtime_suspended(hba->dev)) {
-               ret = ufshcd_runtime_resume(hba);
-               if (ret)
-                       goto out;
-       }
+       pm_runtime_get_sync(hba->dev);
 
        ret = ufshcd_suspend(hba, UFS_SHUTDOWN_PM);
 out:
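
The devfreq hunk above refuses to scale clocks while the controller is runtime-suspended: pm_runtime_get_noresume() pins the usage count without waking the device, pm_runtime_active() reports whether it is actually powered, and the matching put is issued on both paths. The guard in isolation, as a sketch:

    #include <linux/pm_runtime.h>

    static int do_work_if_powered(struct device *dev)
    {
            int ret = 0;

            pm_runtime_get_noresume(dev);           /* take a ref, don't resume */
            if (!pm_runtime_active(dev)) {
                    pm_runtime_put_noidle(dev);     /* drop ref, skip idle check */
                    return -EAGAIN;                 /* device is suspended */
            }

            /* ... safe to touch the hardware here ... */

            pm_runtime_put(dev);                    /* drop ref, allow autosuspend */
            return ret;
    }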
index 7b642c3..7f397b4 100644 (file)
@@ -95,7 +95,6 @@ static int register_dpio_irq_handlers(struct fsl_mc_device *dpio_dev, int cpu)
 {
        int error;
        struct fsl_mc_device_irq *irq;
-       cpumask_t mask;
 
        irq = dpio_dev->irqs[0];
        error = devm_request_irq(&dpio_dev->dev,
@@ -112,9 +111,7 @@ static int register_dpio_irq_handlers(struct fsl_mc_device *dpio_dev, int cpu)
        }
 
        /* set the affinity hint */
-       cpumask_clear(&mask);
-       cpumask_set_cpu(cpu, &mask);
-       if (irq_set_affinity_hint(irq->msi_desc->irq, &mask))
+       if (irq_set_affinity_hint(irq->msi_desc->irq, cpumask_of(cpu)))
                dev_err(&dpio_dev->dev,
                        "irq_set_affinity failed irq %d cpu %d\n",
                        irq->msi_desc->irq, cpu);
index 14c9d01..c028446 100644 (file)
@@ -1327,7 +1327,7 @@ int bcm_qspi_probe(struct platform_device *pdev,
 
        data = of_id->data;
 
-       master = spi_alloc_master(dev, sizeof(struct bcm_qspi));
+       master = devm_spi_alloc_master(dev, sizeof(struct bcm_qspi));
        if (!master) {
                dev_err(dev, "error allocating spi_master\n");
                return -ENOMEM;
@@ -1367,21 +1367,17 @@ int bcm_qspi_probe(struct platform_device *pdev,
 
        if (res) {
                qspi->base[MSPI]  = devm_ioremap_resource(dev, res);
-               if (IS_ERR(qspi->base[MSPI])) {
-                       ret = PTR_ERR(qspi->base[MSPI]);
-                       goto qspi_resource_err;
-               }
+               if (IS_ERR(qspi->base[MSPI]))
+                       return PTR_ERR(qspi->base[MSPI]);
        } else {
-               goto qspi_resource_err;
+               return 0;
        }
 
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "bspi");
        if (res) {
                qspi->base[BSPI]  = devm_ioremap_resource(dev, res);
-               if (IS_ERR(qspi->base[BSPI])) {
-                       ret = PTR_ERR(qspi->base[BSPI]);
-                       goto qspi_resource_err;
-               }
+               if (IS_ERR(qspi->base[BSPI]))
+                       return PTR_ERR(qspi->base[BSPI]);
                qspi->bspi_mode = true;
        } else {
                qspi->bspi_mode = false;
@@ -1392,18 +1388,14 @@ int bcm_qspi_probe(struct platform_device *pdev,
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cs_reg");
        if (res) {
                qspi->base[CHIP_SELECT]  = devm_ioremap_resource(dev, res);
-               if (IS_ERR(qspi->base[CHIP_SELECT])) {
-                       ret = PTR_ERR(qspi->base[CHIP_SELECT]);
-                       goto qspi_resource_err;
-               }
+               if (IS_ERR(qspi->base[CHIP_SELECT]))
+                       return PTR_ERR(qspi->base[CHIP_SELECT]);
        }
 
        qspi->dev_ids = kcalloc(num_irqs, sizeof(struct bcm_qspi_dev_id),
                                GFP_KERNEL);
-       if (!qspi->dev_ids) {
-               ret = -ENOMEM;
-               goto qspi_resource_err;
-       }
+       if (!qspi->dev_ids)
+               return -ENOMEM;
 
        for (val = 0; val < num_irqs; val++) {
                irq = -1;
@@ -1484,7 +1476,7 @@ int bcm_qspi_probe(struct platform_device *pdev,
        qspi->xfer_mode.addrlen = -1;
        qspi->xfer_mode.hp = -1;
 
-       ret = devm_spi_register_master(&pdev->dev, master);
+       ret = spi_register_master(master);
        if (ret < 0) {
                dev_err(dev, "can't register master\n");
                goto qspi_reg_err;
@@ -1497,8 +1489,6 @@ qspi_reg_err:
        clk_disable_unprepare(qspi->clk);
 qspi_probe_err:
        kfree(qspi->dev_ids);
-qspi_resource_err:
-       spi_master_put(master);
        return ret;
 }
 /* probe function to be called by SoC specific platform driver probe */
@@ -1508,10 +1498,10 @@ int bcm_qspi_remove(struct platform_device *pdev)
 {
        struct bcm_qspi *qspi = platform_get_drvdata(pdev);
 
+       spi_unregister_master(qspi->master);
        bcm_qspi_hw_uninit(qspi);
        clk_disable_unprepare(qspi->clk);
        kfree(qspi->dev_ids);
-       spi_unregister_master(qspi->master);
 
        return 0;
 }
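
Several SPI probes in this series (bcm-qspi here, bcm2835 and others below) move to devm_spi_alloc_master(), which ties the final spi_master_put() to the parent device's release; error paths can then return directly instead of jumping to a put label, and remove() calls spi_unregister_master() first, before shutting the hardware down. A minimal probe under that model:

    #include <linux/spi/spi.h>
    #include <linux/platform_device.h>

    static int my_probe(struct platform_device *pdev)
    {
            struct spi_master *master;

            /* Freed automatically when pdev->dev is released; no
             * spi_master_put() needed on the error paths below.
             */
            master = devm_spi_alloc_master(&pdev->dev, 0 /* priv size */);
            if (!master)
                    return -ENOMEM;

            /* ... ioremap, clocks, IRQs: plain `return err;` on failure ... */

            return spi_register_master(master);
    }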
index 7104cf1..197485f 100644 (file)
@@ -1278,7 +1278,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
        struct bcm2835_spi *bs;
        int err;
 
-       ctlr = spi_alloc_master(&pdev->dev, ALIGN(sizeof(*bs),
+       ctlr = devm_spi_alloc_master(&pdev->dev, ALIGN(sizeof(*bs),
                                                  dma_get_cache_alignment()));
        if (!ctlr)
                return -ENOMEM;
@@ -1299,23 +1299,17 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
        bs->ctlr = ctlr;
 
        bs->regs = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(bs->regs)) {
-               err = PTR_ERR(bs->regs);
-               goto out_controller_put;
-       }
+       if (IS_ERR(bs->regs))
+               return PTR_ERR(bs->regs);
 
        bs->clk = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(bs->clk)) {
-               err = dev_err_probe(&pdev->dev, PTR_ERR(bs->clk),
-                                   "could not get clk\n");
-               goto out_controller_put;
-       }
+       if (IS_ERR(bs->clk))
+               return dev_err_probe(&pdev->dev, PTR_ERR(bs->clk),
+                                    "could not get clk\n");
 
        bs->irq = platform_get_irq(pdev, 0);
-       if (bs->irq <= 0) {
-               err = bs->irq ? bs->irq : -ENODEV;
-               goto out_controller_put;
-       }
+       if (bs->irq <= 0)
+               return bs->irq ? bs->irq : -ENODEV;
 
        clk_prepare_enable(bs->clk);
 
@@ -1349,8 +1343,6 @@ out_dma_release:
        bcm2835_dma_release(ctlr, bs);
 out_clk_disable:
        clk_disable_unprepare(bs->clk);
-out_controller_put:
-       spi_controller_put(ctlr);
        return err;
 }
 
index 03b034c..1a26865 100644 (file)
@@ -494,7 +494,7 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev)
        unsigned long clk_hz;
        int err;
 
-       master = spi_alloc_master(&pdev->dev, sizeof(*bs));
+       master = devm_spi_alloc_master(&pdev->dev, sizeof(*bs));
        if (!master)
                return -ENOMEM;
 
@@ -524,29 +524,25 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev)
 
        /* the main area */
        bs->regs = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(bs->regs)) {
-               err = PTR_ERR(bs->regs);
-               goto out_master_put;
-       }
+       if (IS_ERR(bs->regs))
+               return PTR_ERR(bs->regs);
 
        bs->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(bs->clk)) {
                err = PTR_ERR(bs->clk);
                dev_err(&pdev->dev, "could not get clk: %d\n", err);
-               goto out_master_put;
+               return err;
        }
 
        bs->irq = platform_get_irq(pdev, 0);
-       if (bs->irq <= 0) {
-               err = bs->irq ? bs->irq : -ENODEV;
-               goto out_master_put;
-       }
+       if (bs->irq <= 0)
+               return bs->irq ? bs->irq : -ENODEV;
 
        /* this also enables the HW block */
        err = clk_prepare_enable(bs->clk);
        if (err) {
                dev_err(&pdev->dev, "could not prepare clock: %d\n", err);
-               goto out_master_put;
+               return err;
        }
 
        /* just checking if the clock returns a sane value */
@@ -581,8 +577,6 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev)
 
 out_clk_disable:
        clk_disable_unprepare(bs->clk);
-out_master_put:
-       spi_master_put(master);
        return err;
 }
 
index 40938cf..ba7d40c 100644 (file)
@@ -1260,12 +1260,14 @@ static int cqspi_probe(struct platform_device *pdev)
        /* Obtain QSPI reset control */
        rstc = devm_reset_control_get_optional_exclusive(dev, "qspi");
        if (IS_ERR(rstc)) {
+               ret = PTR_ERR(rstc);
                dev_err(dev, "Cannot get QSPI reset.\n");
                goto probe_reset_failed;
        }
 
        rstc_ocp = devm_reset_control_get_optional_exclusive(dev, "qspi-ocp");
        if (IS_ERR(rstc_ocp)) {
+               ret = PTR_ERR(rstc_ocp);
                dev_err(dev, "Cannot get QSPI OCP reset.\n");
                goto probe_reset_failed;
        }
index 2e50cc0..c33866f 100644 (file)
@@ -357,11 +357,11 @@ static void dw_spi_irq_setup(struct dw_spi *dws)
        dw_writel(dws, DW_SPI_TXFTLR, level);
        dw_writel(dws, DW_SPI_RXFTLR, level - 1);
 
+       dws->transfer_handler = dw_spi_transfer_handler;
+
        imask = SPI_INT_TXEI | SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI |
                SPI_INT_RXFI;
        spi_umask_intr(dws, imask);
-
-       dws->transfer_handler = dw_spi_transfer_handler;
 }
 
 /*
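
The reordering above is a publish-before-enable fix: once spi_umask_intr() runs,
the IRQ can fire on another CPU and dispatch through dws->transfer_handler. A
sketch of the window the old ordering left open (names as in this hunk):

        spi_umask_intr(dws, imask);                     /* old order */
        /* an IRQ arriving here would invoke whatever handler was left
         * over from the previous transfer mode (e.g. the DMA one) */
        dws->transfer_handler = dw_spi_transfer_handler;
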
@@ -875,7 +875,8 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
                master->set_cs = dw_spi_set_cs;
        master->transfer_one = dw_spi_transfer_one;
        master->handle_err = dw_spi_handle_err;
-       master->mem_ops = &dws->mem_ops;
+       if (dws->mem_ops.exec_op)
+               master->mem_ops = &dws->mem_ops;
        master->max_speed_hz = dws->max_freq;
        master->dev.of_node = dev->of_node;
        master->dev.fwnode = dev->fwnode;
index 8a440c7..3920cd3 100644 (file)
@@ -477,7 +477,7 @@ static int fsi_spi_transfer_one_message(struct spi_controller *ctlr,
 
        rc = fsi_spi_check_mux(ctx->fsi, ctx->dev);
        if (rc)
-               return rc;
+               goto error;
 
        list_for_each_entry(transfer, &mesg->transfers, transfer_list) {
                struct fsi_spi_sequence seq;
index 986b979..a2886ee 100644 (file)
@@ -938,9 +938,6 @@ static int fsl_lpspi_remove(struct platform_device *pdev)
                                spi_controller_get_devdata(controller);
 
        pm_runtime_disable(fsl_lpspi->dev);
-
-       spi_master_put(controller);
-
        return 0;
 }
 
index 4b80e27..0b59790 100644 (file)
@@ -1686,6 +1686,7 @@ static int spi_imx_probe(struct platform_device *pdev)
 
        pm_runtime_set_autosuspend_delay(spi_imx->dev, MXC_RPM_TIMEOUT);
        pm_runtime_use_autosuspend(spi_imx->dev);
+       pm_runtime_get_noresume(spi_imx->dev);
        pm_runtime_set_active(spi_imx->dev);
        pm_runtime_enable(spi_imx->dev);
 
index 341f7cf..1cb9329 100644 (file)
@@ -679,7 +679,7 @@ static int npcm_fiu_probe(struct platform_device *pdev)
        struct resource *res;
        int id;
 
-       ctrl = spi_alloc_master(dev, sizeof(*fiu));
+       ctrl = devm_spi_alloc_master(dev, sizeof(*fiu));
        if (!ctrl)
                return -ENOMEM;
 
index 0d41406..ab90356 100644 (file)
@@ -1001,6 +1001,7 @@ static int nxp_fspi_probe(struct platform_device *pdev)
        struct resource *res;
        struct nxp_fspi *f;
        int ret;
+       u32 reg;
 
        ctlr = spi_alloc_master(&pdev->dev, sizeof(*f));
        if (!ctlr)
@@ -1032,6 +1033,12 @@ static int nxp_fspi_probe(struct platform_device *pdev)
                goto err_put_ctrl;
        }
 
+       /* Clear potential interrupts */
+       reg = fspi_readl(f, f->iobase + FSPI_INTR);
+       if (reg)
+               fspi_writel(f, reg, f->iobase + FSPI_INTR);
+
        /* find the resources - controller memory mapped space */
        if (is_acpi_node(f->dev->fwnode))
                res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
index 0cab239..fc9a597 100644 (file)
@@ -812,18 +812,16 @@ static void spi_set_cs(struct spi_device *spi, bool enable)
                enable = !enable;
 
        if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) {
-               /*
-                * Honour the SPI_NO_CS flag and invert the enable line, as
-                * active low is default for SPI. Execution paths that handle
-                * polarity inversion in gpiolib (such as device tree) will
-                * enforce active high using the SPI_CS_HIGH resulting in a
-                * double inversion through the code above.
-                */
                if (!(spi->mode & SPI_NO_CS)) {
                        if (spi->cs_gpiod)
+                               /* polarity handled by gpiolib */
                                gpiod_set_value_cansleep(spi->cs_gpiod,
-                                                        !enable);
+                                                        enable1);
                        else
+                               /*
+                                * invert the enable line, as active low is
+                                * default for SPI.
+                                */
                                gpio_set_value_cansleep(spi->cs_gpio, !enable);
                }
                /* Some SPI masters need both GPIO CS & slave_select */
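
With GPIO descriptors, gpiolib owns the chip-select polarity (taken from the
firmware flags), so the core now hands it the logical CS state; enable1 is the
value captured at the top of spi_set_cs() before the SPI_CS_HIGH inversion
above. A worked example, assuming cs-gpios = <&gpio 5 GPIO_ACTIVE_LOW>:

        /* assert CS:   gpiod_set_value_cansleep(desc, 1) -> line driven low  */
        /* deassert CS: gpiod_set_value_cansleep(desc, 0) -> line driven high */
        /* the legacy cs_gpio branch bypasses gpiolib's polarity handling and
         * therefore still drives the pre-inverted physical level, !enable    */
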
@@ -1992,15 +1990,6 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi,
        }
        spi->chip_select = value;
 
-       /*
-        * For descriptors associated with the device, polarity inversion is
-        * handled in the gpiolib, so all gpio chip selects are "active high"
-        * in the logical sense, the gpiolib will invert the line if need be.
-        */
-       if ((ctlr->use_gpio_descriptors) && ctlr->cs_gpiods &&
-           ctlr->cs_gpiods[spi->chip_select])
-               spi->mode |= SPI_CS_HIGH;
-
        /* Device speed */
        if (!of_property_read_u32(nc, "spi-max-frequency", &value))
                spi->max_speed_hz = value;
@@ -2453,6 +2442,49 @@ struct spi_controller *__spi_alloc_controller(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__spi_alloc_controller);
 
+static void devm_spi_release_controller(struct device *dev, void *ctlr)
+{
+       spi_controller_put(*(struct spi_controller **)ctlr);
+}
+
+/**
+ * __devm_spi_alloc_controller - resource-managed __spi_alloc_controller()
+ * @dev: physical device of SPI controller
+ * @size: how much zeroed driver-private data to allocate
+ * @slave: whether to allocate an SPI master (false) or SPI slave (true)
+ * Context: can sleep
+ *
+ * Allocate an SPI controller and automatically release a reference on it
+ * when @dev is unbound from its driver.  Drivers are thus relieved from
+ * having to call spi_controller_put().
+ *
+ * The arguments to this function are identical to __spi_alloc_controller().
+ *
+ * Return: the SPI controller structure on success, else NULL.
+ */
+struct spi_controller *__devm_spi_alloc_controller(struct device *dev,
+                                                  unsigned int size,
+                                                  bool slave)
+{
+       struct spi_controller **ptr, *ctlr;
+
+       ptr = devres_alloc(devm_spi_release_controller, sizeof(*ptr),
+                          GFP_KERNEL);
+       if (!ptr)
+               return NULL;
+
+       ctlr = __spi_alloc_controller(dev, size, slave);
+       if (ctlr) {
+               *ptr = ctlr;
+               devres_add(dev, ptr);
+       } else {
+               devres_free(ptr);
+       }
+
+       return ctlr;
+}
+EXPORT_SYMBOL_GPL(__devm_spi_alloc_controller);
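
Drivers reach this through the devm_spi_alloc_master()/devm_spi_alloc_slave()
wrappers; a minimal probe sketch under that assumption (hypothetical foo
driver and foo_priv type, not taken from this series):

        struct foo_priv { void __iomem *regs; };

        static int foo_probe(struct platform_device *pdev)
        {
                struct spi_controller *ctlr;

                ctlr = devm_spi_alloc_master(&pdev->dev,
                                             sizeof(struct foo_priv));
                if (!ctlr)
                        return -ENOMEM;

                /* any early failure may now simply return: the devres
                 * action registered above drops the controller reference */
                return devm_spi_register_controller(&pdev->dev, ctlr);
        }
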
+
 #ifdef CONFIG_OF
 static int of_spi_get_gpio_numbers(struct spi_controller *ctlr)
 {
@@ -2789,6 +2821,11 @@ int devm_spi_register_controller(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(devm_spi_register_controller);
 
+static int devm_spi_match_controller(struct device *dev, void *res, void *ctlr)
+{
+       return *(struct spi_controller **)res == ctlr;
+}
+
 static int __unregister(struct device *dev, void *null)
 {
        spi_unregister_device(to_spi_device(dev));
@@ -2830,7 +2867,15 @@ void spi_unregister_controller(struct spi_controller *ctlr)
        list_del(&ctlr->list);
        mutex_unlock(&board_lock);
 
-       device_unregister(&ctlr->dev);
+       device_del(&ctlr->dev);
+
+       /* Release the last reference on the controller if its driver
+        * has not yet been converted to devm_spi_alloc_master/slave().
+        */
+       if (!devres_find(ctlr->dev.parent, devm_spi_release_controller,
+                        devm_spi_match_controller, ctlr))
+               put_device(&ctlr->dev);
+
        /* free bus id */
        mutex_lock(&board_lock);
        if (found == ctlr)
@@ -3327,12 +3372,15 @@ int spi_setup(struct spi_device *spi)
        if (!spi->max_speed_hz)
                spi->max_speed_hz = spi->controller->max_speed_hz;
 
+       mutex_lock(&spi->controller->io_mutex);
+
        if (spi->controller->setup)
                status = spi->controller->setup(spi);
 
        if (spi->controller->auto_runtime_pm && spi->controller->set_cs) {
                status = pm_runtime_get_sync(spi->controller->dev.parent);
                if (status < 0) {
+                       mutex_unlock(&spi->controller->io_mutex);
                        pm_runtime_put_noidle(spi->controller->dev.parent);
                        dev_err(&spi->controller->dev, "Failed to power device: %d\n",
                                status);
@@ -3354,6 +3402,8 @@ int spi_setup(struct spi_device *spi)
                spi_set_cs(spi, false);
        }
 
+       mutex_unlock(&spi->controller->io_mutex);
+
        if (spi->rt && !spi->controller->rt) {
                spi->controller->rt = true;
                spi_set_thread_rt(spi->controller);
index 2831935..781c84a 100644 (file)
@@ -446,7 +446,7 @@ static void cedrus_set_params(struct cedrus_ctx *ctx,
        reg |= (pps->second_chroma_qp_index_offset & 0x3f) << 16;
        reg |= (pps->chroma_qp_index_offset & 0x3f) << 8;
        reg |= (pps->pic_init_qp_minus26 + 26 + slice->slice_qp_delta) & 0x3f;
-       if (pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT)
+       if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
                reg |= VE_H264_SHS_QP_SCALING_MATRIX_DEFAULT;
        cedrus_write(dev, VE_H264_SHS_QP, reg);
 
index f961b35..8831db3 100644 (file)
@@ -653,16 +653,11 @@ static int mt7621_pcie_init_virtual_bridges(struct mt7621_pcie *pcie)
        return 0;
 }
 
-static int mt7621_pcie_request_resources(struct mt7621_pcie *pcie,
-                                        struct list_head *res)
+static void mt7621_pcie_add_resources(struct mt7621_pcie *pcie,
+                                     struct list_head *res)
 {
-       struct device *dev = pcie->dev;
-
        pci_add_resource_offset(res, &pcie->io, pcie->offset.io);
        pci_add_resource_offset(res, &pcie->mem, pcie->offset.mem);
-       pci_add_resource(res, &pcie->busn);
-
-       return devm_request_pci_bus_resources(dev, res);
 }
 
 static int mt7621_pcie_register_host(struct pci_host_bridge *host,
@@ -738,11 +733,7 @@ static int mt7621_pci_probe(struct platform_device *pdev)
 
        setup_cm_memory_region(pcie);
 
-       err = mt7621_pcie_request_resources(pcie, &res);
-       if (err) {
-               dev_err(dev, "Error requesting resources\n");
-               return err;
-       }
+       mt7621_pcie_add_resources(pcie, &res);
 
        err = mt7621_pcie_register_host(bridge, &res);
        if (err) {
index 54e8029..0017376 100644 (file)
@@ -2,6 +2,7 @@
 config DMA_RALINK
        tristate "RALINK DMA support"
        depends on RALINK && !SOC_RT288X
+       depends on DMADEVICES
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
 
index 79b55ec..b2208e5 100644 (file)
@@ -20,6 +20,7 @@ static const struct sdio_device_id sdio_ids[] = {
        { SDIO_DEVICE(0x024c, 0x0525), },
        { SDIO_DEVICE(0x024c, 0x0623), },
        { SDIO_DEVICE(0x024c, 0x0626), },
+       { SDIO_DEVICE(0x024c, 0x0627), },
        { SDIO_DEVICE(0x024c, 0xb723), },
        { /* end: all zeroes */                         },
 };
index f77e5ee..518fac4 100644 (file)
@@ -483,8 +483,7 @@ EXPORT_SYMBOL(iscsit_queue_rsp);
 void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 {
        spin_lock_bh(&conn->cmd_lock);
-       if (!list_empty(&cmd->i_conn_node) &&
-           !(cmd->se_cmd.transport_state & CMD_T_FABRIC_STOP))
+       if (!list_empty(&cmd->i_conn_node))
                list_del_init(&cmd->i_conn_node);
        spin_unlock_bh(&conn->cmd_lock);
 
@@ -4083,12 +4082,22 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn)
        spin_lock_bh(&conn->cmd_lock);
        list_splice_init(&conn->conn_cmd_list, &tmp_list);
 
-       list_for_each_entry(cmd, &tmp_list, i_conn_node) {
+       list_for_each_entry_safe(cmd, cmd_tmp, &tmp_list, i_conn_node) {
                struct se_cmd *se_cmd = &cmd->se_cmd;
 
                if (se_cmd->se_tfo != NULL) {
                        spin_lock_irq(&se_cmd->t_state_lock);
-                       se_cmd->transport_state |= CMD_T_FABRIC_STOP;
+                       if (se_cmd->transport_state & CMD_T_ABORTED) {
+                               /*
+                                * LIO's abort path owns the cleanup for this,
+                                * so put it back on the list and let
+                                * aborted_task handle it.
+                                */
+                               list_move_tail(&cmd->i_conn_node,
+                                              &conn->conn_cmd_list);
+                       } else {
+                               se_cmd->transport_state |= CMD_T_FABRIC_STOP;
+                       }
                        spin_unlock_irq(&se_cmd->t_state_lock);
                }
        }
index d7f798c..337c8d8 100644 (file)
@@ -64,9 +64,13 @@ struct amdtee_session {
 /**
  * struct amdtee_context_data - AMD-TEE driver context data
  * @sess_list:    Keeps track of sessions opened in current TEE context
+ * @shm_list:     Keeps track of buffers allocated and mapped in current TEE
+ *                context
  */
 struct amdtee_context_data {
        struct list_head sess_list;
+       struct list_head shm_list;
+       struct mutex shm_mutex;   /* synchronizes access to @shm_list */
 };
 
 struct amdtee_driver_data {
@@ -89,10 +93,6 @@ struct amdtee_shm_data {
        u32     buf_id;
 };
 
-struct amdtee_shm_context {
-       struct list_head shmdata_list;
-};
-
 #define LOWER_TWO_BYTE_MASK    0x0000FFFF
 
 /**
index 27b4cd7..8a6a8f3 100644 (file)
@@ -20,7 +20,6 @@
 
 static struct amdtee_driver_data *drv_data;
 static DEFINE_MUTEX(session_list_mutex);
-static struct amdtee_shm_context shmctx;
 
 static void amdtee_get_version(struct tee_device *teedev,
                               struct tee_ioctl_version_data *vers)
@@ -42,7 +41,8 @@ static int amdtee_open(struct tee_context *ctx)
                return -ENOMEM;
 
        INIT_LIST_HEAD(&ctxdata->sess_list);
-       INIT_LIST_HEAD(&shmctx.shmdata_list);
+       INIT_LIST_HEAD(&ctxdata->shm_list);
+       mutex_init(&ctxdata->shm_mutex);
 
        ctx->data = ctxdata;
        return 0;
@@ -86,6 +86,7 @@ static void amdtee_release(struct tee_context *ctx)
                list_del(&sess->list_node);
                release_session(sess);
        }
+       mutex_destroy(&ctxdata->shm_mutex);
        kfree(ctxdata);
 
        ctx->data = NULL;
@@ -152,14 +153,17 @@ static struct amdtee_session *find_session(struct amdtee_context_data *ctxdata,
 
 u32 get_buffer_id(struct tee_shm *shm)
 {
-       u32 buf_id = 0;
+       struct amdtee_context_data *ctxdata = shm->ctx->data;
        struct amdtee_shm_data *shmdata;
+       u32 buf_id = 0;
 
-       list_for_each_entry(shmdata, &shmctx.shmdata_list, shm_node)
+       mutex_lock(&ctxdata->shm_mutex);
+       list_for_each_entry(shmdata, &ctxdata->shm_list, shm_node)
                if (shmdata->kaddr == shm->kaddr) {
                        buf_id = shmdata->buf_id;
                        break;
                }
+       mutex_unlock(&ctxdata->shm_mutex);
 
        return buf_id;
 }
@@ -333,8 +337,9 @@ int amdtee_close_session(struct tee_context *ctx, u32 session)
 
 int amdtee_map_shmem(struct tee_shm *shm)
 {
-       struct shmem_desc shmem;
+       struct amdtee_context_data *ctxdata;
        struct amdtee_shm_data *shmnode;
+       struct shmem_desc shmem;
        int rc, count;
        u32 buf_id;
 
@@ -362,7 +367,10 @@ int amdtee_map_shmem(struct tee_shm *shm)
 
        shmnode->kaddr = shm->kaddr;
        shmnode->buf_id = buf_id;
-       list_add(&shmnode->shm_node, &shmctx.shmdata_list);
+       ctxdata = shm->ctx->data;
+       mutex_lock(&ctxdata->shm_mutex);
+       list_add(&shmnode->shm_node, &ctxdata->shm_list);
+       mutex_unlock(&ctxdata->shm_mutex);
 
        pr_debug("buf_id :[%x] kaddr[%p]\n", shmnode->buf_id, shmnode->kaddr);
 
@@ -371,6 +379,7 @@ int amdtee_map_shmem(struct tee_shm *shm)
 
 void amdtee_unmap_shmem(struct tee_shm *shm)
 {
+       struct amdtee_context_data *ctxdata;
        struct amdtee_shm_data *shmnode;
        u32 buf_id;
 
@@ -381,12 +390,15 @@ void amdtee_unmap_shmem(struct tee_shm *shm)
        /* Unmap the shared memory from TEE */
        handle_unmap_shmem(buf_id);
 
-       list_for_each_entry(shmnode, &shmctx.shmdata_list, shm_node)
+       ctxdata = shm->ctx->data;
+       mutex_lock(&ctxdata->shm_mutex);
+       list_for_each_entry(shmnode, &ctxdata->shm_list, shm_node)
                if (buf_id == shmnode->buf_id) {
                        list_del(&shmnode->shm_node);
                        kfree(shmnode);
                        break;
                }
+       mutex_unlock(&ctxdata->shm_mutex);
 }
 
 int amdtee_invoke_func(struct tee_context *ctx,
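
The conversion replaces a driver-global, unlocked list with per-context state.
A sketch of the interleaving the new shm_list/shm_mutex pair rules out
(hypothetical timing, two TEE contexts A and B sharing the old global shmctx):

        /* CPU0, context A                CPU1, context B
         * amdtee_map_shmem()
         *   list_add(node, &global)      amdtee_unmap_shmem()
         *                                  list_del(other) + kfree(other)
         * both modify the same unlocked list head: corruption, and a lookup
         * racing the kfree() is a use-after-free */
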
index 20b6fd7..c981757 100644 (file)
@@ -534,7 +534,8 @@ void optee_free_pages_list(void *list, size_t num_entries)
 static bool is_normal_memory(pgprot_t p)
 {
 #if defined(CONFIG_ARM)
-       return (pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC;
+       return (((pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC) ||
+               ((pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEBACK));
 #elif defined(CONFIG_ARM64)
        return (pgprot_val(p) & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL);
 #else
index 5e59616..dcac99f 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/err.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>
+#include <linux/sys_soc.h>
 #include <linux/reboot.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
@@ -864,6 +865,17 @@ static struct ti_bandgap *ti_bandgap_build(struct platform_device *pdev)
        return bgp;
 }
 
+/*
+ * List of SoCs on which the CPU PM notifier can cause errors on the DTEMP
+ * readout.
+ * Enabling the notifier on these machines results in erroneous, random values
+ * which could trigger unexpected thermal shutdown.
+ */
+static const struct soc_device_attribute soc_no_cpu_notifier[] = {
+       { .machine = "OMAP4430" },
+       { /* sentinel */ },
+};
+
 /***   Device driver call backs   ***/
 
 static
@@ -1020,7 +1032,8 @@ int ti_bandgap_probe(struct platform_device *pdev)
 
 #ifdef CONFIG_PM_SLEEP
        bgp->nb.notifier_call = bandgap_omap_cpu_notifier;
-       cpu_pm_register_notifier(&bgp->nb);
+       if (!soc_device_match(soc_no_cpu_notifier))
+               cpu_pm_register_notifier(&bgp->nb);
 #endif
 
        return 0;
@@ -1056,7 +1069,8 @@ int ti_bandgap_remove(struct platform_device *pdev)
        struct ti_bandgap *bgp = platform_get_drvdata(pdev);
        int i;
 
-       cpu_pm_unregister_notifier(&bgp->nb);
+       if (!soc_device_match(soc_no_cpu_notifier))
+               cpu_pm_unregister_notifier(&bgp->nb);
 
        /* Remove sensor interfaces */
        for (i = 0; i < bgp->conf->sensor_count; i++) {
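
soc_device_match() returns the first matching entry (or NULL), so the quirk is
a plain table lookup; further affected machines would simply be appended before
the sentinel, e.g. (hypothetical second entry, not part of this fix):

        static const struct soc_device_attribute soc_no_cpu_notifier[] = {
                { .machine = "OMAP4430" },
                { .machine = "OMAP4460" },      /* hypothetical */
                { /* sentinel */ },
        };
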
index 977ba91..82c46b2 100644 (file)
@@ -1976,7 +1976,9 @@ static int complete_rpm(struct device *dev, void *data)
 
 static void remove_unplugged_switch(struct tb_switch *sw)
 {
-       pm_runtime_get_sync(sw->dev.parent);
+       struct device *parent = get_device(sw->dev.parent);
+
+       pm_runtime_get_sync(parent);
 
        /*
         * Signal this and switches below for rpm_complete because
@@ -1987,8 +1989,10 @@ static void remove_unplugged_switch(struct tb_switch *sw)
        bus_for_each_dev(&tb_bus_type, &sw->dev, NULL, complete_rpm);
        tb_switch_remove(sw);
 
-       pm_runtime_mark_last_busy(sw->dev.parent);
-       pm_runtime_put_autosuspend(sw->dev.parent);
+       pm_runtime_mark_last_busy(parent);
+       pm_runtime_put_autosuspend(parent);
+
+       put_device(parent);
 }
 
 static void icm_free_unplugged_children(struct tb_switch *sw)
index 0c80a79..c2be7cf 100644 (file)
@@ -789,8 +789,10 @@ static int ar933x_uart_probe(struct platform_device *pdev)
                goto err_disable_clk;
 
        up->gpios = mctrl_gpio_init(port, 0);
-       if (IS_ERR(up->gpios) && PTR_ERR(up->gpios) != -ENOSYS)
-               return PTR_ERR(up->gpios);
+       if (IS_ERR(up->gpios) && PTR_ERR(up->gpios) != -ENOSYS) {
+               ret = PTR_ERR(up->gpios);
+               goto err_disable_clk;
+       }
 
        up->rts_gpiod = mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_RTS);
 
index 1731d97..cacf726 100644 (file)
@@ -942,8 +942,14 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id)
        struct imx_port *sport = dev_id;
        unsigned int usr1, usr2, ucr1, ucr2, ucr3, ucr4;
        irqreturn_t ret = IRQ_NONE;
+       unsigned long flags = 0;
 
-       spin_lock(&sport->port.lock);
+       /*
+        * IRQs might not be disabled upon entering this interrupt handler,
+        * e.g. when interrupt handlers are forced to be threaded. To support
+        * this scenario as well, disable IRQs when acquiring the spinlock.
+        */
+       spin_lock_irqsave(&sport->port.lock, flags);
 
        usr1 = imx_uart_readl(sport, USR1);
        usr2 = imx_uart_readl(sport, USR2);
@@ -1013,7 +1019,7 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id)
                ret = IRQ_HANDLED;
        }
 
-       spin_unlock(&sport->port.lock);
+       spin_unlock_irqrestore(&sport->port.lock, flags);
 
        return ret;
 }
@@ -2002,16 +2008,6 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count)
        unsigned int ucr1;
        unsigned long flags = 0;
        int locked = 1;
-       int retval;
-
-       retval = clk_enable(sport->clk_per);
-       if (retval)
-               return;
-       retval = clk_enable(sport->clk_ipg);
-       if (retval) {
-               clk_disable(sport->clk_per);
-               return;
-       }
 
        if (sport->port.sysrq)
                locked = 0;
@@ -2047,9 +2043,6 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count)
 
        if (locked)
                spin_unlock_irqrestore(&sport->port.lock, flags);
-
-       clk_disable(sport->clk_ipg);
-       clk_disable(sport->clk_per);
 }
 
 /*
@@ -2150,15 +2143,14 @@ imx_uart_console_setup(struct console *co, char *options)
 
        retval = uart_set_options(&sport->port, co, baud, parity, bits, flow);
 
-       clk_disable(sport->clk_ipg);
        if (retval) {
-               clk_unprepare(sport->clk_ipg);
+               clk_disable_unprepare(sport->clk_ipg);
                goto error_console;
        }
 
-       retval = clk_prepare(sport->clk_per);
+       retval = clk_prepare_enable(sport->clk_per);
        if (retval)
-               clk_unprepare(sport->clk_ipg);
+               clk_disable_unprepare(sport->clk_ipg);
 
 error_console:
        return retval;
index 9f8b9a5..56ade99 100644 (file)
@@ -2897,10 +2897,14 @@ void __do_SAK(struct tty_struct *tty)
        struct task_struct *g, *p;
        struct pid *session;
        int             i;
+       unsigned long flags;
 
        if (!tty)
                return;
-       session = tty->session;
+
+       spin_lock_irqsave(&tty->ctrl_lock, flags);
+       session = get_pid(tty->session);
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
 
        tty_ldisc_flush(tty);
 
@@ -2932,6 +2936,7 @@ void __do_SAK(struct tty_struct *tty)
                task_unlock(p);
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);
+       put_pid(session);
 #endif
 }
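
tty->session is protected by ctrl_lock and can be replaced, with its reference
dropped, by the job-control paths in the next file; pinning it under the lock
keeps the struct pid alive across the unlocked tasklist walk:

        spin_lock_irqsave(&tty->ctrl_lock, flags);
        session = get_pid(tty->session);        /* pin */
        spin_unlock_irqrestore(&tty->ctrl_lock, flags);
        /* ... walk the tasklist comparing task sessions ... */
        put_pid(session);                       /* unpin */
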
 
index 28a23a0..aa6d053 100644 (file)
@@ -103,8 +103,8 @@ static void __proc_set_tty(struct tty_struct *tty)
        put_pid(tty->session);
        put_pid(tty->pgrp);
        tty->pgrp = get_pid(task_pgrp(current));
-       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
        tty->session = get_pid(task_session(current));
+       spin_unlock_irqrestore(&tty->ctrl_lock, flags);
        if (current->signal->tty) {
                tty_debug(tty, "current tty %s not NULL!!\n",
                          current->signal->tty->name);
@@ -293,20 +293,23 @@ void disassociate_ctty(int on_exit)
        spin_lock_irq(&current->sighand->siglock);
        put_pid(current->signal->tty_old_pgrp);
        current->signal->tty_old_pgrp = NULL;
-
        tty = tty_kref_get(current->signal->tty);
+       spin_unlock_irq(&current->sighand->siglock);
+
        if (tty) {
                unsigned long flags;
+
+               tty_lock(tty);
                spin_lock_irqsave(&tty->ctrl_lock, flags);
                put_pid(tty->session);
                put_pid(tty->pgrp);
                tty->session = NULL;
                tty->pgrp = NULL;
                spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+               tty_unlock(tty);
                tty_kref_put(tty);
        }
 
-       spin_unlock_irq(&current->sighand->siglock);
        /* Now clear signal->tty under the lock */
        read_lock(&tasklist_lock);
        session_clear_tty(task_session(current));
@@ -477,14 +480,19 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
                return -ENOTTY;
        if (retval)
                return retval;
-       if (!current->signal->tty ||
-           (current->signal->tty != real_tty) ||
-           (real_tty->session != task_session(current)))
-               return -ENOTTY;
+
        if (get_user(pgrp_nr, p))
                return -EFAULT;
        if (pgrp_nr < 0)
                return -EINVAL;
+
+       spin_lock_irq(&real_tty->ctrl_lock);
+       if (!current->signal->tty ||
+           (current->signal->tty != real_tty) ||
+           (real_tty->session != task_session(current))) {
+               retval = -ENOTTY;
+               goto out_unlock_ctrl;
+       }
        rcu_read_lock();
        pgrp = find_vpid(pgrp_nr);
        retval = -ESRCH;
@@ -494,12 +502,12 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
        if (session_of_pgrp(pgrp) != task_session(current))
                goto out_unlock;
        retval = 0;
-       spin_lock_irq(&tty->ctrl_lock);
        put_pid(real_tty->pgrp);
        real_tty->pgrp = get_pid(pgrp);
-       spin_unlock_irq(&tty->ctrl_lock);
 out_unlock:
        rcu_read_unlock();
+out_unlock_ctrl:
+       spin_unlock_irq(&real_tty->ctrl_lock);
        return retval;
 }
 
@@ -511,20 +519,30 @@ out_unlock:
  *
  *     Obtain the session id of the tty. If there is no session
  *     return an error.
- *
- *     Locking: none. Reference to current->signal->tty is safe.
  */
 static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
 {
+       unsigned long flags;
+       pid_t sid;
+
        /*
         * (tty == real_tty) is a cheap way of
         * testing if the tty is NOT a master pty.
        */
        if (tty == real_tty && current->signal->tty != real_tty)
                return -ENOTTY;
+
+       spin_lock_irqsave(&real_tty->ctrl_lock, flags);
        if (!real_tty->session)
-               return -ENOTTY;
-       return put_user(pid_vnr(real_tty->session), p);
+               goto err;
+       sid = pid_vnr(real_tty->session);
+       spin_unlock_irqrestore(&real_tty->ctrl_lock, flags);
+
+       return put_user(sid, p);
+
+err:
+       spin_unlock_irqrestore(&real_tty->ctrl_lock, flags);
+       return -ENOTTY;
 }
 
 /*
index a0f73d4..039ab5d 100644 (file)
@@ -427,7 +427,6 @@ static irqreturn_t cdns3_wakeup_irq(int irq, void *data)
  */
 static int cdns3_probe(struct platform_device *pdev)
 {
-       struct usb_role_switch_desc sw_desc = { };
        struct device *dev = &pdev->dev;
        struct resource *res;
        struct cdns3 *cdns;
@@ -529,18 +528,21 @@ static int cdns3_probe(struct platform_device *pdev)
        if (ret)
                goto err2;
 
-       sw_desc.set = cdns3_role_set;
-       sw_desc.get = cdns3_role_get;
-       sw_desc.allow_userspace_control = true;
-       sw_desc.driver_data = cdns;
-       if (device_property_read_bool(dev, "usb-role-switch"))
+       if (device_property_read_bool(dev, "usb-role-switch")) {
+               struct usb_role_switch_desc sw_desc = { };
+
+               sw_desc.set = cdns3_role_set;
+               sw_desc.get = cdns3_role_get;
+               sw_desc.allow_userspace_control = true;
+               sw_desc.driver_data = cdns;
                sw_desc.fwnode = dev->fwnode;
 
-       cdns->role_sw = usb_role_switch_register(dev, &sw_desc);
-       if (IS_ERR(cdns->role_sw)) {
-               ret = PTR_ERR(cdns->role_sw);
-               dev_warn(dev, "Unable to register Role Switch\n");
-               goto err3;
+               cdns->role_sw = usb_role_switch_register(dev, &sw_desc);
+               if (IS_ERR(cdns->role_sw)) {
+                       ret = PTR_ERR(cdns->role_sw);
+                       dev_warn(dev, "Unable to register Role Switch\n");
+                       goto err3;
+               }
        }
 
        if (cdns->wakeup_irq) {
@@ -551,7 +553,7 @@ static int cdns3_probe(struct platform_device *pdev)
 
                if (ret) {
                        dev_err(cdns->dev, "couldn't register wakeup irq handler\n");
-                       goto err3;
+                       goto err4;
                }
        }
 
@@ -582,7 +584,8 @@ static int cdns3_probe(struct platform_device *pdev)
        return 0;
 err4:
        cdns3_drd_exit(cdns);
-       usb_role_switch_unregister(cdns->role_sw);
+       if (cdns->role_sw)
+               usb_role_switch_unregister(cdns->role_sw);
 err3:
        set_phy_power_off(cdns);
 err2:
index 66c1e67..0aa85cc 100644 (file)
@@ -1114,7 +1114,7 @@ static int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep,
        struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
        struct cdns3_request *priv_req;
        struct cdns3_trb *trb;
-       struct cdns3_trb *link_trb;
+       struct cdns3_trb *link_trb = NULL;
        dma_addr_t trb_dma;
        u32 togle_pcs = 1;
        int sg_iter = 0;
@@ -1193,10 +1193,20 @@ static int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep,
 
        /* set incorrect Cycle Bit for first trb */
        control = priv_ep->pcs ? 0 : TRB_CYCLE;
+       trb->length = 0;
+       if (priv_dev->dev_ver >= DEV_VER_V2) {
+               u16 td_size;
+
+               td_size = DIV_ROUND_UP(request->length,
+                                      priv_ep->endpoint.maxpacket);
+               if (priv_dev->gadget.speed == USB_SPEED_SUPER)
+                       trb->length = TRB_TDL_SS_SIZE(td_size);
+               else
+                       control |= TRB_TDL_HS_SIZE(td_size);
+       }
 
        do {
                u32 length;
-               u16 td_size = 0;
 
                /* fill TRB */
                control |= TRB_TYPE(TRB_NORMAL);
@@ -1208,20 +1218,12 @@ static int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep,
                        length = request->length;
                }
 
-               if (likely(priv_dev->dev_ver >= DEV_VER_V2))
-                       td_size = DIV_ROUND_UP(length,
-                                              priv_ep->endpoint.maxpacket);
-               else if (priv_ep->flags & EP_TDLCHK_EN)
+               if (priv_ep->flags & EP_TDLCHK_EN)
                        total_tdl += DIV_ROUND_UP(length,
                                               priv_ep->endpoint.maxpacket);
 
-               trb->length = cpu_to_le32(TRB_BURST_LEN(priv_ep->trb_burst_size) |
+               trb->length |= cpu_to_le32(TRB_BURST_LEN(priv_ep->trb_burst_size) |
                                        TRB_LEN(length));
-               if (priv_dev->gadget.speed == USB_SPEED_SUPER)
-                       trb->length |= cpu_to_le32(TRB_TDL_SS_SIZE(td_size));
-               else
-                       control |= TRB_TDL_HS_SIZE(td_size);
-
                pcs = priv_ep->pcs ? TRB_CYCLE : 0;
 
                /*
@@ -1258,6 +1260,7 @@ static int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep,
                priv_req->end_trb = priv_ep->enqueue;
                cdns3_ep_inc_enq(priv_ep);
                trb = priv_ep->trb_pool + priv_ep->enqueue;
+               trb->length = 0;
        } while (sg_iter < num_trb);
 
        trb = priv_req->trb;
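
On DEV_VER_V2+ hardware the TDL field must describe the whole request, which is
why its computation moved out of the per-TRB loop above. A worked example,
assuming a 3000-byte high-speed request with a 512-byte maxpacket:

        td_size = DIV_ROUND_UP(3000, 512);      /* = 6 packets */
        /* HS: TDL travels in the first TRB's control word */
        control |= TRB_TDL_HS_SIZE(td_size);
        /* SS carries it in the length field instead:
         *      trb->length = TRB_TDL_SS_SIZE(td_size);
         */
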
index e96a858..5332363 100644 (file)
@@ -482,11 +482,11 @@ static void snoop_urb(struct usb_device *udev,
 
        if (userurb) {          /* Async */
                if (when == SUBMIT)
-                       dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, "
+                       dev_info(&udev->dev, "userurb %px, ep%d %s-%s, "
                                        "length %u\n",
                                        userurb, ep, t, d, length);
                else
-                       dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, "
+                       dev_info(&udev->dev, "userurb %px, ep%d %s-%s, "
                                        "actual_length %u status %d\n",
                                        userurb, ep, t, d, length,
                                        timeout_or_status);
@@ -1997,7 +1997,7 @@ static int proc_reapurb(struct usb_dev_state *ps, void __user *arg)
        if (as) {
                int retval;
 
-               snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
+               snoop(&ps->dev->dev, "reap %px\n", as->userurb);
                retval = processcompl(as, (void __user * __user *)arg);
                free_async(as);
                return retval;
@@ -2014,7 +2014,7 @@ static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg)
 
        as = async_getcompleted(ps);
        if (as) {
-               snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
+               snoop(&ps->dev->dev, "reap %px\n", as->userurb);
                retval = processcompl(as, (void __user * __user *)arg);
                free_async(as);
        } else {
@@ -2142,7 +2142,7 @@ static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg)
        if (as) {
                int retval;
 
-               snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
+               snoop(&ps->dev->dev, "reap %px\n", as->userurb);
                retval = processcompl_compat(as, (void __user * __user *)arg);
                free_async(as);
                return retval;
@@ -2159,7 +2159,7 @@ static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *ar
 
        as = async_getcompleted(ps);
        if (as) {
-               snoop(&ps->dev->dev, "reap %pK\n", as->userurb);
+               snoop(&ps->dev->dev, "reap %px\n", as->userurb);
                retval = processcompl_compat(as, (void __user * __user *)arg);
                free_async(as);
        } else {
@@ -2624,7 +2624,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 #endif
 
        case USBDEVFS_DISCARDURB:
-               snoop(&dev->dev, "%s: DISCARDURB %pK\n", __func__, p);
+               snoop(&dev->dev, "%s: DISCARDURB %px\n", __func__, p);
                ret = proc_unlinkurb(ps, p);
                break;
 
index a1e3a03..fad31cc 100644 (file)
@@ -348,6 +348,10 @@ static const struct usb_device_id usb_quirk_list[] = {
        /* Guillemot Webcam Hercules Dualpix Exchange */
        { USB_DEVICE(0x06f8, 0x3005), .driver_info = USB_QUIRK_RESET_RESUME },
 
+       /* Guillemot Hercules DJ Console audio card (BZ 208357) */
+       { USB_DEVICE(0x06f8, 0xb000), .driver_info =
+                       USB_QUIRK_ENDPOINT_IGNORE },
+
        /* Midiman M-Audio Keystation 88es */
        { USB_DEVICE(0x0763, 0x0192), .driver_info = USB_QUIRK_RESET_RESUME },
 
@@ -421,6 +425,10 @@ static const struct usb_device_id usb_quirk_list[] = {
        { USB_DEVICE(0x1532, 0x0116), .driver_info =
                        USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
 
+       /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */
+       { USB_DEVICE(0x17ef, 0xa012), .driver_info =
+                       USB_QUIRK_DISCONNECT_SUSPEND },
+
        /* BUILDWIN Photo Frame */
        { USB_DEVICE(0x1908, 0x1315), .driver_info =
                        USB_QUIRK_HONOR_BNUMINTERFACES },
@@ -521,6 +529,8 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = {
  * Matched for devices with USB_QUIRK_ENDPOINT_IGNORE.
  */
 static const struct usb_device_id usb_endpoint_ignore[] = {
+       { USB_DEVICE_INTERFACE_NUMBER(0x06f8, 0xb000, 5), .driver_info = 0x01 },
+       { USB_DEVICE_INTERFACE_NUMBER(0x06f8, 0xb000, 5), .driver_info = 0x81 },
        { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0202, 1), .driver_info = 0x85 },
        { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0208, 1), .driver_info = 0x85 },
        { }
index 046f770..c727cb5 100644 (file)
@@ -1324,7 +1324,7 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code,
        case FUNCTIONFS_ENDPOINT_DESC:
        {
                int desc_idx;
-               struct usb_endpoint_descriptor *desc;
+               struct usb_endpoint_descriptor desc1, *desc;
 
                switch (epfile->ffs->gadget->speed) {
                case USB_SPEED_SUPER:
@@ -1336,10 +1336,12 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code,
                default:
                        desc_idx = 0;
                }
+
                desc = epfile->ep->descs[desc_idx];
+               memcpy(&desc1, desc, desc->bLength);
 
                spin_unlock_irq(&epfile->ffs->eps_lock);
-               ret = copy_to_user((void __user *)value, desc, desc->bLength);
+               ret = copy_to_user((void __user *)value, &desc1, desc1.bLength);
                if (ret)
                        ret = -EFAULT;
                return ret;
index 85cb157..19d9794 100644 (file)
@@ -1315,7 +1315,7 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi)
        midi->id = kstrdup(opts->id, GFP_KERNEL);
        if (opts->id && !midi->id) {
                status = -ENOMEM;
-               goto setup_fail;
+               goto midi_free;
        }
        midi->in_ports = opts->in_ports;
        midi->out_ports = opts->out_ports;
@@ -1327,7 +1327,7 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi)
 
        status = kfifo_alloc(&midi->in_req_fifo, midi->qlen, GFP_KERNEL);
        if (status)
-               goto setup_fail;
+               goto midi_free;
 
        spin_lock_init(&midi->transmit_lock);
 
@@ -1343,9 +1343,13 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi)
 
        return &midi->func;
 
+midi_free:
+       if (midi)
+               kfree(midi->id);
+       kfree(midi);
 setup_fail:
        mutex_unlock(&opts->lock);
-       kfree(midi);
+
        return ERR_PTR(status);
 }
 
index 1b430b3..71e7d10 100644 (file)
@@ -2039,6 +2039,9 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc)
        return 0;
 
 Enomem:
+       kfree(CHIP);
+       CHIP = NULL;
+
        return -ENOMEM;
 }
 
index 9ccdf2c..6374501 100644 (file)
@@ -91,14 +91,14 @@ static int omap_ohci_transceiver_power(struct ohci_omap_priv *priv, int on)
                                | ((1 << 5/*usb1*/) | (1 << 3/*usb2*/)),
                               INNOVATOR_FPGA_CAM_USB_CONTROL);
                else if (priv->power)
-                       gpiod_set_value(priv->power, 0);
+                       gpiod_set_value_cansleep(priv->power, 0);
        } else {
                if (machine_is_omap_innovator() && cpu_is_omap1510())
                        __raw_writeb(__raw_readb(INNOVATOR_FPGA_CAM_USB_CONTROL)
                                & ~((1 << 5/*usb1*/) | (1 << 3/*usb2*/)),
                               INNOVATOR_FPGA_CAM_USB_CONTROL);
                else if (priv->power)
-                       gpiod_set_value(priv->power, 1);
+                       gpiod_set_value_cansleep(priv->power, 1);
        }
 
        return 0;
index a2e2f56..28deaae 100644 (file)
 #define CH341_QUIRK_SIMULATE_BREAK     BIT(1)
 
 static const struct usb_device_id id_table[] = {
-       { USB_DEVICE(0x4348, 0x5523) },
+       { USB_DEVICE(0x1a86, 0x5512) },
+       { USB_DEVICE(0x1a86, 0x5523) },
        { USB_DEVICE(0x1a86, 0x7522) },
        { USB_DEVICE(0x1a86, 0x7523) },
-       { USB_DEVICE(0x1a86, 0x5523) },
+       { USB_DEVICE(0x4348, 0x5523) },
        { },
 };
 MODULE_DEVICE_TABLE(usb, id_table);
index 5ee48b0..5f6b82e 100644 (file)
@@ -276,12 +276,12 @@ static int  klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port)
        priv->cfg.unknown2 = cfg->unknown2;
        spin_unlock_irqrestore(&priv->lock, flags);
 
+       kfree(cfg);
+
        /* READ_ON and urb submission */
        rc = usb_serial_generic_open(tty, port);
-       if (rc) {
-               retval = rc;
-               goto err_free_cfg;
-       }
+       if (rc)
+               return rc;
 
        rc = usb_control_msg(port->serial->dev,
                             usb_sndctrlpipe(port->serial->dev, 0),
@@ -324,8 +324,6 @@ err_disable_read:
                             KLSI_TIMEOUT);
 err_generic_close:
        usb_serial_generic_close(port);
-err_free_cfg:
-       kfree(cfg);
 
        return retval;
 }
index 54ca85c..56d6f6d 100644 (file)
@@ -419,6 +419,7 @@ static void option_instat_callback(struct urb *urb);
 #define CINTERION_PRODUCT_PH8                  0x0053
 #define CINTERION_PRODUCT_AHXX                 0x0055
 #define CINTERION_PRODUCT_PLXX                 0x0060
+#define CINTERION_PRODUCT_EXS82                        0x006c
 #define CINTERION_PRODUCT_PH8_2RMNET           0x0082
 #define CINTERION_PRODUCT_PH8_AUDIO            0x0083
 #define CINTERION_PRODUCT_AHXX_2RMNET          0x0084
@@ -1105,9 +1106,8 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0xff, 0xff),
          .driver_info = NUMEP2 },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0, 0) },
-       { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96, 0xff, 0xff, 0xff),
-         .driver_info = NUMEP2 },
-       { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96, 0xff, 0, 0) },
+       { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96),
+         .driver_info = RSVD(4) },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff),
          .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) },
@@ -1902,6 +1902,7 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_AUDIO, 0xff) },
        { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_CLS8, 0xff),
          .driver_info = RSVD(0) | RSVD(4) },
+       { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_EXS82, 0xff) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) },
        { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) },
        { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDM) },
@@ -2046,12 +2047,13 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = RSVD(0) | RSVD(1) | RSVD(6) },
        { USB_DEVICE(0x0489, 0xe0b5),                                           /* Foxconn T77W968 ESIM */
          .driver_info = RSVD(0) | RSVD(1) | RSVD(6) },
-       { USB_DEVICE(0x1508, 0x1001),                                           /* Fibocom NL668 */
+       { USB_DEVICE(0x1508, 0x1001),                                           /* Fibocom NL668 (IOT version) */
          .driver_info = RSVD(4) | RSVD(5) | RSVD(6) },
        { USB_DEVICE(0x2cb7, 0x0104),                                           /* Fibocom NL678 series */
          .driver_info = RSVD(4) | RSVD(5) },
        { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff),                     /* Fibocom NL678 series */
          .driver_info = RSVD(6) },
+       { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) },                   /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
        { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) },                   /* GosunCn GM500 RNDIS */
        { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) },                   /* GosunCn GM500 MBIM */
        { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) },                   /* GosunCn GM500 ECM/NCM */
index 560efd1..e5a971b 100644 (file)
@@ -92,7 +92,7 @@ static int slave_alloc (struct scsi_device *sdev)
 static int slave_configure(struct scsi_device *sdev)
 {
        struct us_data *us = host_to_us(sdev->host);
-       struct device *dev = sdev->host->dma_dev;
+       struct device *dev = us->pusb_dev->bus->sysdev;
 
        /*
         * Many devices have trouble transferring more than 32KB at a time,
index c8a5773..652d6d6 100644 (file)
@@ -837,24 +837,17 @@ static int uas_slave_alloc(struct scsi_device *sdev)
         */
        blk_queue_update_dma_alignment(sdev->request_queue, (512 - 1));
 
+       if (devinfo->flags & US_FL_MAX_SECTORS_64)
+               blk_queue_max_hw_sectors(sdev->request_queue, 64);
+       else if (devinfo->flags & US_FL_MAX_SECTORS_240)
+               blk_queue_max_hw_sectors(sdev->request_queue, 240);
+
        return 0;
 }
 
 static int uas_slave_configure(struct scsi_device *sdev)
 {
        struct uas_dev_info *devinfo = sdev->hostdata;
-       struct device *dev = sdev->host->dma_dev;
-
-       if (devinfo->flags & US_FL_MAX_SECTORS_64)
-               blk_queue_max_hw_sectors(sdev->request_queue, 64);
-       else if (devinfo->flags & US_FL_MAX_SECTORS_240)
-               blk_queue_max_hw_sectors(sdev->request_queue, 240);
-       else if (devinfo->udev->speed >= USB_SPEED_SUPER)
-               blk_queue_max_hw_sectors(sdev->request_queue, 2048);
-
-       blk_queue_max_hw_sectors(sdev->request_queue,
-               min_t(size_t, queue_max_hw_sectors(sdev->request_queue),
-                     dma_max_mapping_size(dev) >> SECTOR_SHIFT));
 
        if (devinfo->flags & US_FL_NO_REPORT_OPCODES)
                sdev->no_report_opcodes = 1;
@@ -1040,7 +1033,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
        shost->can_queue = devinfo->qdepth - 2;
 
        usb_set_intfdata(intf, shost);
-       result = scsi_add_host_with_dma(shost, &intf->dev, udev->bus->sysdev);
+       result = scsi_add_host(shost, &intf->dev);
        if (result)
                goto free_streams;
 
index c2ef367..94a6472 100644 (file)
@@ -1049,9 +1049,8 @@ int usb_stor_probe2(struct us_data *us)
                goto BadDevice;
        usb_autopm_get_interface_no_resume(us->pusb_intf);
        snprintf(us->scsi_name, sizeof(us->scsi_name), "usb-storage %s",
-                                       dev_name(dev));
-       result = scsi_add_host_with_dma(us_to_host(us), dev,
-                                       us->pusb_dev->bus->sysdev);
+                                       dev_name(&us->pusb_intf->dev));
+       result = scsi_add_host(us_to_host(us), dev);
        if (result) {
                dev_warn(dev,
                                "Unable to add the scsi host\n");
index 6c5908a..e7f1208 100644 (file)
@@ -88,6 +88,7 @@ config TYPEC_STUSB160X
 config TYPEC_QCOM_PMIC
        tristate "Qualcomm PMIC USB Type-C driver"
        depends on ARCH_QCOM || COMPILE_TEST
+       depends on USB_ROLE_SWITCH || !USB_ROLE_SWITCH
        help
          Driver for supporting role switch over the Qualcomm PMIC.  This will
          handle the USB Type-C role and orientation detection reported by the
index 2a618f0..d21750b 100644 (file)
@@ -562,7 +562,7 @@ static int stusb160x_get_fw_caps(struct stusb160x *chip,
         * Supported power operation mode can be configured through device tree
         * else it is read from chip registers in stusb160x_get_caps.
         */
-       ret = fwnode_property_read_string(fwnode, "power-opmode", &cap_str);
+       ret = fwnode_property_read_string(fwnode, "typec-power-opmode", &cap_str);
        if (!ret) {
                ret = typec_find_pwr_opmode(cap_str);
                /* Power delivery not yet supported */
index d7d32b6..6caf539 100644 (file)
@@ -13,6 +13,7 @@ config VDPA_SIM
        depends on RUNTIME_TESTING_MENU && HAS_DMA
        select DMA_OPS
        select VHOST_RING
+       select GENERIC_NET_UTILS
        default n
        help
          vDPA networking device simulator which loops TX traffic back
@@ -31,6 +32,7 @@ config IFCVF
 
 config MLX5_VDPA
        bool
+       select VHOST_IOTLB
        help
          Support library for Mellanox VDPA drivers. Provides code that is
          common for all types of VDPA drivers. The following drivers are planned:
index 997cb5d..414e98d 100644 (file)
@@ -46,6 +46,9 @@ static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void
        __poll_t flags = key_to_poll(key);
 
        if (flags & EPOLLIN) {
+               u64 cnt;
+               eventfd_ctx_do_read(virqfd->eventfd, &cnt);
+
                /* An event has been signaled, call function */
                if ((!virqfd->handler ||
                     virqfd->handler(virqfd->opaque, virqfd->data)) &&
index b22adf0..6ff8a50 100644 (file)
@@ -52,7 +52,6 @@
 #define VHOST_SCSI_VERSION  "v0.1"
 #define VHOST_SCSI_NAMELEN 256
 #define VHOST_SCSI_MAX_CDB_SIZE 32
-#define VHOST_SCSI_DEFAULT_TAGS 256
 #define VHOST_SCSI_PREALLOC_SGLS 2048
 #define VHOST_SCSI_PREALLOC_UPAGES 2048
 #define VHOST_SCSI_PREALLOC_PROT_SGLS 2048
@@ -140,6 +139,7 @@ struct vhost_scsi_tpg {
        struct se_portal_group se_tpg;
        /* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
        struct vhost_scsi *vhost_scsi;
+       struct list_head tmf_queue;
 };
 
 struct vhost_scsi_tport {
@@ -189,6 +189,9 @@ struct vhost_scsi_virtqueue {
         * Writers must also take dev mutex and flush under it.
         */
        int inflight_idx;
+       struct vhost_scsi_cmd *scsi_cmds;
+       struct sbitmap scsi_tags;
+       int max_cmds;
 };
 
 struct vhost_scsi {
@@ -209,6 +212,21 @@ struct vhost_scsi {
        int vs_events_nr; /* num of pending events, protected by vq->mutex */
 };
 
+struct vhost_scsi_tmf {
+       struct vhost_work vwork;
+       struct vhost_scsi_tpg *tpg;
+       struct vhost_scsi *vhost;
+       struct vhost_scsi_virtqueue *svq;
+       struct list_head queue_entry;
+
+       struct se_cmd se_cmd;
+       u8 scsi_resp;
+       struct vhost_scsi_inflight *inflight;
+       struct iovec resp_iov;
+       int in_iovs;
+       int vq_desc;
+};
+
 /*
  * Context for processing request and control queue operations.
  */
@@ -320,11 +338,13 @@ static u32 vhost_scsi_tpg_get_inst_index(struct se_portal_group *se_tpg)
        return 1;
 }
 
-static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
+static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd)
 {
        struct vhost_scsi_cmd *tv_cmd = container_of(se_cmd,
                                struct vhost_scsi_cmd, tvc_se_cmd);
-       struct se_session *se_sess = tv_cmd->tvc_nexus->tvn_se_sess;
+       struct vhost_scsi_virtqueue *svq = container_of(tv_cmd->tvc_vq,
+                               struct vhost_scsi_virtqueue, vq);
+       struct vhost_scsi_inflight *inflight = tv_cmd->inflight;
        int i;
 
        if (tv_cmd->tvc_sgl_count) {
@@ -336,8 +356,36 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
                        put_page(sg_page(&tv_cmd->tvc_prot_sgl[i]));
        }
 
-       vhost_scsi_put_inflight(tv_cmd->inflight);
-       target_free_tag(se_sess, se_cmd);
+       sbitmap_clear_bit(&svq->scsi_tags, se_cmd->map_tag);
+       vhost_scsi_put_inflight(inflight);
+}
+
+static void vhost_scsi_release_tmf_res(struct vhost_scsi_tmf *tmf)
+{
+       struct vhost_scsi_tpg *tpg = tmf->tpg;
+       struct vhost_scsi_inflight *inflight = tmf->inflight;
+
+       mutex_lock(&tpg->tv_tpg_mutex);
+       list_add_tail(&tpg->tmf_queue, &tmf->queue_entry);
+       mutex_unlock(&tpg->tv_tpg_mutex);
+       vhost_scsi_put_inflight(inflight);
+}
+
+static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
+{
+       if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) {
+               struct vhost_scsi_tmf *tmf = container_of(se_cmd,
+                                       struct vhost_scsi_tmf, se_cmd);
+
+               vhost_work_queue(&tmf->vhost->dev, &tmf->vwork);
+       } else {
+               struct vhost_scsi_cmd *cmd = container_of(se_cmd,
+                                       struct vhost_scsi_cmd, tvc_se_cmd);
+               struct vhost_scsi *vs = cmd->tvc_vhost;
+
+               llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list);
+               vhost_work_queue(&vs->dev, &vs->vs_completion_work);
+       }
 }
 
 static u32 vhost_scsi_sess_get_index(struct se_session *se_sess)
@@ -362,34 +410,25 @@ static int vhost_scsi_get_cmd_state(struct se_cmd *se_cmd)
        return 0;
 }
 
-static void vhost_scsi_complete_cmd(struct vhost_scsi_cmd *cmd)
-{
-       struct vhost_scsi *vs = cmd->tvc_vhost;
-
-       llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list);
-
-       vhost_work_queue(&vs->dev, &vs->vs_completion_work);
-}
-
 static int vhost_scsi_queue_data_in(struct se_cmd *se_cmd)
 {
-       struct vhost_scsi_cmd *cmd = container_of(se_cmd,
-                               struct vhost_scsi_cmd, tvc_se_cmd);
-       vhost_scsi_complete_cmd(cmd);
+       transport_generic_free_cmd(se_cmd, 0);
        return 0;
 }
 
 static int vhost_scsi_queue_status(struct se_cmd *se_cmd)
 {
-       struct vhost_scsi_cmd *cmd = container_of(se_cmd,
-                               struct vhost_scsi_cmd, tvc_se_cmd);
-       vhost_scsi_complete_cmd(cmd);
+       transport_generic_free_cmd(se_cmd, 0);
        return 0;
 }
 
 static void vhost_scsi_queue_tm_rsp(struct se_cmd *se_cmd)
 {
-       return;
+       struct vhost_scsi_tmf *tmf = container_of(se_cmd, struct vhost_scsi_tmf,
+                                                 se_cmd);
+
+       tmf->scsi_resp = se_cmd->se_tmr_req->response;
+       transport_generic_free_cmd(&tmf->se_cmd, 0);
 }
 
 static void vhost_scsi_aborted_task(struct se_cmd *se_cmd)
@@ -429,15 +468,6 @@ vhost_scsi_allocate_evt(struct vhost_scsi *vs,
        return evt;
 }
 
-static void vhost_scsi_free_cmd(struct vhost_scsi_cmd *cmd)
-{
-       struct se_cmd *se_cmd = &cmd->tvc_se_cmd;
-
-       /* TODO locking against target/backend threads? */
-       transport_generic_free_cmd(se_cmd, 0);
-
-}
-
 static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd)
 {
        return target_put_sess_cmd(se_cmd);
@@ -556,7 +586,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
                } else
                        pr_err("Faulted on virtio_scsi_cmd_resp\n");
 
-               vhost_scsi_free_cmd(cmd);
+               vhost_scsi_release_cmd_res(se_cmd);
        }
 
        vq = -1;
@@ -566,31 +596,31 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
 }
 
 static struct vhost_scsi_cmd *
-vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
+vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
                   unsigned char *cdb, u64 scsi_tag, u16 lun, u8 task_attr,
                   u32 exp_data_len, int data_direction)
 {
+       struct vhost_scsi_virtqueue *svq = container_of(vq,
+                                       struct vhost_scsi_virtqueue, vq);
        struct vhost_scsi_cmd *cmd;
        struct vhost_scsi_nexus *tv_nexus;
-       struct se_session *se_sess;
        struct scatterlist *sg, *prot_sg;
        struct page **pages;
-       int tag, cpu;
+       int tag;
 
        tv_nexus = tpg->tpg_nexus;
        if (!tv_nexus) {
                pr_err("Unable to locate active struct vhost_scsi_nexus\n");
                return ERR_PTR(-EIO);
        }
-       se_sess = tv_nexus->tvn_se_sess;
 
-       tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu);
+       tag = sbitmap_get(&svq->scsi_tags, 0, false);
        if (tag < 0) {
                pr_err("Unable to obtain tag for vhost_scsi_cmd\n");
                return ERR_PTR(-ENOMEM);
        }
 
-       cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[tag];
+       cmd = &svq->scsi_cmds[tag];
        sg = cmd->tvc_sgl;
        prot_sg = cmd->tvc_prot_sgl;
        pages = cmd->tvc_upages;
@@ -599,7 +629,6 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
        cmd->tvc_prot_sgl = prot_sg;
        cmd->tvc_upages = pages;
        cmd->tvc_se_cmd.map_tag = tag;
-       cmd->tvc_se_cmd.map_cpu = cpu;
        cmd->tvc_tag = scsi_tag;
        cmd->tvc_lun = lun;
        cmd->tvc_task_attr = task_attr;
@@ -907,6 +936,11 @@ vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
        return ret;
 }
 
+static u16 vhost_buf_to_lun(u8 *lun_buf)
+{
+       return ((lun_buf[2] << 8) | lun_buf[3]) & 0x3FFF;
+}
+
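The new helper decodes the 8-byte virtio-scsi LUN field: for the single-level addressing used here, bytes 2 and 3 carry the LUN in SAM flat-space format, so the top two address-method bits are masked off. A minimal userspace sketch of the round trip; the encoder is an assumption inferred from the decode side, not taken from the driver:

	#include <assert.h>
	#include <stdint.h>

	/* Hypothetical encoder mirroring vhost_buf_to_lun(): flat-space
	 * addressing stores 0x4000 | lun in bytes 2..3 of the field. */
	static void encode_lun(uint8_t *lun_buf, uint16_t lun)
	{
		lun_buf[2] = 0x40 | ((lun >> 8) & 0x3F);
		lun_buf[3] = lun & 0xFF;
	}

	static uint16_t decode_lun(const uint8_t *lun_buf)
	{
		return ((lun_buf[2] << 8) | lun_buf[3]) & 0x3FFF;
	}

	int main(void)
	{
		uint8_t buf[8] = { 0 };

		encode_lun(buf, 5);
		assert(decode_lun(buf) == 5);
		return 0;
	}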
 static void
 vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 {
@@ -1045,12 +1079,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                        tag = vhost64_to_cpu(vq, v_req_pi.tag);
                        task_attr = v_req_pi.task_attr;
                        cdb = &v_req_pi.cdb[0];
-                       lun = ((v_req_pi.lun[2] << 8) | v_req_pi.lun[3]) & 0x3FFF;
+                       lun = vhost_buf_to_lun(v_req_pi.lun);
                } else {
                        tag = vhost64_to_cpu(vq, v_req.tag);
                        task_attr = v_req.task_attr;
                        cdb = &v_req.cdb[0];
-                       lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF;
+                       lun = vhost_buf_to_lun(v_req.lun);
                }
                /*
                 * Check that the received CDB size does not exceed our
@@ -1065,11 +1099,11 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                                scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE);
                                goto err;
                }
-               cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr,
+               cmd = vhost_scsi_get_cmd(vq, tpg, cdb, tag, lun, task_attr,
                                         exp_data_len + prot_bytes,
                                         data_direction);
                if (IS_ERR(cmd)) {
-                       vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
+                       vq_err(vq, "vhost_scsi_get_cmd failed %ld\n",
                               PTR_ERR(cmd));
                        goto err;
                }
@@ -1088,7 +1122,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                                                      &prot_iter, exp_data_len,
                                                      &data_iter))) {
                                vq_err(vq, "Failed to map iov to sgl\n");
-                               vhost_scsi_release_cmd(&cmd->tvc_se_cmd);
+                               vhost_scsi_release_cmd_res(&cmd->tvc_se_cmd);
                                goto err;
                        }
                }
@@ -1124,9 +1158,9 @@ out:
 }
 
 static void
-vhost_scsi_send_tmf_reject(struct vhost_scsi *vs,
-                          struct vhost_virtqueue *vq,
-                          struct vhost_scsi_ctx *vc)
+vhost_scsi_send_tmf_resp(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
+                        int in_iovs, int vq_desc, struct iovec *resp_iov,
+                        int tmf_resp_code)
 {
        struct virtio_scsi_ctrl_tmf_resp rsp;
        struct iov_iter iov_iter;
@@ -1134,17 +1168,87 @@ vhost_scsi_send_tmf_reject(struct vhost_scsi *vs,
 
        pr_debug("%s\n", __func__);
        memset(&rsp, 0, sizeof(rsp));
-       rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
+       rsp.response = tmf_resp_code;
 
-       iov_iter_init(&iov_iter, READ, &vq->iov[vc->out], vc->in, sizeof(rsp));
+       iov_iter_init(&iov_iter, READ, resp_iov, in_iovs, sizeof(rsp));
 
        ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter);
        if (likely(ret == sizeof(rsp)))
-               vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
+               vhost_add_used_and_signal(&vs->dev, vq, vq_desc, 0);
        else
                pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n");
 }
 
+static void vhost_scsi_tmf_resp_work(struct vhost_work *work)
+{
+       struct vhost_scsi_tmf *tmf = container_of(work, struct vhost_scsi_tmf,
+                                                 vwork);
+       int resp_code;
+
+       if (tmf->scsi_resp == TMR_FUNCTION_COMPLETE)
+               resp_code = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
+       else
+               resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED;
+
+       vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs,
+                                tmf->vq_desc, &tmf->resp_iov, resp_code);
+       vhost_scsi_release_tmf_res(tmf);
+}
+
+static void
+vhost_scsi_handle_tmf(struct vhost_scsi *vs, struct vhost_scsi_tpg *tpg,
+                     struct vhost_virtqueue *vq,
+                     struct virtio_scsi_ctrl_tmf_req *vtmf,
+                     struct vhost_scsi_ctx *vc)
+{
+       struct vhost_scsi_virtqueue *svq = container_of(vq,
+                                       struct vhost_scsi_virtqueue, vq);
+       struct vhost_scsi_tmf *tmf;
+
+       if (vhost32_to_cpu(vq, vtmf->subtype) !=
+           VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET)
+               goto send_reject;
+
+       if (!tpg->tpg_nexus || !tpg->tpg_nexus->tvn_se_sess) {
+               pr_err("Unable to locate active struct vhost_scsi_nexus for LUN RESET.\n");
+               goto send_reject;
+       }
+
+       mutex_lock(&tpg->tv_tpg_mutex);
+       if (list_empty(&tpg->tmf_queue)) {
+               pr_err("Missing reserve TMF. Could not handle LUN RESET.\n");
+               mutex_unlock(&tpg->tv_tpg_mutex);
+               goto send_reject;
+       }
+
+       tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf,
+                              queue_entry);
+       list_del_init(&tmf->queue_entry);
+       mutex_unlock(&tpg->tv_tpg_mutex);
+
+       tmf->tpg = tpg;
+       tmf->vhost = vs;
+       tmf->svq = svq;
+       tmf->resp_iov = vq->iov[vc->out];
+       tmf->vq_desc = vc->head;
+       tmf->in_iovs = vc->in;
+       tmf->inflight = vhost_scsi_get_inflight(vq);
+
+       if (target_submit_tmr(&tmf->se_cmd, tpg->tpg_nexus->tvn_se_sess, NULL,
+                             vhost_buf_to_lun(vtmf->lun), NULL,
+                             TMR_LUN_RESET, GFP_KERNEL, 0,
+                             TARGET_SCF_ACK_KREF) < 0) {
+               vhost_scsi_release_tmf_res(tmf);
+               goto send_reject;
+       }
+
+       return;
+
+send_reject:
+       vhost_scsi_send_tmf_resp(vs, vq, vc->in, vc->head, &vq->iov[vc->out],
+                                VIRTIO_SCSI_S_FUNCTION_REJECTED);
+}
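Note the allocation strategy: the vhost_scsi_tmf is not allocated here, where failure would leave no way to answer the guest, but preallocated in vhost_scsi_port_link() further down and parked on tpg->tmf_queue; handling a LUN RESET only pops the reserved entry off that list under tv_tpg_mutex. A rough userspace analogue of the reserve-then-pop pattern (names are hypothetical):

	#include <pthread.h>
	#include <stdlib.h>

	struct tmf {
		struct tmf *next;
	};

	static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct tmf *pool;	/* reserved objects, filled at "port link" */

	static int reserve_tmf(void)	/* called where allocation may safely fail */
	{
		struct tmf *t = calloc(1, sizeof(*t));

		if (!t)
			return -1;
		pthread_mutex_lock(&pool_lock);
		t->next = pool;
		pool = t;
		pthread_mutex_unlock(&pool_lock);
		return 0;
	}

	static struct tmf *take_tmf(void)	/* fast path; never allocates */
	{
		struct tmf *t;

		pthread_mutex_lock(&pool_lock);
		t = pool;
		if (t)
			pool = t->next;
		pthread_mutex_unlock(&pool_lock);
		return t;
	}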
+
 static void
 vhost_scsi_send_an_resp(struct vhost_scsi *vs,
                        struct vhost_virtqueue *vq,
@@ -1170,6 +1274,7 @@ vhost_scsi_send_an_resp(struct vhost_scsi *vs,
 static void
 vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 {
+       struct vhost_scsi_tpg *tpg;
        union {
                __virtio32 type;
                struct virtio_scsi_ctrl_an_req an;
@@ -1251,12 +1356,12 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                vc.req += typ_size;
                vc.req_size -= typ_size;
 
-               ret = vhost_scsi_get_req(vq, &vc, NULL);
+               ret = vhost_scsi_get_req(vq, &vc, &tpg);
                if (ret)
                        goto err;
 
                if (v_req.type == VIRTIO_SCSI_T_TMF)
-                       vhost_scsi_send_tmf_reject(vs, vq, &vc);
+                       vhost_scsi_handle_tmf(vs, tpg, vq, &v_req.tmf, &vc);
                else
                        vhost_scsi_send_an_resp(vs, vq, &vc);
 err:
@@ -1373,6 +1478,83 @@ static void vhost_scsi_flush(struct vhost_scsi *vs)
                wait_for_completion(&old_inflight[i]->comp);
 }
 
+static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq)
+{
+       struct vhost_scsi_virtqueue *svq = container_of(vq,
+                                       struct vhost_scsi_virtqueue, vq);
+       struct vhost_scsi_cmd *tv_cmd;
+       unsigned int i;
+
+       if (!svq->scsi_cmds)
+               return;
+
+       for (i = 0; i < svq->max_cmds; i++) {
+               tv_cmd = &svq->scsi_cmds[i];
+
+               kfree(tv_cmd->tvc_sgl);
+               kfree(tv_cmd->tvc_prot_sgl);
+               kfree(tv_cmd->tvc_upages);
+       }
+
+       sbitmap_free(&svq->scsi_tags);
+       kfree(svq->scsi_cmds);
+       svq->scsi_cmds = NULL;
+}
+
+static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds)
+{
+       struct vhost_scsi_virtqueue *svq = container_of(vq,
+                                       struct vhost_scsi_virtqueue, vq);
+       struct vhost_scsi_cmd *tv_cmd;
+       unsigned int i;
+
+       if (svq->scsi_cmds)
+               return 0;
+
+       if (sbitmap_init_node(&svq->scsi_tags, max_cmds, -1, GFP_KERNEL,
+                             NUMA_NO_NODE))
+               return -ENOMEM;
+       svq->max_cmds = max_cmds;
+
+       svq->scsi_cmds = kcalloc(max_cmds, sizeof(*tv_cmd), GFP_KERNEL);
+       if (!svq->scsi_cmds) {
+               sbitmap_free(&svq->scsi_tags);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < max_cmds; i++) {
+               tv_cmd = &svq->scsi_cmds[i];
+
+               tv_cmd->tvc_sgl = kcalloc(VHOST_SCSI_PREALLOC_SGLS,
+                                         sizeof(struct scatterlist),
+                                         GFP_KERNEL);
+               if (!tv_cmd->tvc_sgl) {
+                       pr_err("Unable to allocate tv_cmd->tvc_sgl\n");
+                       goto out;
+               }
+
+               tv_cmd->tvc_upages = kcalloc(VHOST_SCSI_PREALLOC_UPAGES,
+                                            sizeof(struct page *),
+                                            GFP_KERNEL);
+               if (!tv_cmd->tvc_upages) {
+                       pr_err("Unable to allocate tv_cmd->tvc_upages\n");
+                       goto out;
+               }
+
+               tv_cmd->tvc_prot_sgl = kcalloc(VHOST_SCSI_PREALLOC_PROT_SGLS,
+                                              sizeof(struct scatterlist),
+                                              GFP_KERNEL);
+               if (!tv_cmd->tvc_prot_sgl) {
+                       pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n");
+                       goto out;
+               }
+       }
+       return 0;
+out:
+       vhost_scsi_destroy_vq_cmds(vq);
+       return -ENOMEM;
+}
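This replaces the old per-session tag pool: each virtqueue now owns an sbitmap of tags indexing a preallocated scsi_cmds array sized to the ring (vq->num is passed in from vhost_scsi_set_endpoint() below). The shape of that allocator, sketched in plain C with a byte-per-tag array standing in for the sbitmap:

	#include <stdlib.h>

	struct cmd { int tag; /* plus preallocated sgl/pages buffers */ };

	struct cmd_pool {
		unsigned char *busy;	/* stand-in for struct sbitmap */
		struct cmd *cmds;
		int max;
	};

	static int pool_init(struct cmd_pool *p, int max)
	{
		p->busy = calloc(max, 1);
		p->cmds = calloc(max, sizeof(*p->cmds));
		if (!p->busy || !p->cmds) {
			free(p->busy);
			free(p->cmds);
			return -1;
		}
		p->max = max;
		return 0;
	}

	static struct cmd *pool_get(struct cmd_pool *p)
	{
		for (int tag = 0; tag < p->max; tag++) {
			if (!p->busy[tag]) {	/* sbitmap_get() does this atomically */
				p->busy[tag] = 1;
				p->cmds[tag].tag = tag;
				return &p->cmds[tag];
			}
		}
		return NULL;	/* pool exhausted; the driver returns -ENOMEM */
	}

	static void pool_put(struct cmd_pool *p, struct cmd *c)
	{
		p->busy[c->tag] = 0;	/* sbitmap_clear_bit() in the driver */
	}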
+
 /*
  * Called from vhost_scsi_ioctl() context to walk the list of available
  * vhost_scsi_tpg with an active struct vhost_scsi_nexus
@@ -1427,10 +1609,9 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
 
                if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
                        if (vs->vs_tpg && vs->vs_tpg[tpg->tport_tpgt]) {
-                               kfree(vs_tpg);
                                mutex_unlock(&tpg->tv_tpg_mutex);
                                ret = -EEXIST;
-                               goto out;
+                               goto undepend;
                        }
                        /*
                         * In order to ensure individual vhost-scsi configfs
@@ -1442,9 +1623,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
                        ret = target_depend_item(&se_tpg->tpg_group.cg_item);
                        if (ret) {
                                pr_warn("target_depend_item() failed: %d\n", ret);
-                               kfree(vs_tpg);
                                mutex_unlock(&tpg->tv_tpg_mutex);
-                               goto out;
+                               goto undepend;
                        }
                        tpg->tv_tpg_vhost_count++;
                        tpg->vhost_scsi = vs;
@@ -1457,6 +1637,16 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
        if (match) {
                memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
                       sizeof(vs->vs_vhost_wwpn));
+
+               for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
+                       vq = &vs->vqs[i].vq;
+                       if (!vhost_vq_is_setup(vq))
+                               continue;
+
+                       if (vhost_scsi_setup_vq_cmds(vq, vq->num))
+                               goto destroy_vq_cmds;
+               }
+
                for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
                        vq = &vs->vqs[i].vq;
                        mutex_lock(&vq->mutex);
@@ -1476,7 +1666,22 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
        vhost_scsi_flush(vs);
        kfree(vs->vs_tpg);
        vs->vs_tpg = vs_tpg;
+       goto out;
 
+destroy_vq_cmds:
+       for (i--; i >= VHOST_SCSI_VQ_IO; i--) {
+               if (!vhost_vq_get_backend(&vs->vqs[i].vq))
+                       vhost_scsi_destroy_vq_cmds(&vs->vqs[i].vq);
+       }
+undepend:
+       for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) {
+               tpg = vs_tpg[i];
+               if (tpg) {
+                       tpg->tv_tpg_vhost_count--;
+                       target_undepend_item(&tpg->se_tpg.tpg_group.cg_item);
+               }
+       }
+       kfree(vs_tpg);
 out:
        mutex_unlock(&vs->dev.mutex);
        mutex_unlock(&vhost_scsi_mutex);
@@ -1549,6 +1754,12 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
                        mutex_lock(&vq->mutex);
                        vhost_vq_set_backend(vq, NULL);
                        mutex_unlock(&vq->mutex);
+                       /*
+                        * Make sure cmds are not running before tearing them
+                        * down.
+                        */
+                       vhost_scsi_flush(vs);
+                       vhost_scsi_destroy_vq_cmds(vq);
                }
        }
        /*
@@ -1811,11 +2022,19 @@ static int vhost_scsi_port_link(struct se_portal_group *se_tpg,
 {
        struct vhost_scsi_tpg *tpg = container_of(se_tpg,
                                struct vhost_scsi_tpg, se_tpg);
+       struct vhost_scsi_tmf *tmf;
+
+       tmf = kzalloc(sizeof(*tmf), GFP_KERNEL);
+       if (!tmf)
+               return -ENOMEM;
+       INIT_LIST_HEAD(&tmf->queue_entry);
+       vhost_work_init(&tmf->vwork, vhost_scsi_tmf_resp_work);
 
        mutex_lock(&vhost_scsi_mutex);
 
        mutex_lock(&tpg->tv_tpg_mutex);
        tpg->tv_tpg_port_count++;
+       list_add_tail(&tmf->queue_entry, &tpg->tmf_queue);
        mutex_unlock(&tpg->tv_tpg_mutex);
 
        vhost_scsi_hotplug(tpg, lun);
@@ -1830,11 +2049,16 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg,
 {
        struct vhost_scsi_tpg *tpg = container_of(se_tpg,
                                struct vhost_scsi_tpg, se_tpg);
+       struct vhost_scsi_tmf *tmf;
 
        mutex_lock(&vhost_scsi_mutex);
 
        mutex_lock(&tpg->tv_tpg_mutex);
        tpg->tv_tpg_port_count--;
+       tmf = list_first_entry(&tpg->tmf_queue, struct vhost_scsi_tmf,
+                              queue_entry);
+       list_del(&tmf->queue_entry);
+       kfree(tmf);
        mutex_unlock(&tpg->tv_tpg_mutex);
 
        vhost_scsi_hotunplug(tpg, lun);
@@ -1842,23 +2066,6 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg,
        mutex_unlock(&vhost_scsi_mutex);
 }
 
-static void vhost_scsi_free_cmd_map_res(struct se_session *se_sess)
-{
-       struct vhost_scsi_cmd *tv_cmd;
-       unsigned int i;
-
-       if (!se_sess->sess_cmd_map)
-               return;
-
-       for (i = 0; i < VHOST_SCSI_DEFAULT_TAGS; i++) {
-               tv_cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[i];
-
-               kfree(tv_cmd->tvc_sgl);
-               kfree(tv_cmd->tvc_prot_sgl);
-               kfree(tv_cmd->tvc_upages);
-       }
-}
-
 static ssize_t vhost_scsi_tpg_attrib_fabric_prot_type_store(
                struct config_item *item, const char *page, size_t count)
 {
@@ -1898,45 +2105,6 @@ static struct configfs_attribute *vhost_scsi_tpg_attrib_attrs[] = {
        NULL,
 };
 
-static int vhost_scsi_nexus_cb(struct se_portal_group *se_tpg,
-                              struct se_session *se_sess, void *p)
-{
-       struct vhost_scsi_cmd *tv_cmd;
-       unsigned int i;
-
-       for (i = 0; i < VHOST_SCSI_DEFAULT_TAGS; i++) {
-               tv_cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[i];
-
-               tv_cmd->tvc_sgl = kcalloc(VHOST_SCSI_PREALLOC_SGLS,
-                                         sizeof(struct scatterlist),
-                                         GFP_KERNEL);
-               if (!tv_cmd->tvc_sgl) {
-                       pr_err("Unable to allocate tv_cmd->tvc_sgl\n");
-                       goto out;
-               }
-
-               tv_cmd->tvc_upages = kcalloc(VHOST_SCSI_PREALLOC_UPAGES,
-                                            sizeof(struct page *),
-                                            GFP_KERNEL);
-               if (!tv_cmd->tvc_upages) {
-                       pr_err("Unable to allocate tv_cmd->tvc_upages\n");
-                       goto out;
-               }
-
-               tv_cmd->tvc_prot_sgl = kcalloc(VHOST_SCSI_PREALLOC_PROT_SGLS,
-                                              sizeof(struct scatterlist),
-                                              GFP_KERNEL);
-               if (!tv_cmd->tvc_prot_sgl) {
-                       pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n");
-                       goto out;
-               }
-       }
-       return 0;
-out:
-       vhost_scsi_free_cmd_map_res(se_sess);
-       return -ENOMEM;
-}
-
 static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
                                const char *name)
 {
@@ -1960,12 +2128,9 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
         * struct se_node_acl for the vhost_scsi struct se_portal_group with
         * the SCSI Initiator port name of the passed configfs group 'name'.
         */
-       tv_nexus->tvn_se_sess = target_setup_session(&tpg->se_tpg,
-                                       VHOST_SCSI_DEFAULT_TAGS,
-                                       sizeof(struct vhost_scsi_cmd),
+       tv_nexus->tvn_se_sess = target_setup_session(&tpg->se_tpg, 0, 0,
                                        TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS,
-                                       (unsigned char *)name, tv_nexus,
-                                       vhost_scsi_nexus_cb);
+                                       (unsigned char *)name, tv_nexus, NULL);
        if (IS_ERR(tv_nexus->tvn_se_sess)) {
                mutex_unlock(&tpg->tv_tpg_mutex);
                kfree(tv_nexus);
@@ -2015,7 +2180,6 @@ static int vhost_scsi_drop_nexus(struct vhost_scsi_tpg *tpg)
                " %s Initiator Port: %s\n", vhost_scsi_dump_proto_id(tpg->tport),
                tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
 
-       vhost_scsi_free_cmd_map_res(se_sess);
        /*
         * Release the SCSI I_T Nexus to the emulated vhost Target Port
         */
@@ -2155,6 +2319,7 @@ vhost_scsi_make_tpg(struct se_wwn *wwn, const char *name)
        }
        mutex_init(&tpg->tv_tpg_mutex);
        INIT_LIST_HEAD(&tpg->tv_tpg_list);
+       INIT_LIST_HEAD(&tpg->tmf_queue);
        tpg->tport = tport;
        tpg->tport_tpgt = tpgt;
 
index 2754f30..29ed417 100644 (file)
@@ -348,7 +348,9 @@ static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
                .last = v->range.last,
        };
 
-       return copy_to_user(argp, &range, sizeof(range));
+       if (copy_to_user(argp, &range, sizeof(range)))
+               return -EFAULT;
+       return 0;
 }
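This is a classic ioctl bug: copy_to_user() returns the number of bytes it failed to copy, not an errno, so returning its result directly hands the caller a positive byte count on fault. A kernel-style sketch of the correct shape:

	/* copy_to_user() returns the number of uncopied bytes; an ioctl
	 * handler must translate any nonzero remainder into -EFAULT. */
	static long copy_out(void __user *argp, const void *buf, size_t len)
	{
		if (copy_to_user(argp, buf, len))
			return -EFAULT;	/* never return the raw remainder */
		return 0;
	}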
 
 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
@@ -577,6 +579,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
 
        if (r)
                vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+       else
+               atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 
        return r;
 }
@@ -608,8 +612,9 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
        unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
        unsigned int gup_flags = FOLL_LONGTERM;
        unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-       unsigned long locked, lock_limit, pinned, i;
+       unsigned long lock_limit, sz2pin, nchunks, i;
        u64 iova = msg->iova;
+       long pinned;
        int ret = 0;
 
        if (msg->iova < v->range.first ||
@@ -620,6 +625,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
                                    msg->iova + msg->size - 1))
                return -EEXIST;
 
+       /* Limit the use of memory for bookkeeping */
        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list)
                return -ENOMEM;
@@ -628,52 +634,75 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
                gup_flags |= FOLL_WRITE;
 
        npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
-       if (!npages)
-               return -EINVAL;
+       if (!npages) {
+               ret = -EINVAL;
+               goto free;
+       }
 
        mmap_read_lock(dev->mm);
 
-       locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-       if (locked > lock_limit) {
+       if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
                ret = -ENOMEM;
-               goto out;
+               goto unlock;
        }
 
        cur_base = msg->uaddr & PAGE_MASK;
        iova &= PAGE_MASK;
+       nchunks = 0;
 
        while (npages) {
-               pinned = min_t(unsigned long, npages, list_size);
-               ret = pin_user_pages(cur_base, pinned,
-                                    gup_flags, page_list, NULL);
-               if (ret != pinned)
+               sz2pin = min_t(unsigned long, npages, list_size);
+               pinned = pin_user_pages(cur_base, sz2pin,
+                                       gup_flags, page_list, NULL);
+               if (sz2pin != pinned) {
+                       if (pinned < 0) {
+                               ret = pinned;
+                       } else {
+                               unpin_user_pages(page_list, pinned);
+                               ret = -ENOMEM;
+                       }
                        goto out;
+               }
+               nchunks++;
 
                if (!last_pfn)
                        map_pfn = page_to_pfn(page_list[0]);
 
-               for (i = 0; i < ret; i++) {
+               for (i = 0; i < pinned; i++) {
                        unsigned long this_pfn = page_to_pfn(page_list[i]);
                        u64 csize;
 
                        if (last_pfn && (this_pfn != last_pfn + 1)) {
                                /* Pin a contiguous chunk of memory */
                                csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-                               if (vhost_vdpa_map(v, iova, csize,
-                                                  map_pfn << PAGE_SHIFT,
-                                                  msg->perm))
+                               ret = vhost_vdpa_map(v, iova, csize,
+                                                    map_pfn << PAGE_SHIFT,
+                                                    msg->perm);
+                               if (ret) {
+                                       /*
+                                        * Unpin the pages that remain unmapped
+                                        * from this point on in the current
+                                        * page_list. The outstanding ones that
+                                        * may span several earlier chunks are
+                                        * unpinned in the common error path
+                                        * below.
+                                        */
+                                       unpin_user_pages(&page_list[i],
+                                                        pinned - i);
                                        goto out;
+                               }
+
                                map_pfn = this_pfn;
                                iova += csize;
+                               nchunks = 0;
                        }
 
                        last_pfn = this_pfn;
                }
 
-               cur_base += ret << PAGE_SHIFT;
-               npages -= ret;
+               cur_base += pinned << PAGE_SHIFT;
+               npages -= pinned;
        }
 
        /* Pin the remaining chunk */
@@ -681,10 +710,27 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
                             map_pfn << PAGE_SHIFT, msg->perm);
 out:
        if (ret) {
+               if (nchunks) {
+                       unsigned long pfn;
+
+                       /*
+                        * Unpin the outstanding pages that were pinned but
+                        * never mapped, due to a vdpa_map() or
+                        * pin_user_pages() failure.
+                        *
+                        * Mapped pages are accounted in vdpa_map(), hence
+                        * the corresponding unpinning will be handled by
+                        * vdpa_unmap().
+                        */
+                       WARN_ON(!last_pfn);
+                       for (pfn = map_pfn; pfn <= last_pfn; pfn++)
+                               unpin_user_page(pfn_to_page(pfn));
+               }
                vhost_vdpa_unmap(v, msg->iova, msg->size);
-               atomic64_sub(npages, &dev->mm->pinned_vm);
        }
+unlock:
        mmap_read_unlock(dev->mm);
+free:
        free_page((unsigned long)page_list);
        return ret;
 }
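The rework also fixes the pin accounting: pages are charged to mm->pinned_vm only after vhost_vdpa_map() succeeds, the RLIMIT_MEMLOCK check compares the request against the current count instead of speculatively adding first, and the error paths unpin everything that was pinned but never mapped. The limit check itself is simple arithmetic; a userspace sketch (assuming 4 KiB pages):

	#include <stdint.h>
	#include <sys/resource.h>

	/* Mirrors: npages + atomic64_read(&mm->pinned_vm) > lock_limit */
	static int may_pin(uint64_t npages, uint64_t pinned_vm)
	{
		struct rlimit r;

		if (getrlimit(RLIMIT_MEMLOCK, &r))
			return 0;
		return npages + pinned_vm <= r.rlim_cur / 4096;
	}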
index 5c835a2..a262e12 100644 (file)
@@ -304,6 +304,12 @@ static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx)
        memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer));
 }
 
+bool vhost_vq_is_setup(struct vhost_virtqueue *vq)
+{
+       return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq);
+}
+EXPORT_SYMBOL_GPL(vhost_vq_is_setup);
+
 static void vhost_vq_reset(struct vhost_dev *dev,
                           struct vhost_virtqueue *vq)
 {
index e016cd3..b063324 100644 (file)
@@ -190,6 +190,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
                      struct vhost_log *log, unsigned int *log_num);
 void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
 
+bool vhost_vq_is_setup(struct vhost_virtqueue *vq);
 int vhost_vq_init_access(struct vhost_virtqueue *);
 int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
 int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
index 8bd8b40..b7403ba 100644 (file)
@@ -730,7 +730,7 @@ EXPORT_SYMBOL(vringh_iov_pull_user);
 /**
  * vringh_iov_push_user - copy bytes into vring_iov.
  * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
- * @dst: the place to copy.
+ * @src: the place to copy from.
  * @len: the maximum length to copy.
  *
  * Returns the bytes copied <= len or a negative errno.
@@ -976,7 +976,7 @@ EXPORT_SYMBOL(vringh_iov_pull_kern);
 /**
  * vringh_iov_push_kern - copy bytes into vring_iov.
  * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
- * @dst: the place to copy.
+ * @src: the place to copy from.
  * @len: the maximum length to copy.
  *
  * Returns the bytes copied <= len or a negative errno.
@@ -1333,7 +1333,7 @@ EXPORT_SYMBOL(vringh_iov_pull_iotlb);
  * vringh_iov_push_iotlb - copy bytes into vring_iov.
  * @vrh: the vring.
  * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
- * @dst: the place to copy.
+ * @src: the place to copy from.
  * @len: the maximum length to copy.
  *
  * Returns the bytes copied <= len or a negative errno.
index 5bc86f4..c8b0ae6 100644 (file)
@@ -1093,7 +1093,12 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
                goto err1;
        }
 
-       fb_virt = ioremap(par->mem->start, screen_fb_size);
+       /*
+        * Map the VRAM cacheable for performance. This is also required for
+        * VM Connect to display properly for ARM64 Linux VM, as the host also
+        * maps the VRAM cacheable.
+        */
+       fb_virt = ioremap_cache(par->mem->start, screen_fb_size);
        if (!fb_virt)
                goto err2;
 
index 523dcdf..3729bea 100644 (file)
@@ -813,6 +813,129 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages)
 }
 EXPORT_SYMBOL_GPL(gnttab_alloc_pages);
 
+#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
+static inline void cache_init(struct gnttab_page_cache *cache)
+{
+       cache->pages = NULL;
+}
+
+static inline bool cache_empty(struct gnttab_page_cache *cache)
+{
+       return !cache->pages;
+}
+
+static inline struct page *cache_deq(struct gnttab_page_cache *cache)
+{
+       struct page *page;
+
+       page = cache->pages;
+       cache->pages = page->zone_device_data;
+
+       return page;
+}
+
+static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
+{
+       page->zone_device_data = cache->pages;
+       cache->pages = page;
+}
+#else
+static inline void cache_init(struct gnttab_page_cache *cache)
+{
+       INIT_LIST_HEAD(&cache->pages);
+}
+
+static inline bool cache_empty(struct gnttab_page_cache *cache)
+{
+       return list_empty(&cache->pages);
+}
+
+static inline struct page *cache_deq(struct gnttab_page_cache *cache)
+{
+       struct page *page;
+
+       page = list_first_entry(&cache->pages, struct page, lru);
+       list_del(&page->lru);
+
+       return page;
+}
+
+static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
+{
+       list_add(&page->lru, &cache->pages);
+}
+#endif
+
+void gnttab_page_cache_init(struct gnttab_page_cache *cache)
+{
+       spin_lock_init(&cache->lock);
+       cache_init(cache);
+       cache->num_pages = 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_init);
+
+int gnttab_page_cache_get(struct gnttab_page_cache *cache, struct page **page)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&cache->lock, flags);
+
+       if (cache_empty(cache)) {
+               spin_unlock_irqrestore(&cache->lock, flags);
+               return gnttab_alloc_pages(1, page);
+       }
+
+       page[0] = cache_deq(cache);
+       cache->num_pages--;
+
+       spin_unlock_irqrestore(&cache->lock, flags);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_get);
+
+void gnttab_page_cache_put(struct gnttab_page_cache *cache, struct page **page,
+                          unsigned int num)
+{
+       unsigned long flags;
+       unsigned int i;
+
+       spin_lock_irqsave(&cache->lock, flags);
+
+       for (i = 0; i < num; i++)
+               cache_enq(cache, page[i]);
+       cache->num_pages += num;
+
+       spin_unlock_irqrestore(&cache->lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_put);
+
+void gnttab_page_cache_shrink(struct gnttab_page_cache *cache, unsigned int num)
+{
+       struct page *page[10];
+       unsigned int i = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&cache->lock, flags);
+
+       while (cache->num_pages > num) {
+               page[i] = cache_deq(cache);
+               cache->num_pages--;
+               if (++i == ARRAY_SIZE(page)) {
+                       spin_unlock_irqrestore(&cache->lock, flags);
+                       gnttab_free_pages(i, page);
+                       i = 0;
+                       spin_lock_irqsave(&cache->lock, flags);
+               }
+       }
+
+       spin_unlock_irqrestore(&cache->lock, flags);
+
+       if (i != 0)
+               gnttab_free_pages(i, page);
+}
+EXPORT_SYMBOL_GPL(gnttab_page_cache_shrink);
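These helpers lift the free-page stash that scsiback kept open-coded (deleted further down) into common grant-table code. Under CONFIG_XEN_UNPOPULATED_ALLOC the cache chains pages through page->zone_device_data as a singly linked stack, matching how the unpopulated allocator itself now tracks free pages; otherwise it uses a list_head. A userspace sketch of the same lock-protected cache, with hypothetical names and a pthread mutex standing in for the spinlock:

	#include <pthread.h>
	#include <stdlib.h>

	struct cpage { struct cpage *next; };	/* stands in for page->zone_device_data */

	struct page_cache {
		pthread_mutex_t lock;
		struct cpage *pages;
		unsigned int num_pages;
	};

	static int cache_get(struct page_cache *c, struct cpage **out)
	{
		pthread_mutex_lock(&c->lock);
		if (!c->pages) {
			pthread_mutex_unlock(&c->lock);
			*out = malloc(sizeof(**out));	/* gnttab_alloc_pages() in the kernel */
			return *out ? 0 : -1;
		}
		*out = c->pages;
		c->pages = (*out)->next;
		c->num_pages--;
		pthread_mutex_unlock(&c->lock);
		return 0;
	}

	static void cache_put(struct page_cache *c, struct cpage *p)
	{
		pthread_mutex_lock(&c->lock);
		p->next = c->pages;
		c->pages = p;
		c->num_pages++;
		pthread_mutex_unlock(&c->lock);
	}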
+
 void gnttab_pages_clear_private(int nr_pages, struct page **pages)
 {
        int i;
index 8c512ea..7762c1b 100644 (file)
@@ -12,7 +12,7 @@
 #include <xen/xen.h>
 
 static DEFINE_MUTEX(list_lock);
-static LIST_HEAD(page_list);
+static struct page *page_list;
 static unsigned int list_count;
 
 static int fill_list(unsigned int nr_pages)
@@ -84,7 +84,8 @@ static int fill_list(unsigned int nr_pages)
                struct page *pg = virt_to_page(vaddr + PAGE_SIZE * i);
 
                BUG_ON(!virt_addr_valid(vaddr + PAGE_SIZE * i));
-               list_add(&pg->lru, &page_list);
+               pg->zone_device_data = page_list;
+               page_list = pg;
                list_count++;
        }
 
@@ -118,12 +119,10 @@ int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages)
        }
 
        for (i = 0; i < nr_pages; i++) {
-               struct page *pg = list_first_entry_or_null(&page_list,
-                                                          struct page,
-                                                          lru);
+               struct page *pg = page_list;
 
                BUG_ON(!pg);
-               list_del(&pg->lru);
+               page_list = pg->zone_device_data;
                list_count--;
                pages[i] = pg;
 
@@ -134,7 +133,8 @@ int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages)
                                unsigned int j;
 
                                for (j = 0; j <= i; j++) {
-                                       list_add(&pages[j]->lru, &page_list);
+                                       pages[j]->zone_device_data = page_list;
+                                       page_list = pages[j];
                                        list_count++;
                                }
                                goto out;
@@ -160,7 +160,8 @@ void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages)
 
        mutex_lock(&list_lock);
        for (i = 0; i < nr_pages; i++) {
-               list_add(&pages[i]->lru, &page_list);
+               pages[i]->zone_device_data = page_list;
+               page_list = pages[i];
                list_count++;
        }
        mutex_unlock(&list_lock);
@@ -189,7 +190,8 @@ static int __init init(void)
                        struct page *pg =
                                pfn_to_page(xen_extra_mem[i].start_pfn + j);
 
-                       list_add(&pg->lru, &page_list);
+                       pg->zone_device_data = page_list;
+                       page_list = pg;
                        list_count++;
                }
        }
index 4acc4e8..862162d 100644 (file)
@@ -99,6 +99,8 @@ struct vscsibk_info {
        struct list_head v2p_entry_lists;
 
        wait_queue_head_t waiting_to_free;
+
+       struct gnttab_page_cache free_pages;
 };
 
 /* theoretical maximum of grants for one request */
@@ -188,10 +190,6 @@ module_param_named(max_buffer_pages, scsiback_max_buffer_pages, int, 0644);
 MODULE_PARM_DESC(max_buffer_pages,
 "Maximum number of free pages to keep in backend buffer");
 
-static DEFINE_SPINLOCK(free_pages_lock);
-static int free_pages_num;
-static LIST_HEAD(scsiback_free_pages);
-
 /* Global spinlock to protect scsiback TPG list */
 static DEFINE_MUTEX(scsiback_mutex);
 static LIST_HEAD(scsiback_list);
@@ -207,41 +205,6 @@ static void scsiback_put(struct vscsibk_info *info)
                wake_up(&info->waiting_to_free);
 }
 
-static void put_free_pages(struct page **page, int num)
-{
-       unsigned long flags;
-       int i = free_pages_num + num, n = num;
-
-       if (num == 0)
-               return;
-       if (i > scsiback_max_buffer_pages) {
-               n = min(num, i - scsiback_max_buffer_pages);
-               gnttab_free_pages(n, page + num - n);
-               n = num - n;
-       }
-       spin_lock_irqsave(&free_pages_lock, flags);
-       for (i = 0; i < n; i++)
-               list_add(&page[i]->lru, &scsiback_free_pages);
-       free_pages_num += n;
-       spin_unlock_irqrestore(&free_pages_lock, flags);
-}
-
-static int get_free_page(struct page **page)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&free_pages_lock, flags);
-       if (list_empty(&scsiback_free_pages)) {
-               spin_unlock_irqrestore(&free_pages_lock, flags);
-               return gnttab_alloc_pages(1, page);
-       }
-       page[0] = list_first_entry(&scsiback_free_pages, struct page, lru);
-       list_del(&page[0]->lru);
-       free_pages_num--;
-       spin_unlock_irqrestore(&free_pages_lock, flags);
-       return 0;
-}
-
 static unsigned long vaddr_page(struct page *page)
 {
        unsigned long pfn = page_to_pfn(page);
@@ -302,7 +265,8 @@ static void scsiback_fast_flush_area(struct vscsibk_pend *req)
                BUG_ON(err);
        }
 
-       put_free_pages(req->pages, req->n_grants);
+       gnttab_page_cache_put(&req->info->free_pages, req->pages,
+                             req->n_grants);
        req->n_grants = 0;
 }
 
@@ -445,8 +409,8 @@ static int scsiback_gnttab_data_map_list(struct vscsibk_pend *pending_req,
        struct vscsibk_info *info = pending_req->info;
 
        for (i = 0; i < cnt; i++) {
-               if (get_free_page(pg + mapcount)) {
-                       put_free_pages(pg, mapcount);
+               if (gnttab_page_cache_get(&info->free_pages, pg + mapcount)) {
+                       gnttab_page_cache_put(&info->free_pages, pg, mapcount);
                        pr_err("no grant page\n");
                        return -ENOMEM;
                }
@@ -796,6 +760,8 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info,
                cond_resched();
        }
 
+       gnttab_page_cache_shrink(&info->free_pages, scsiback_max_buffer_pages);
+
        RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do);
        return more_to_do;
 }
@@ -1233,6 +1199,8 @@ static int scsiback_remove(struct xenbus_device *dev)
 
        scsiback_release_translation_entry(info);
 
+       gnttab_page_cache_shrink(&info->free_pages, 0);
+
        dev_set_drvdata(&dev->dev, NULL);
 
        return 0;
@@ -1263,6 +1231,7 @@ static int scsiback_probe(struct xenbus_device *dev,
        info->irq = 0;
        INIT_LIST_HEAD(&info->v2p_entry_lists);
        spin_lock_init(&info->v2p_lock);
+       gnttab_page_cache_init(&info->free_pages);
 
        err = xenbus_printf(XBT_NIL, dev->nodename, "feature-sg-grant", "%u",
                            SG_ALL);
@@ -1879,13 +1848,6 @@ out:
 
 static void __exit scsiback_exit(void)
 {
-       struct page *page;
-
-       while (free_pages_num) {
-               if (get_free_page(&page))
-                       BUG();
-               gnttab_free_pages(1, &page);
-       }
        target_unregister_template(&scsiback_ops);
        xenbus_unregister_driver(&scsiback_driver);
 }
index b177fd3..be57689 100644 (file)
@@ -655,6 +655,8 @@ const struct file_operations v9fs_cached_file_operations = {
        .release = v9fs_dir_release,
        .lock = v9fs_file_lock,
        .mmap = v9fs_file_mmap,
+       .splice_read = generic_file_splice_read,
+       .splice_write = iter_file_splice_write,
        .fsync = v9fs_file_fsync,
 };
 
@@ -667,6 +669,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
        .lock = v9fs_file_lock_dotl,
        .flock = v9fs_file_flock_dotl,
        .mmap = v9fs_file_mmap,
+       .splice_read = generic_file_splice_read,
+       .splice_write = iter_file_splice_write,
        .fsync = v9fs_file_fsync_dotl,
 };
 
@@ -678,6 +682,8 @@ const struct file_operations v9fs_file_operations = {
        .release = v9fs_dir_release,
        .lock = v9fs_file_lock,
        .mmap = generic_file_readonly_mmap,
+       .splice_read = generic_file_splice_read,
+       .splice_write = iter_file_splice_write,
        .fsync = v9fs_file_fsync,
 };
 
@@ -690,6 +696,8 @@ const struct file_operations v9fs_file_operations_dotl = {
        .lock = v9fs_file_lock_dotl,
        .flock = v9fs_file_flock_dotl,
        .mmap = generic_file_readonly_mmap,
+       .splice_read = generic_file_splice_read,
+       .splice_write = iter_file_splice_write,
        .fsync = v9fs_file_fsync_dotl,
 };
 
@@ -701,6 +709,8 @@ const struct file_operations v9fs_mmap_file_operations = {
        .release = v9fs_dir_release,
        .lock = v9fs_file_lock,
        .mmap = v9fs_mmap_file_mmap,
+       .splice_read = generic_file_splice_read,
+       .splice_write = iter_file_splice_write,
        .fsync = v9fs_file_fsync,
 };
 
@@ -713,5 +723,7 @@ const struct file_operations v9fs_mmap_file_operations_dotl = {
        .lock = v9fs_file_lock_dotl,
        .flock = v9fs_file_flock_dotl,
        .mmap = v9fs_mmap_file_mmap,
+       .splice_read = generic_file_splice_read,
+       .splice_write = iter_file_splice_write,
        .fsync = v9fs_file_fsync_dotl,
 };
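Wiring .splice_read/.splice_write to the generic helpers restores splice(2) and sendfile(2) on 9p files, which began failing with EINVAL once the VFS stopped falling back for filesystems without explicit splice ops. A quick userspace check, splicing a file into a pipe (the mount path is a placeholder):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		int pfd[2];
		int fd = open(argc > 1 ? argv[1] : "/mnt/9p/file", O_RDONLY);

		if (fd < 0 || pipe(pfd))
			return 1;

		/* Returns -1/EINVAL if the fs lacks .splice_read */
		ssize_t n = splice(fd, NULL, pfd[1], NULL, 4096, 0);

		printf("spliced %zd bytes\n", n);
		return n < 0;
	}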
index 1bb5b9d..9068d55 100644 (file)
@@ -823,6 +823,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
                                vp->cb_break_before = afs_calc_vnode_cb_break(vnode);
                                vp->vnode = vnode;
                                vp->put_vnode = true;
+                               vp->speculative = true; /* vnode not locked */
                        }
                }
        }
index 0fe8844..b0d7b89 100644 (file)
@@ -294,6 +294,13 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
                        op->flags &= ~AFS_OPERATION_DIR_CONFLICT;
                }
        } else if (vp->scb.have_status) {
+               if (vp->dv_before + vp->dv_delta != vp->scb.status.data_version &&
+                   vp->speculative)
+                       /* Ignore the result of a speculative bulk status fetch
+                        * if it splits around a modification op, thereby
+                        * appearing to regress the data version.
+                        */
+                       goto out;
                afs_apply_status(op, vp);
                if (vp->scb.have_cb)
                        afs_apply_callback(op, vp);
@@ -305,6 +312,7 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
                }
        }
 
+out:
        write_sequnlock(&vnode->cb_lock);
 
        if (vp->scb.have_status)
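Concretely: if the directory's data version was 3 when the speculative bulk status was issued (dv_before = 3, dv_delta = 0) and a concurrent create bumps the server's version to 4 before the reply is processed, then dv_before + dv_delta = 3 != 4; applying that stale status would make the data version appear to move backwards, so it is discarded instead.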
index 14d5d75..0d150a2 100644 (file)
@@ -755,6 +755,7 @@ struct afs_vnode_param {
        bool                    update_ctime:1; /* Need to update the ctime */
        bool                    set_size:1;     /* Must update i_size */
        bool                    op_unlinked:1;  /* True if file was unlinked by op */
+       bool                    speculative:1;  /* T if speculative status fetch (no vnode lock) */
 };
 
 /*
index 6c5900d..e38bb1e 100644 (file)
@@ -230,6 +230,9 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
 
        _enter(",%s", name);
 
+       if (fc->source)
+               return invalf(fc, "kAFS: Multiple sources not supported");
+
        if (!name) {
                printk(KERN_ERR "kAFS: no volume name specified\n");
                return -EINVAL;
index 0378933..0b29bdb 100644 (file)
@@ -878,7 +878,10 @@ struct btrfs_fs_info {
         */
        struct ulist *qgroup_ulist;
 
-       /* protect user change for quota operations */
+       /*
+        * Protect user change for quota operations. If a transaction is needed,
+        * it must be started before locking this lock.
+        */
        struct mutex qgroup_ioctl_lock;
 
        /* list of dirty qgroups to be written at next commit */
index 87355a3..4373da7 100644 (file)
@@ -452,46 +452,6 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
        }
 }
 
-static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
-                                        const u64 start,
-                                        const u64 len,
-                                        struct extent_state **cached_state)
-{
-       u64 search_start = start;
-       const u64 end = start + len - 1;
-
-       while (search_start < end) {
-               const u64 search_len = end - search_start + 1;
-               struct extent_map *em;
-               u64 em_len;
-               int ret = 0;
-
-               em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
-               if (IS_ERR(em))
-                       return PTR_ERR(em);
-
-               if (em->block_start != EXTENT_MAP_HOLE)
-                       goto next;
-
-               em_len = em->len;
-               if (em->start < search_start)
-                       em_len -= search_start - em->start;
-               if (em_len > search_len)
-                       em_len = search_len;
-
-               ret = set_extent_bit(&inode->io_tree, search_start,
-                                    search_start + em_len - 1,
-                                    EXTENT_DELALLOC_NEW,
-                                    NULL, cached_state, GFP_NOFS);
-next:
-               search_start = extent_map_end(em);
-               free_extent_map(em);
-               if (ret)
-                       return ret;
-       }
-       return 0;
-}
-
 /*
  * after copy_from_user, pages need to be dirtied and we need to make
  * sure holes are created between the current EOF and the start of
@@ -528,23 +488,6 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
                         EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                         0, 0, cached);
 
-       if (!btrfs_is_free_space_inode(inode)) {
-               if (start_pos >= isize &&
-                   !(inode->flags & BTRFS_INODE_PREALLOC)) {
-                       /*
-                        * There can't be any extents following eof in this case
-                        * so just set the delalloc new bit for the range
-                        * directly.
-                        */
-                       extra_bits |= EXTENT_DELALLOC_NEW;
-               } else {
-                       err = btrfs_find_new_delalloc_bytes(inode, start_pos,
-                                                           num_bytes, cached);
-                       if (err)
-                               return err;
-               }
-       }
-
        err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
                                        extra_bits, cached);
        if (err)
index da58c58..7e8d816 100644 (file)
@@ -2253,11 +2253,69 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
+                                        const u64 start,
+                                        const u64 len,
+                                        struct extent_state **cached_state)
+{
+       u64 search_start = start;
+       const u64 end = start + len - 1;
+
+       while (search_start < end) {
+               const u64 search_len = end - search_start + 1;
+               struct extent_map *em;
+               u64 em_len;
+               int ret = 0;
+
+               em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
+               if (IS_ERR(em))
+                       return PTR_ERR(em);
+
+               if (em->block_start != EXTENT_MAP_HOLE)
+                       goto next;
+
+               em_len = em->len;
+               if (em->start < search_start)
+                       em_len -= search_start - em->start;
+               if (em_len > search_len)
+                       em_len = search_len;
+
+               ret = set_extent_bit(&inode->io_tree, search_start,
+                                    search_start + em_len - 1,
+                                    EXTENT_DELALLOC_NEW,
+                                    NULL, cached_state, GFP_NOFS);
+next:
+               search_start = extent_map_end(em);
+               free_extent_map(em);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
 int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
                              unsigned int extra_bits,
                              struct extent_state **cached_state)
 {
        WARN_ON(PAGE_ALIGNED(end));
+
+       if (start >= i_size_read(&inode->vfs_inode) &&
+           !(inode->flags & BTRFS_INODE_PREALLOC)) {
+               /*
+                * There can't be any extents following eof in this case so just
+                * set the delalloc new bit for the range directly.
+                */
+               extra_bits |= EXTENT_DELALLOC_NEW;
+       } else {
+               int ret;
+
+               ret = btrfs_find_new_delalloc_bytes(inode, start,
+                                                   end + 1 - start,
+                                                   cached_state);
+               if (ret)
+                       return ret;
+       }
+
        return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
                                   cached_state);
 }
index 77c5474..87bd37b 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/btrfs.h>
+#include <linux/sched/mm.h>
 
 #include "ctree.h"
 #include "transaction.h"
@@ -497,13 +498,13 @@ next2:
                        break;
        }
 out:
+       btrfs_free_path(path);
        fs_info->qgroup_flags |= flags;
        if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
                clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
        else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
                 ret >= 0)
                ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
-       btrfs_free_path(path);
 
        if (ret < 0) {
                ulist_free(fs_info->qgroup_ulist);
@@ -936,6 +937,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
        struct btrfs_key found_key;
        struct btrfs_qgroup *qgroup = NULL;
        struct btrfs_trans_handle *trans = NULL;
+       struct ulist *ulist = NULL;
        int ret = 0;
        int slot;
 
@@ -943,8 +945,8 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
        if (fs_info->quota_root)
                goto out;
 
-       fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
-       if (!fs_info->qgroup_ulist) {
+       ulist = ulist_alloc(GFP_KERNEL);
+       if (!ulist) {
                ret = -ENOMEM;
                goto out;
        }
@@ -952,6 +954,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
        ret = btrfs_sysfs_add_qgroups(fs_info);
        if (ret < 0)
                goto out;
+
+       /*
+        * Unlock qgroup_ioctl_lock before starting the transaction. This is to
+        * avoid lock acquisition inversion problems (reported by lockdep) between
+        * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we
+        * start a transaction.
+        * After we start the transaction, lock qgroup_ioctl_lock again and
+        * check if someone else created the quota root in the meantime. If so,
+        * just return success and release the transaction handle.
+        *
+        * Also we don't need to worry about someone else calling
+        * btrfs_sysfs_add_qgroups() after we unlock and getting an error because
+        * that function returns 0 (success) when the sysfs entries already exist.
+        */
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
+
        /*
         * 1 for quota root item
         * 1 for BTRFS_QGROUP_STATUS item
@@ -961,12 +979,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
         * would be a lot of overkill.
         */
        trans = btrfs_start_transaction(tree_root, 2);
+
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                trans = NULL;
                goto out;
        }
 
+       if (fs_info->quota_root)
+               goto out;
+
+       fs_info->qgroup_ulist = ulist;
+       ulist = NULL;
+
        /*
         * initially create the quota tree
         */
@@ -1124,11 +1150,14 @@ out:
        if (ret) {
                ulist_free(fs_info->qgroup_ulist);
                fs_info->qgroup_ulist = NULL;
-               if (trans)
-                       btrfs_end_transaction(trans);
                btrfs_sysfs_del_qgroups(fs_info);
        }
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
+       if (ret && trans)
+               btrfs_end_transaction(trans);
+       else if (trans)
+               ret = btrfs_end_transaction(trans);
+       ulist_free(ulist);
        return ret;
 }
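The fix follows the standard drop-retake-recheck shape: because starting a transaction takes the vfs freeze semaphores, it must not happen under qgroup_ioctl_lock, so the lock is dropped, the transaction started, the lock retaken, and the quota_root test repeated in case another task won the race. In outline, with pthreads standing in for the kernel primitives (all names hypothetical):

	#include <pthread.h>

	static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
	static int quota_enabled;		/* state protected by 'a' */

	/* Stand-ins: start_txn() internally takes a second lock 'B', so
	 * calling it under 'a' would invert the lock order used elsewhere. */
	static int start_txn(void) { return 0; }
	static void end_txn(void) { }

	static int enable_quota(void)
	{
		pthread_mutex_lock(&a);
		if (quota_enabled) {
			pthread_mutex_unlock(&a);
			return 0;
		}
		pthread_mutex_unlock(&a);	/* drop 'a' before taking 'B' */

		if (start_txn())
			return -1;

		pthread_mutex_lock(&a);		/* retake 'a'... */
		if (!quota_enabled) {		/* ...and recheck the guarded state */
			/* do the actual enable under both 'a' and 'B' */
			quota_enabled = 1;
		}
		pthread_mutex_unlock(&a);
		end_txn();
		return 0;
	}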
 
@@ -1141,19 +1170,29 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
        mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (!fs_info->quota_root)
                goto out;
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
 
        /*
         * 1 For the root item
         *
         * We should also reserve enough items for the quota tree deletion in
         * btrfs_clean_quota_tree but this is not done.
+        *
+        * Also, we must always start a transaction without holding the mutex
+        * qgroup_ioctl_lock, see btrfs_quota_enable().
         */
        trans = btrfs_start_transaction(fs_info->tree_root, 1);
+
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
+               trans = NULL;
                goto out;
        }
 
+       if (!fs_info->quota_root)
+               goto out;
+
        clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
        btrfs_qgroup_wait_for_completion(fs_info, false);
        spin_lock(&fs_info->qgroup_lock);
@@ -1167,13 +1206,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
        ret = btrfs_clean_quota_tree(trans, quota_root);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
-               goto end_trans;
+               goto out;
        }
 
        ret = btrfs_del_root(trans, &quota_root->root_key);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
-               goto end_trans;
+               goto out;
        }
 
        list_del(&quota_root->dirty_list);
@@ -1185,10 +1224,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
 
        btrfs_put_root(quota_root);
 
-end_trans:
-       ret = btrfs_end_transaction(trans);
 out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
+       if (ret && trans)
+               btrfs_end_transaction(trans);
+       else if (trans)
+               ret = btrfs_end_transaction(trans);
+
        return ret;
 }
 
@@ -1324,13 +1366,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
        struct btrfs_qgroup *member;
        struct btrfs_qgroup_list *list;
        struct ulist *tmp;
+       unsigned int nofs_flag;
        int ret = 0;
 
        /* Check the level of src and dst first */
        if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
                return -EINVAL;
 
+       /* We hold a transaction handle open, must do a NOFS allocation. */
+       nofs_flag = memalloc_nofs_save();
        tmp = ulist_alloc(GFP_KERNEL);
+       memalloc_nofs_restore(nofs_flag);
        if (!tmp)
                return -ENOMEM;
 
@@ -1387,10 +1433,14 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
        struct btrfs_qgroup_list *list;
        struct ulist *tmp;
        bool found = false;
+       unsigned int nofs_flag;
        int ret = 0;
        int ret2;
 
+       /* We hold a transaction handle open, must do a NOFS allocation. */
+       nofs_flag = memalloc_nofs_save();
        tmp = ulist_alloc(GFP_KERNEL);
+       memalloc_nofs_restore(nofs_flag);
        if (!tmp)
                return -ENOMEM;
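memalloc_nofs_save()/memalloc_nofs_restore() scope the current task so that every allocation inside the window implicitly behaves as GFP_NOFS: a GFP_KERNEL allocation made while a transaction handle is held can no longer recurse into filesystem reclaim and deadlock on the transaction. The pattern in isolation, as a kernel-style sketch:

	#include <linux/sched/mm.h>
	#include <linux/slab.h>

	/* Any allocation between save and restore drops __GFP_FS, so it
	 * cannot re-enter the filesystem while we hold a transaction. */
	static void *alloc_with_trans_held(size_t size)
	{
		unsigned int nofs_flag = memalloc_nofs_save();
		void *obj = kmalloc(size, GFP_KERNEL);

		memalloc_nofs_restore(nofs_flag);
		return obj;
	}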
 
@@ -3512,6 +3562,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
 {
        struct btrfs_trans_handle *trans;
        int ret;
+       bool can_commit = true;
 
        /*
         * We don't want to run flush again and again, so if there is a running
@@ -3523,6 +3574,20 @@ static int try_flush_qgroup(struct btrfs_root *root)
                return 0;
        }
 
+       /*
+        * If the current process holds a transaction, we shouldn't flush, as we
+        * assume all space reservation happens before a transaction handle is
+        * held.
+        *
+        * But there are cases like btrfs_delayed_item_reserve_metadata() where
+        * we try to reserve space with a transaction handle already held.
+        * In that case we can't commit the transaction, but we at least try to
+        * end it and hope the started data writes can free some space.
+        */
+       if (current->journal_info &&
+           current->journal_info != BTRFS_SEND_TRANS_STUB)
+               can_commit = false;
+
        ret = btrfs_start_delalloc_snapshot(root);
        if (ret < 0)
                goto out;
@@ -3534,7 +3599,10 @@ static int try_flush_qgroup(struct btrfs_root *root)
                goto out;
        }
 
-       ret = btrfs_commit_transaction(trans);
+       if (can_commit)
+               ret = btrfs_commit_transaction(trans);
+       else
+               ret = btrfs_end_transaction(trans);
 out:
        clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
        wake_up(&root->qgroup_flush_wait);
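
The new bail-out works because btrfs stores the running transaction handle in current->journal_info, with send installing a stub marker instead. A hypothetical helper that makes the test explicit:

static bool holds_real_trans_handle(void)
{
        return current->journal_info &&
               current->journal_info != BTRFS_SEND_TRANS_STUB;
}
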
index e6719f7..0402206 100644 (file)
@@ -983,7 +983,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
                               BTRFS_MAX_EXTENT_SIZE >> 1,
                               (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
-                              EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
+                              EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+                              EXTENT_UPTODATE, 0, 0, NULL);
        if (ret) {
                test_err("clear_extent_bit returned %d", ret);
                goto out;
@@ -1050,7 +1051,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
                               BTRFS_MAX_EXTENT_SIZE + sectorsize,
                               BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
-                              EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
+                              EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+                              EXTENT_UPTODATE, 0, 0, NULL);
        if (ret) {
                test_err("clear_extent_bit returned %d", ret);
                goto out;
@@ -1082,7 +1084,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 
        /* Empty */
        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
-                              EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
+                              EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+                              EXTENT_UPTODATE, 0, 0, NULL);
        if (ret) {
                test_err("clear_extent_bit returned %d", ret);
                goto out;
@@ -1097,7 +1100,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
 out:
        if (ret)
                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
-                                EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
+                                EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+                                EXTENT_UPTODATE, 0, 0, NULL);
        iput(inode);
        btrfs_free_dummy_root(root);
        btrfs_free_dummy_fs_info(fs_info);
index 8784b74..ea2bb4c 100644 (file)
@@ -1068,6 +1068,7 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
                            "invalid root item size, have %u expect %zu or %u",
                            btrfs_item_size_nr(leaf, slot), sizeof(ri),
                            btrfs_legacy_root_item_size());
+               return -EUCLEAN;
        }
 
        /*
@@ -1423,6 +1424,7 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
        "invalid item size, have %u expect aligned to %zu for key type %u",
                            btrfs_item_size_nr(leaf, slot),
                            sizeof(*dref), key->type);
+               return -EUCLEAN;
        }
        if (!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize)) {
                generic_err(leaf, slot,
@@ -1451,6 +1453,7 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
                        extent_err(leaf, slot,
        "invalid extent data backref offset, have %llu expect aligned to %u",
                                   offset, leaf->fs_info->sectorsize);
+                       return -EUCLEAN;
                }
        }
        return 0;
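
All three tree-checker hunks fix the same defect: the corruption was logged, but the function fell through and returned 0, so the bad item was still accepted. The intended shape, sketched with hypothetical names (log first so the admin sees why, then reject the leaf):

        if (unlikely(item_size != expected_size)) {
                report_corruption(leaf, slot, "bad item size");
                return -EUCLEAN;        /* previously missing */
        }
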
index a6406b3..7863766 100644 (file)
@@ -940,7 +940,13 @@ static noinline struct btrfs_device *device_list_add(const char *path,
                        if (device->bdev != path_bdev) {
                                bdput(path_bdev);
                                mutex_unlock(&fs_devices->device_list_mutex);
-                               btrfs_warn_in_rcu(device->fs_info,
+                               /*
+                                * device->fs_info may not be reliable here, so
+                                * pass in a NULL instead. This avoids a
+                                * possible use-after-free when the fs_info and
+                                * fs_info->sb are already torn down.
+                                */
+                               btrfs_warn_in_rcu(NULL,
        "duplicate device %s devid %llu generation %llu scanned by %s (%d)",
                                                  path, devid, found_transid,
                                                  current->comm,
index 23b21e9..ef4784e 100644 (file)
@@ -1266,6 +1266,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
                cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
        } else if (mode_from_special_sid) {
                rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr, true);
+               kfree(pntsd);
        } else {
                /* get approximated mode from ACL */
                rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr, false);
index c38156f..44f9cce 100644 (file)
@@ -876,6 +876,8 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
        list_del_init(&server->tcp_ses_list);
        spin_unlock(&cifs_tcp_ses_lock);
 
+       cancel_delayed_work_sync(&server->echo);
+
        spin_lock(&GlobalMid_Lock);
        server->tcpStatus = CifsExiting;
        spin_unlock(&GlobalMid_Lock);
@@ -4544,7 +4546,8 @@ static void set_root_ses(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
        if (ses) {
                spin_lock(&cifs_tcp_ses_lock);
                ses->ses_count++;
-               ses->tcon_ipc->remap = cifs_remap(cifs_sb);
+               if (ses->tcon_ipc)
+                       ses->tcon_ipc->remap = cifs_remap(cifs_sb);
                spin_unlock(&cifs_tcp_ses_lock);
        }
        *root_ses = ses;
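
The first hunk cancels the periodic echo worker synchronously before the server is torn down; cancel_delayed_work_sync() also waits for a handler that is already running, so after it returns the echo callback can no longer touch the dying server. The ordering in isolation (struct conn and conn_teardown() are hypothetical):

#include <linux/workqueue.h>

struct conn {
        struct delayed_work echo;       /* periodic keep-alive */
        /* ... */
};

static void conn_teardown(struct conn *c)
{
        cancel_delayed_work_sync(&c->echo);     /* waits out a running handler */
        /* ... now safe to free the state the handler uses ... */
}
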
index 504766c..3d914d7 100644 (file)
@@ -264,7 +264,7 @@ smb2_revert_current_mid(struct TCP_Server_Info *server, const unsigned int val)
 }
 
 static struct mid_q_entry *
-smb2_find_mid(struct TCP_Server_Info *server, char *buf)
+__smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue)
 {
        struct mid_q_entry *mid;
        struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
@@ -281,6 +281,10 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
                    (mid->mid_state == MID_REQUEST_SUBMITTED) &&
                    (mid->command == shdr->Command)) {
                        kref_get(&mid->refcount);
+                       if (dequeue) {
+                               list_del_init(&mid->qhead);
+                               mid->mid_flags |= MID_DELETED;
+                       }
                        spin_unlock(&GlobalMid_Lock);
                        return mid;
                }
@@ -289,6 +293,18 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
        return NULL;
 }
 
+static struct mid_q_entry *
+smb2_find_mid(struct TCP_Server_Info *server, char *buf)
+{
+       return __smb2_find_mid(server, buf, false);
+}
+
+static struct mid_q_entry *
+smb2_find_dequeue_mid(struct TCP_Server_Info *server, char *buf)
+{
+       return __smb2_find_mid(server, buf, true);
+}
+
 static void
 smb2_dump_detail(void *buf, struct TCP_Server_Info *server)
 {
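
This is the usual bool-parameter split: the lookup loop lives once in __smb2_find_mid(), and the dequeue flag decides whether the entry is unlinked under the same hold of GlobalMid_Lock that found it, closing the window in which another thread could dequeue and free the mid between lookup and removal. A generic sketch of the shape (struct entry and its fields are hypothetical):

static struct entry *__find_entry(struct list_head *q, u64 id,
                                  spinlock_t *lock, bool dequeue)
{
        struct entry *e;

        spin_lock(lock);
        list_for_each_entry(e, q, link) {
                if (e->id != id)
                        continue;
                kref_get(&e->ref);              /* pin before dropping lock */
                if (dequeue)
                        list_del_init(&e->link);
                spin_unlock(lock);
                return e;
        }
        spin_unlock(lock);
        return NULL;
}
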
@@ -3098,8 +3114,8 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon,
        rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE;
 
        rc = SMB2_ioctl_init(tcon, server,
-                            &rqst[1], fid.persistent_fid,
-                            fid.volatile_fid, FSCTL_GET_REPARSE_POINT,
+                            &rqst[1], COMPOUND_FID,
+                            COMPOUND_FID, FSCTL_GET_REPARSE_POINT,
                             true /* is_fctl */, NULL, 0,
                             CIFSMaxBufSize -
                             MAX_SMB2_CREATE_RESPONSE_SIZE -
@@ -4356,7 +4372,8 @@ init_read_bvec(struct page **pages, unsigned int npages, unsigned int data_size,
 static int
 handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
                 char *buf, unsigned int buf_len, struct page **pages,
-                unsigned int npages, unsigned int page_data_size)
+                unsigned int npages, unsigned int page_data_size,
+                bool is_offloaded)
 {
        unsigned int data_offset;
        unsigned int data_len;
@@ -4378,7 +4395,8 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
 
        if (server->ops->is_session_expired &&
            server->ops->is_session_expired(buf)) {
-               cifs_reconnect(server);
+               if (!is_offloaded)
+                       cifs_reconnect(server);
                return -1;
        }
 
@@ -4402,7 +4420,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
                cifs_dbg(FYI, "%s: server returned error %d\n",
                         __func__, rdata->result);
                /* normal error on read response */
-               dequeue_mid(mid, false);
+               if (is_offloaded)
+                       mid->mid_state = MID_RESPONSE_RECEIVED;
+               else
+                       dequeue_mid(mid, false);
                return 0;
        }
 
@@ -4426,7 +4447,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
                cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
                         __func__, data_offset);
                rdata->result = -EIO;
-               dequeue_mid(mid, rdata->result);
+               if (is_offloaded)
+                       mid->mid_state = MID_RESPONSE_MALFORMED;
+               else
+                       dequeue_mid(mid, rdata->result);
                return 0;
        }
 
@@ -4442,21 +4466,30 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
                        cifs_dbg(FYI, "%s: data offset (%u) beyond 1st page of response\n",
                                 __func__, data_offset);
                        rdata->result = -EIO;
-                       dequeue_mid(mid, rdata->result);
+                       if (is_offloaded)
+                               mid->mid_state = MID_RESPONSE_MALFORMED;
+                       else
+                               dequeue_mid(mid, rdata->result);
                        return 0;
                }
 
                if (data_len > page_data_size - pad_len) {
                        /* data_len is corrupt -- discard frame */
                        rdata->result = -EIO;
-                       dequeue_mid(mid, rdata->result);
+                       if (is_offloaded)
+                               mid->mid_state = MID_RESPONSE_MALFORMED;
+                       else
+                               dequeue_mid(mid, rdata->result);
                        return 0;
                }
 
                rdata->result = init_read_bvec(pages, npages, page_data_size,
                                               cur_off, &bvec);
                if (rdata->result != 0) {
-                       dequeue_mid(mid, rdata->result);
+                       if (is_offloaded)
+                               mid->mid_state = MID_RESPONSE_MALFORMED;
+                       else
+                               dequeue_mid(mid, rdata->result);
                        return 0;
                }
 
@@ -4471,7 +4504,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
                /* read response payload cannot be in both buf and pages */
                WARN_ONCE(1, "buf can not contain only a part of read data");
                rdata->result = -EIO;
-               dequeue_mid(mid, rdata->result);
+               if (is_offloaded)
+                       mid->mid_state = MID_RESPONSE_MALFORMED;
+               else
+                       dequeue_mid(mid, rdata->result);
                return 0;
        }
 
@@ -4482,7 +4518,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
        if (length < 0)
                return length;
 
-       dequeue_mid(mid, false);
+       if (is_offloaded)
+               mid->mid_state = MID_RESPONSE_RECEIVED;
+       else
+               dequeue_mid(mid, false);
        return length;
 }
 
@@ -4511,15 +4550,34 @@ static void smb2_decrypt_offload(struct work_struct *work)
        }
 
        dw->server->lstrp = jiffies;
-       mid = smb2_find_mid(dw->server, dw->buf);
+       mid = smb2_find_dequeue_mid(dw->server, dw->buf);
        if (mid == NULL)
                cifs_dbg(FYI, "mid not found\n");
        else {
                mid->decrypted = true;
                rc = handle_read_data(dw->server, mid, dw->buf,
                                      dw->server->vals->read_rsp_size,
-                                     dw->ppages, dw->npages, dw->len);
-               mid->callback(mid);
+                                     dw->ppages, dw->npages, dw->len,
+                                     true);
+               if (rc >= 0) {
+#ifdef CONFIG_CIFS_STATS2
+                       mid->when_received = jiffies;
+#endif
+                       mid->callback(mid);
+               } else {
+                       spin_lock(&GlobalMid_Lock);
+                       if (dw->server->tcpStatus == CifsNeedReconnect) {
+                               mid->mid_state = MID_RETRY_NEEDED;
+                               spin_unlock(&GlobalMid_Lock);
+                               mid->callback(mid);
+                       } else {
+                               mid->mid_state = MID_REQUEST_SUBMITTED;
+                               mid->mid_flags &= ~(MID_DELETED);
+                               list_add_tail(&mid->qhead,
+                                       &dw->server->pending_mid_q);
+                               spin_unlock(&GlobalMid_Lock);
+                       }
+               }
                cifs_mid_q_entry_release(mid);
        }
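
Because smb2_find_dequeue_mid() already unlinked the mid, a failed offloaded decrypt cannot simply drop it: either the connection is reconnecting, in which case the mid is completed with MID_RETRY_NEEDED, or the mid is restored to the submitted state and put back on pending_mid_q so the regular receive path can find it again. The hand-back pattern in miniature (all names hypothetical):

        spin_lock(&queue_lock);
        if (conn_needs_reconnect) {
                item->state = RETRY_NEEDED;
                spin_unlock(&queue_lock);
                item->callback(item);           /* complete with retry status */
        } else {
                item->state = SUBMITTED;        /* undo the dequeue */
                item->flags &= ~DELETED;
                list_add_tail(&item->link, &pending_q);
                spin_unlock(&queue_lock);
        }
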
 
@@ -4622,7 +4680,7 @@ non_offloaded_decrypt:
                (*mid)->decrypted = true;
                rc = handle_read_data(server, *mid, buf,
                                      server->vals->read_rsp_size,
-                                     pages, npages, len);
+                                     pages, npages, len, false);
        }
 
 free_pages:
@@ -4765,7 +4823,7 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid)
        char *buf = server->large_buf ? server->bigbuf : server->smallbuf;
 
        return handle_read_data(server, mid, buf, server->pdu_size,
-                               NULL, 0, 0);
+                               NULL, 0, 0, false);
 }
 
 static int
index 445e808..acb7270 100644 (file)
@@ -2272,17 +2272,15 @@ static struct crt_sd_ctxt *
 create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
 {
        struct crt_sd_ctxt *buf;
-       struct cifs_ace *pace;
-       unsigned int sdlen, acelen;
+       __u8 *ptr, *aclptr;
+       unsigned int acelen, acl_size, ace_count;
        unsigned int owner_offset = 0;
        unsigned int group_offset = 0;
+       struct smb3_acl acl;
 
-       *len = roundup(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 2), 8);
+       *len = roundup(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 4), 8);
 
        if (set_owner) {
-               /* offset fields are from beginning of security descriptor not of create context */
-               owner_offset = sizeof(struct smb3_acl) + (sizeof(struct cifs_ace) * 2);
-
                /* sizeof(struct owner_group_sids) is already multiple of 8 so no need to round */
                *len += sizeof(struct owner_group_sids);
        }
@@ -2291,26 +2289,22 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
        if (buf == NULL)
                return buf;
 
+       ptr = (__u8 *)&buf[1];
        if (set_owner) {
+               /* offset fields are from beginning of security descriptor not of create context */
+               owner_offset = ptr - (__u8 *)&buf->sd;
                buf->sd.OffsetOwner = cpu_to_le32(owner_offset);
-               group_offset = owner_offset + sizeof(struct owner_sid);
+               group_offset = owner_offset + offsetof(struct owner_group_sids, group);
                buf->sd.OffsetGroup = cpu_to_le32(group_offset);
+
+               setup_owner_group_sids(ptr);
+               ptr += sizeof(struct owner_group_sids);
        } else {
                buf->sd.OffsetOwner = 0;
                buf->sd.OffsetGroup = 0;
        }
 
-       sdlen = sizeof(struct smb3_sd) + sizeof(struct smb3_acl) +
-                2 * sizeof(struct cifs_ace);
-       if (set_owner) {
-               sdlen += sizeof(struct owner_group_sids);
-               setup_owner_group_sids(owner_offset + sizeof(struct create_context) + 8 /* name */
-                       + (char *)buf);
-       }
-
-       buf->ccontext.DataOffset = cpu_to_le16(offsetof
-                                       (struct crt_sd_ctxt, sd));
-       buf->ccontext.DataLength = cpu_to_le32(sdlen);
+       buf->ccontext.DataOffset = cpu_to_le16(offsetof(struct crt_sd_ctxt, sd));
        buf->ccontext.NameOffset = cpu_to_le16(offsetof(struct crt_sd_ctxt, Name));
        buf->ccontext.NameLength = cpu_to_le16(4);
        /* SMB2_CREATE_SD_BUFFER_TOKEN is "SecD" */
@@ -2319,6 +2313,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
        buf->Name[2] = 'c';
        buf->Name[3] = 'D';
        buf->sd.Revision = 1;  /* Must be one see MS-DTYP 2.4.6 */
+
        /*
         * ACL is "self relative" ie ACL is stored in contiguous block of memory
         * and "DP" ie the DACL is present
@@ -2326,28 +2321,38 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
        buf->sd.Control = cpu_to_le16(ACL_CONTROL_SR | ACL_CONTROL_DP);
 
        /* offset owner, group and Sbz1 and SACL are all zero */
-       buf->sd.OffsetDacl = cpu_to_le32(sizeof(struct smb3_sd));
-       buf->acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */
+       buf->sd.OffsetDacl = cpu_to_le32(ptr - (__u8 *)&buf->sd);
+       /* Skip the ACL for now. We will copy it into buf later. */
+       aclptr = ptr;
+       ptr += sizeof(struct cifs_acl);
 
        /* create one ACE to hold the mode embedded in reserved special SID */
-       pace = (struct cifs_ace *)(sizeof(struct crt_sd_ctxt) + (char *)buf);
-       acelen = setup_special_mode_ACE(pace, (__u64)mode);
+       acelen = setup_special_mode_ACE((struct cifs_ace *)ptr, (__u64)mode);
+       ptr += acelen;
+       acl_size = acelen + sizeof(struct smb3_acl);
+       ace_count = 1;
 
        if (set_owner) {
                /* we do not need to reallocate buffer to add the two more ACEs. plenty of space */
-               pace = (struct cifs_ace *)(acelen + (sizeof(struct crt_sd_ctxt) + (char *)buf));
-               acelen += setup_special_user_owner_ACE(pace);
-               /* it does not appear necessary to add an ACE for the NFS group SID */
-               buf->acl.AceCount = cpu_to_le16(3);
-       } else
-               buf->acl.AceCount = cpu_to_le16(2);
+               acelen = setup_special_user_owner_ACE((struct cifs_ace *)ptr);
+               ptr += acelen;
+               acl_size += acelen;
+               ace_count += 1;
+       }
 
        /* and one more ACE to allow access for authenticated users */
-       pace = (struct cifs_ace *)(acelen + (sizeof(struct crt_sd_ctxt) +
-               (char *)buf));
-       acelen += setup_authusers_ACE(pace);
-
-       buf->acl.AclSize = cpu_to_le16(sizeof(struct cifs_acl) + acelen);
+       acelen = setup_authusers_ACE((struct cifs_ace *)ptr);
+       ptr += acelen;
+       acl_size += acelen;
+       ace_count += 1;
+
+       acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */
+       acl.AclSize = cpu_to_le16(acl_size);
+       acl.AceCount = cpu_to_le16(ace_count);
+       memcpy(aclptr, &acl, sizeof(struct cifs_acl));
+
+       buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd);
+       *len = ptr - (__u8 *)buf;
 
        return buf;
 }
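
The rewritten create_sd_buf() drops the hand-computed offsets in favour of a single cursor that walks the variable-length tail of the security descriptor, back-filling the ACL header once the final size and ACE count are known. The serialization shape on its own (struct acl_hdr and emit_ace() are hypothetical):

        u8 *p = payload, *hdrp;
        struct acl_hdr hdr;
        unsigned int count = 0;

        hdrp = p;                       /* leave a hole for the header */
        p += sizeof(hdr);
        p += emit_ace(p, mode);         /* each ACE reports its own length */
        count++;

        hdr.size = cpu_to_le16(p - hdrp);
        hdr.count = cpu_to_le16(count);
        memcpy(hdrp, &hdr, sizeof(hdr));        /* header written last */
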
index f05f9b1..fa57b03 100644 (file)
@@ -963,8 +963,6 @@ struct crt_sd_ctxt {
        struct create_context ccontext;
        __u8    Name[8];
        struct smb3_sd sd;
-       struct smb3_acl acl;
-       /* Followed by at least 4 ACEs */
 } __packed;
 
 
index e27e255..36b2ece 100644 (file)
@@ -339,8 +339,8 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
                return -EAGAIN;
 
        if (signal_pending(current)) {
-               cifs_dbg(FYI, "signal is pending before sending any data\n");
-               return -EINTR;
+               cifs_dbg(FYI, "signal pending before send request\n");
+               return -ERESTARTSYS;
        }
 
        /* cork the socket */
index 0cd9056..c6acfc6 100644 (file)
@@ -229,7 +229,8 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
                 */
                if (ispipe) {
                        if (isspace(*pat_ptr)) {
-                               was_space = true;
+                               if (cn->used != 0)
+                                       was_space = true;
                                pat_ptr++;
                                continue;
                        } else if (was_space) {
index 96c0c86..0297ad9 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/efi.h>
 #include <linux/fs.h>
 #include <linux/ctype.h>
+#include <linux/kmemleak.h>
 #include <linux/slab.h>
 #include <linux/uuid.h>
 
@@ -103,6 +104,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
        var->var.VariableName[i] = '\0';
 
        inode->i_private = var;
+       kmemleak_ignore(var);
 
        err = efivar_entry_add(var, &efivarfs_list);
        if (err)
index df466ef..e265b6d 100644 (file)
@@ -182,11 +182,14 @@ static __poll_t eventfd_poll(struct file *file, poll_table *wait)
        return events;
 }
 
-static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
 {
+       lockdep_assert_held(&ctx->wqh.lock);
+
        *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
        ctx->count -= *cnt;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
 
 /**
  * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
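
Exporting eventfd_ctx_do_read() widens who may call it, so the new lockdep_assert_held() pins the locking contract: on a lockdep-enabled kernel a caller that does not hold ctx->wqh.lock gets an immediate splat, and the check compiles away otherwise. The general idiom (struct counter is hypothetical):

static void counter_consume_locked(struct counter *c, u64 *out)
{
        lockdep_assert_held(&c->lock);  /* caller must hold c->lock */
        *out = c->value;
        c->value = 0;
}
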
index bf94294..65ecaf9 100644 (file)
@@ -2695,7 +2695,8 @@ void ext4_insert_dentry(struct inode *inode,
                        struct ext4_filename *fname);
 static inline void ext4_update_dx_flag(struct inode *inode)
 {
-       if (!ext4_has_feature_dir_index(inode->i_sb)) {
+       if (!ext4_has_feature_dir_index(inode->i_sb) &&
+           ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
                /* ext4_iget() should have caught this... */
                WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
                ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
index 6633b20..9447204 100644 (file)
@@ -2638,10 +2638,6 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
        } else if (test_opt2(sb, DAX_INODE)) {
                SEQ_OPTS_PUTS("dax=inode");
        }
-
-       if (test_opt2(sb, JOURNAL_FAST_COMMIT))
-               SEQ_OPTS_PUTS("fast_commit");
-
        ext4_show_quota_options(seq, sb);
        return 0;
 }
index d98a2e5..35a6fd1 100644 (file)
@@ -1035,6 +1035,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
        gl->gl_node.next = NULL;
        gl->gl_flags = 0;
        gl->gl_name = name;
+       lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
        gl->gl_lockref.count = 1;
        gl->gl_state = LM_ST_UNLOCKED;
        gl->gl_target = LM_ST_UNLOCKED;
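
Inode and iopen glocks share the same spinlock class, so taking one gl_lockref.lock while holding the other used to look like recursive locking to lockdep. The new go_subclass field (set to 1 for gfs2_iopen_glops in the glops.c hunk further down) gives iopen glocks their own nesting level. The mechanism in isolation:

        spinlock_t a, b;

        spin_lock_init(&a);
        spin_lock_init(&b);
        lockdep_set_subclass(&b, 1);    /* distinct nesting level */

        spin_lock(&a);                  /* subclass 0 */
        spin_lock(&b);                  /* subclass 1: legal nesting */
        spin_unlock(&b);
        spin_unlock(&a);
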
index 6c1432d..3faa421 100644 (file)
@@ -245,7 +245,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
 static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
                              const char *fs_id_buf)
 {
-       struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
+       struct gfs2_rgrpd *rgd = gl->gl_object;
 
        if (rgd)
                gfs2_rgrp_dump(seq, rgd, fs_id_buf);
@@ -571,7 +571,19 @@ static int freeze_go_sync(struct gfs2_glock *gl)
        int error = 0;
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 
-       if (gl->gl_req == LM_ST_EXCLUSIVE && !gfs2_withdrawn(sdp)) {
+       /*
+        * We need to check gl_state == LM_ST_SHARED here and not gl_req ==
+        * LM_ST_EXCLUSIVE. That's because when any node does a freeze,
+        * all the nodes should have the freeze glock in SH mode and they all
+        * call do_xmote: One for EX and the others for UN. They ALL must
+        * freeze locally, and they ALL must queue freeze work. The freeze_work
+        * calls freeze_func, which tries to reacquire the freeze glock in SH,
+        * effectively waiting for the thaw on the node that holds it in EX.
+        * Once thawed, the work func acquires the freeze glock in
+        * SH and everybody goes back to thawed.
+        */
+       if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) &&
+           !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) {
                atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
                error = freeze_super(sdp->sd_vfs);
                if (error) {
@@ -770,6 +782,7 @@ const struct gfs2_glock_operations gfs2_iopen_glops = {
        .go_callback = iopen_go_callback,
        .go_demote_ok = iopen_go_demote_ok,
        .go_flags = GLOF_LRU | GLOF_NONDISK,
+       .go_subclass = 1,
 };
 
 const struct gfs2_glock_operations gfs2_flock_glops = {
index d770730..f8858d9 100644 (file)
@@ -247,6 +247,7 @@ struct gfs2_glock_operations {
                        const char *fs_id_buf);
        void (*go_callback)(struct gfs2_glock *gl, bool remote);
        void (*go_free)(struct gfs2_glock *gl);
+       const int go_subclass;
        const int go_type;
        const unsigned long go_flags;
 #define GLOF_ASPACE 1 /* address space attached */
index 077ccb1..65ae4fc 100644 (file)
@@ -150,6 +150,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
                error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
                if (unlikely(error))
                        goto fail;
+               if (blktype != GFS2_BLKST_UNLINKED)
+                       gfs2_cancel_delete_work(io_gl);
 
                if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) {
                        /*
@@ -180,8 +182,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
                error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
                if (unlikely(error))
                        goto fail;
-               if (blktype != GFS2_BLKST_UNLINKED)
-                       gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl);
                glock_set_object(ip->i_iopen_gh.gh_gl, ip);
                gfs2_glock_put(io_gl);
                io_gl = NULL;
@@ -725,13 +725,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        flush_delayed_work(&ip->i_gl->gl_work);
        glock_set_object(ip->i_gl, ip);
 
-       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
+       error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
        if (error)
                goto fail_free_inode;
+       gfs2_cancel_delete_work(io_gl);
+       glock_set_object(io_gl, ip);
+
+       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
+       if (error)
+               goto fail_gunlock2;
 
        error = gfs2_trans_begin(sdp, blocks, 0);
        if (error)
-               goto fail_free_inode;
+               goto fail_gunlock2;
 
        if (blocks > 1) {
                ip->i_eattr = ip->i_no_addr + 1;
@@ -740,18 +746,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        init_dinode(dip, ip, symname);
        gfs2_trans_end(sdp);
 
-       error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
-       if (error)
-               goto fail_free_inode;
-
        BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags));
 
        error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
        if (error)
                goto fail_gunlock2;
 
-       gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl);
-       glock_set_object(ip->i_iopen_gh.gh_gl, ip);
        gfs2_set_iop(inode);
        insert_inode_hash(inode);
 
@@ -803,6 +803,7 @@ fail_gunlock3:
        gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 fail_gunlock2:
        clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
+       glock_clear_object(io_gl, ip);
        gfs2_glock_put(io_gl);
 fail_free_inode:
        if (ip->i_gl) {
@@ -2116,6 +2117,25 @@ loff_t gfs2_seek_hole(struct file *file, loff_t offset)
        return vfs_setpos(file, ret, inode->i_sb->s_maxbytes);
 }
 
+static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
+                           int flags)
+{
+       struct gfs2_inode *ip = GFS2_I(inode);
+       struct gfs2_glock *gl = ip->i_gl;
+       struct gfs2_holder *gh;
+       int error;
+
+       gh = gfs2_glock_is_locked_by_me(gl);
+       if (gh && !gfs2_glock_is_held_excl(gl)) {
+               gfs2_glock_dq(gh);
+               gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, gh);
+               error = gfs2_glock_nq(gh);
+               if (error)
+                       return error;
+       }
+       return generic_update_time(inode, time, flags);
+}
+
 const struct inode_operations gfs2_file_iops = {
        .permission = gfs2_permission,
        .setattr = gfs2_setattr,
@@ -2124,6 +2144,7 @@ const struct inode_operations gfs2_file_iops = {
        .fiemap = gfs2_fiemap,
        .get_acl = gfs2_get_acl,
        .set_acl = gfs2_set_acl,
+       .update_time = gfs2_update_time,
 };
 
 const struct inode_operations gfs2_dir_iops = {
@@ -2143,6 +2164,7 @@ const struct inode_operations gfs2_dir_iops = {
        .fiemap = gfs2_fiemap,
        .get_acl = gfs2_get_acl,
        .set_acl = gfs2_set_acl,
+       .update_time = gfs2_update_time,
        .atomic_open = gfs2_atomic_open,
 };
 
index f7addc6..5e8eef9 100644 (file)
@@ -985,6 +985,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
        if (error < 0)
                return error;
 
+       if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) {
+               fs_err(sdp, "no resource groups found in the file system.\n");
+               return -ENOENT;
+       }
        set_rgrp_preferences(sdp);
 
        sdp->sd_rindex_uptodate = 1;
index 4ead291..86dac2b 100644 (file)
@@ -205,6 +205,7 @@ struct fixed_file_ref_node {
        struct list_head                file_list;
        struct fixed_file_data          *file_data;
        struct llist_node               llist;
+       bool                            done;
 };
 
 struct fixed_file_data {
@@ -478,6 +479,7 @@ struct io_sr_msg {
 struct io_open {
        struct file                     *file;
        int                             dfd;
+       bool                            ignore_nonblock;
        struct filename                 *filename;
        struct open_how                 how;
        unsigned long                   nofile;
@@ -1282,7 +1284,7 @@ static bool io_identity_cow(struct io_kiocb *req)
         */
        io_init_identity(id);
        if (creds)
-               req->work.identity->creds = creds;
+               id->creds = creds;
 
        /* add one for this request */
        refcount_inc(&id->count);
@@ -1311,22 +1313,6 @@ static bool io_grab_identity(struct io_kiocb *req)
                        return false;
                req->work.flags |= IO_WQ_WORK_FSIZE;
        }
-
-       if (!(req->work.flags & IO_WQ_WORK_FILES) &&
-           (def->work_flags & IO_WQ_WORK_FILES) &&
-           !(req->flags & REQ_F_NO_FILE_TABLE)) {
-               if (id->files != current->files ||
-                   id->nsproxy != current->nsproxy)
-                       return false;
-               atomic_inc(&id->files->count);
-               get_nsproxy(id->nsproxy);
-               req->flags |= REQ_F_INFLIGHT;
-
-               spin_lock_irq(&ctx->inflight_lock);
-               list_add(&req->inflight_entry, &ctx->inflight_list);
-               spin_unlock_irq(&ctx->inflight_lock);
-               req->work.flags |= IO_WQ_WORK_FILES;
-       }
 #ifdef CONFIG_BLK_CGROUP
        if (!(req->work.flags & IO_WQ_WORK_BLKCG) &&
            (def->work_flags & IO_WQ_WORK_BLKCG)) {
@@ -1368,6 +1354,21 @@ static bool io_grab_identity(struct io_kiocb *req)
                }
                spin_unlock(&current->fs->lock);
        }
+       if (!(req->work.flags & IO_WQ_WORK_FILES) &&
+           (def->work_flags & IO_WQ_WORK_FILES) &&
+           !(req->flags & REQ_F_NO_FILE_TABLE)) {
+               if (id->files != current->files ||
+                   id->nsproxy != current->nsproxy)
+                       return false;
+               atomic_inc(&id->files->count);
+               get_nsproxy(id->nsproxy);
+               req->flags |= REQ_F_INFLIGHT;
+
+               spin_lock_irq(&ctx->inflight_lock);
+               list_add(&req->inflight_entry, &ctx->inflight_list);
+               spin_unlock_irq(&ctx->inflight_lock);
+               req->work.flags |= IO_WQ_WORK_FILES;
+       }
 
        return true;
 }
@@ -2577,7 +2578,6 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
        }
 end_req:
        req_set_fail_links(req);
-       io_req_complete(req, ret);
        return false;
 }
 #endif
@@ -3192,7 +3192,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
        rw->free_iovec = iovec;
        rw->bytes_done = 0;
        /* can only be fixed buffers, no need to do anything */
-       if (iter->type == ITER_BVEC)
+       if (iov_iter_is_bvec(iter))
                return;
        if (!iovec) {
                unsigned iov_off = 0;
@@ -3795,6 +3795,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
                return ret;
        }
        req->open.nofile = rlimit(RLIMIT_NOFILE);
+       req->open.ignore_nonblock = false;
        req->flags |= REQ_F_NEED_CLEANUP;
        return 0;
 }
@@ -3838,7 +3839,7 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
        struct file *file;
        int ret;
 
-       if (force_nonblock)
+       if (force_nonblock && !req->open.ignore_nonblock)
                return -EAGAIN;
 
        ret = build_open_flags(&req->open.how, &op);
@@ -3853,6 +3854,21 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
        if (IS_ERR(file)) {
                put_unused_fd(ret);
                ret = PTR_ERR(file);
+               /*
+                * A work-around to ensure that /proc/self works the way
+                * that it should - if we get -EOPNOTSUPP back, then assume
+                * that proc_self_get_link() failed us because we're in async
+                * context. We should be safe to retry this from the task
+                * itself with force_nonblock == false set, as it should not
+                * block on lookup. Would be nice to know this upfront and
+                * avoid the async dance, but doesn't seem feasible.
+                */
+               if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) {
+                       req->open.ignore_nonblock = true;
+                       refcount_inc(&req->refs);
+                       io_req_task_queue(req);
+                       return 0;
+               }
        } else {
                fsnotify_open(file);
                fd_install(ret, file);
@@ -4483,7 +4499,8 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
                        return -EFAULT;
                if (clen < 0)
                        return -EINVAL;
-               sr->len = iomsg->iov[0].iov_len;
+               sr->len = clen;
+               iomsg->iov[0].iov_len = clen;
                iomsg->iov = NULL;
        } else {
                ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
@@ -6957,9 +6974,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
                return -ENXIO;
 
        spin_lock(&data->lock);
-       if (!list_empty(&data->ref_list))
-               ref_node = list_first_entry(&data->ref_list,
-                               struct fixed_file_ref_node, node);
+       ref_node = data->node;
        spin_unlock(&data->lock);
        if (ref_node)
                percpu_ref_kill(&ref_node->refs);
@@ -7308,10 +7323,6 @@ static void __io_file_put_work(struct fixed_file_ref_node *ref_node)
                kfree(pfile);
        }
 
-       spin_lock(&file_data->lock);
-       list_del(&ref_node->node);
-       spin_unlock(&file_data->lock);
-
        percpu_ref_exit(&ref_node->refs);
        kfree(ref_node);
        percpu_ref_put(&file_data->refs);
@@ -7338,17 +7349,32 @@ static void io_file_put_work(struct work_struct *work)
 static void io_file_data_ref_zero(struct percpu_ref *ref)
 {
        struct fixed_file_ref_node *ref_node;
+       struct fixed_file_data *data;
        struct io_ring_ctx *ctx;
-       bool first_add;
+       bool first_add = false;
        int delay = HZ;
 
        ref_node = container_of(ref, struct fixed_file_ref_node, refs);
-       ctx = ref_node->file_data->ctx;
+       data = ref_node->file_data;
+       ctx = data->ctx;
+
+       spin_lock(&data->lock);
+       ref_node->done = true;
 
-       if (percpu_ref_is_dying(&ctx->file_data->refs))
+       while (!list_empty(&data->ref_list)) {
+               ref_node = list_first_entry(&data->ref_list,
+                                       struct fixed_file_ref_node, node);
+               /* recycle ref nodes in order */
+               if (!ref_node->done)
+                       break;
+               list_del(&ref_node->node);
+               first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist);
+       }
+       spin_unlock(&data->lock);
+
+       if (percpu_ref_is_dying(&data->refs))
                delay = 0;
 
-       first_add = llist_add(&ref_node->llist, &ctx->file_put_llist);
        if (!delay)
                mod_delayed_work(system_wq, &ctx->file_put_work, 0);
        else if (first_add)
@@ -7372,6 +7398,7 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
        INIT_LIST_HEAD(&ref_node->node);
        INIT_LIST_HEAD(&ref_node->file_list);
        ref_node->file_data = ctx->file_data;
+       ref_node->done = false;
        return ref_node;
 }
 
@@ -7467,7 +7494,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 
        file_data->node = ref_node;
        spin_lock(&file_data->lock);
-       list_add(&ref_node->node, &file_data->ref_list);
+       list_add_tail(&ref_node->node, &file_data->ref_list);
        spin_unlock(&file_data->lock);
        percpu_ref_get(&file_data->refs);
        return ret;
@@ -7626,7 +7653,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
        if (needs_switch) {
                percpu_ref_kill(&data->node->refs);
                spin_lock(&data->lock);
-               list_add(&ref_node->node, &data->ref_list);
+               list_add_tail(&ref_node->node, &data->ref_list);
                data->node = ref_node;
                spin_unlock(&data->lock);
                percpu_ref_get(&ctx->file_data->refs);
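
Ref nodes are now appended with list_add_tail() and retired strictly in order: when a node's refs hit zero it is only marked done, and nodes are freed from the head of the list while each one is done, so a newer node can never be recycled ahead of an older one that still pins files. The ordering rule in miniature (names hypothetical):

        spin_lock(&data->lock);
        node->done = true;
        while (!list_empty(&data->list)) {
                struct ref_node *oldest = list_first_entry(&data->list,
                                                struct ref_node, link);
                if (!oldest->done)
                        break;          /* head is still in flight */
                list_del(&oldest->link);
                retire(oldest);
        }
        spin_unlock(&data->lock);
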
@@ -9156,6 +9183,7 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
 {
        struct file *file;
        int ret;
+       int fd;
 
 #if defined(CONFIG_UNIX)
        ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
@@ -9167,12 +9195,12 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
        ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
        if (ret < 0)
                goto err;
+       fd = ret;
 
        file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
                                        O_RDWR | O_CLOEXEC);
        if (IS_ERR(file)) {
-err_fd:
-               put_unused_fd(ret);
+               put_unused_fd(fd);
                ret = PTR_ERR(file);
                goto err;
        }
@@ -9180,12 +9208,14 @@ err_fd:
 #if defined(CONFIG_UNIX)
        ctx->ring_sock->file = file;
 #endif
-       if (unlikely(io_uring_add_task_file(ctx, file))) {
-               file = ERR_PTR(-ENOMEM);
-               goto err_fd;
+       ret = io_uring_add_task_file(ctx, file);
+       if (ret) {
+               fput(file);
+               put_unused_fd(fd);
+               goto err;
        }
-       fd_install(ret, file);
-       return ret;
+       fd_install(fd, file);
+       return fd;
 err:
 #if defined(CONFIG_UNIX)
        sock_release(ctx->ring_sock);
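
The rework keeps the raw return code (ret) and the reserved descriptor (fd) apart, and unwinds a late io_uring_add_task_file() failure with fput() plus put_unused_fd() instead of jumping back into the allocation path. The underlying rules, sketched with a hypothetical setup() step: before fd_install() the descriptor is private and must be released with put_unused_fd(), and the file's reference is dropped with fput(); after fd_install() the fd is visible to userspace and nothing can be taken back.

        int fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
        struct file *file;

        if (fd < 0)
                return fd;
        file = anon_inode_getfile("[example]", &example_fops, NULL, O_RDWR);
        if (IS_ERR(file)) {
                put_unused_fd(fd);              /* fd never became visible */
                return PTR_ERR(file);
        }
        if (setup(file)) {                      /* late failure */
                fput(file);
                put_unused_fd(fd);
                return -ENOMEM;
        }
        fd_install(fd, file);                   /* point of no return */
        return fd;
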
@@ -9225,14 +9255,16 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
                 * to a power-of-two, if it isn't already. We do NOT impose
                 * any cq vs sq ring sizing.
                 */
-               p->cq_entries = roundup_pow_of_two(p->cq_entries);
-               if (p->cq_entries < p->sq_entries)
+               if (!p->cq_entries)
                        return -EINVAL;
                if (p->cq_entries > IORING_MAX_CQ_ENTRIES) {
                        if (!(p->flags & IORING_SETUP_CLAMP))
                                return -EINVAL;
                        p->cq_entries = IORING_MAX_CQ_ENTRIES;
                }
+               p->cq_entries = roundup_pow_of_two(p->cq_entries);
+               if (p->cq_entries < p->sq_entries)
+                       return -EINVAL;
        } else {
                p->cq_entries = 2 * p->sq_entries;
        }
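
The reordering matters because roundup_pow_of_two() is undefined for an input of 0 and can wrap for values near the top of the range, so the count must be validated and clamped before it is rounded and compared against sq_entries. The safe ordering on its own (clamp_allowed stands in for the IORING_SETUP_CLAMP test):

        if (!cq_entries)
                return -EINVAL;
        if (cq_entries > IORING_MAX_CQ_ENTRIES) {
                if (!clamp_allowed)
                        return -EINVAL;
                cq_entries = IORING_MAX_CQ_ENTRIES;
        }
        cq_entries = roundup_pow_of_two(cq_entries);    /* input now bounded */
        if (cq_entries < sq_entries)
                return -EINVAL;
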
index 0c3d5e3..188f79d 100644 (file)
@@ -566,12 +566,14 @@ static int __jbd2_journal_force_commit(journal_t *journal)
 }
 
 /**
- * Force and wait upon a commit if the calling process is not within
- * transaction.  This is used for forcing out undo-protected data which contains
- * bitmaps, when the fs is running out of space.
+ * jbd2_journal_force_commit_nested - Force and wait upon a commit if the
+ * calling process is not within transaction.
  *
  * @journal: journal to force
  * Returns true if progress was made.
+ *
+ * This is used for forcing out undo-protected data which contains
+ * bitmaps, when the fs is running out of space.
  */
 int jbd2_journal_force_commit_nested(journal_t *journal)
 {
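
Every jbd2 hunk in this group fixes the same kernel-doc defect: the summary line must read "function_name() - description", with no C return type or parameter list, or scripts/kernel-doc cannot match the comment to the function it documents. A conforming block, for a hypothetical function:

/**
 * jbd2_example_op() - One-line summary, with no return type.
 * @journal: journal to act on
 *
 * Free-form description. kernel-doc keys on "name() -" in the first
 * line; writing "int jbd2_example_op()" there breaks the parser.
 */
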
@@ -582,7 +584,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
 }
 
 /**
- * int journal_force_commit() - force any uncommitted transactions
+ * jbd2_journal_force_commit() - force any uncommitted transactions
  * @journal: journal to force
  *
  * Caller want unconditional commit. We can only force the running transaction
@@ -1881,7 +1883,7 @@ static int load_superblock(journal_t *journal)
 
 
 /**
- * int jbd2_journal_load() - Read journal from disk.
+ * jbd2_journal_load() - Read journal from disk.
  * @journal: Journal to act on.
  *
  * Given a journal_t structure which tells us which disk blocks contain
@@ -1951,7 +1953,7 @@ recovery_error:
 }
 
 /**
- * void jbd2_journal_destroy() - Release a journal_t structure.
+ * jbd2_journal_destroy() - Release a journal_t structure.
  * @journal: Journal to act on.
  *
  * Release a journal_t structure once it is no longer in use by the
@@ -2028,7 +2030,7 @@ int jbd2_journal_destroy(journal_t *journal)
 
 
 /**
- *int jbd2_journal_check_used_features() - Check if features specified are used.
+ * jbd2_journal_check_used_features() - Check if features specified are used.
  * @journal: Journal to check.
  * @compat: bitmask of compatible features
  * @ro: bitmask of features that force read-only mount
@@ -2063,7 +2065,7 @@ int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
 }
 
 /**
- * int jbd2_journal_check_available_features() - Check feature set in journalling layer
+ * jbd2_journal_check_available_features() - Check feature set in journalling layer
  * @journal: Journal to check.
  * @compat: bitmask of compatible features
  * @ro: bitmask of features that force read-only mount
@@ -2126,7 +2128,7 @@ jbd2_journal_initialize_fast_commit(journal_t *journal)
 }
 
 /**
- * int jbd2_journal_set_features() - Mark a given journal feature in the superblock
+ * jbd2_journal_set_features() - Mark a given journal feature in the superblock
  * @journal: Journal to act on.
  * @compat: bitmask of compatible features
  * @ro: bitmask of features that force read-only mount
@@ -2217,7 +2219,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
 }
 
 /*
- * jbd2_journal_clear_features () - Clear a given journal feature in the
+ * jbd2_journal_clear_features() - Clear a given journal feature in the
  *                                 superblock
  * @journal: Journal to act on.
  * @compat: bitmask of compatible features
@@ -2246,7 +2248,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
 EXPORT_SYMBOL(jbd2_journal_clear_features);
 
 /**
- * int jbd2_journal_flush () - Flush journal
+ * jbd2_journal_flush() - Flush journal
  * @journal: Journal to act on.
  *
  * Flush all data for a given journal to disk and empty the journal.
@@ -2321,7 +2323,7 @@ out:
 }
 
 /**
- * int jbd2_journal_wipe() - Wipe journal contents
+ * jbd2_journal_wipe() - Wipe journal contents
  * @journal: Journal to act on.
  * @write: flag (see below)
  *
@@ -2362,7 +2364,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
 }
 
 /**
- * void jbd2_journal_abort () - Shutdown the journal immediately.
+ * jbd2_journal_abort() - Shutdown the journal immediately.
  * @journal: the journal to shutdown.
  * @errno:   an error number to record in the journal indicating
  *           the reason for the shutdown.
@@ -2453,7 +2455,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
 }
 
 /**
- * int jbd2_journal_errno () - returns the journal's error state.
+ * jbd2_journal_errno() - returns the journal's error state.
  * @journal: journal to examine.
  *
  * This is the errno number set with jbd2_journal_abort(), the last
@@ -2477,7 +2479,7 @@ int jbd2_journal_errno(journal_t *journal)
 }
 
 /**
- * int jbd2_journal_clear_err () - clears the journal's error state
+ * jbd2_journal_clear_err() - clears the journal's error state
  * @journal: journal to act on.
  *
  * An error must be cleared or acked to take a FS out of readonly
@@ -2497,7 +2499,7 @@ int jbd2_journal_clear_err(journal_t *journal)
 }
 
 /**
- * void jbd2_journal_ack_err() - Ack journal err.
+ * jbd2_journal_ack_err() - Ack journal err.
  * @journal: journal to act on.
  *
  * An error must be cleared or acked to take a FS out of readonly
index d54f046..9396666 100644 (file)
@@ -519,7 +519,7 @@ EXPORT_SYMBOL(jbd2__journal_start);
 
 
 /**
- * handle_t *jbd2_journal_start() - Obtain a new handle.
+ * jbd2_journal_start() - Obtain a new handle.
  * @journal: Journal to start transaction on.
  * @nblocks: number of block buffer we might modify
  *
@@ -566,7 +566,7 @@ void jbd2_journal_free_reserved(handle_t *handle)
 EXPORT_SYMBOL(jbd2_journal_free_reserved);
 
 /**
- * int jbd2_journal_start_reserved() - start reserved handle
+ * jbd2_journal_start_reserved() - start reserved handle
  * @handle: handle to start
  * @type: for handle statistics
  * @line_no: for handle statistics
@@ -620,7 +620,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
 EXPORT_SYMBOL(jbd2_journal_start_reserved);
 
 /**
- * int jbd2_journal_extend() - extend buffer credits.
+ * jbd2_journal_extend() - extend buffer credits.
  * @handle:  handle to 'extend'
  * @nblocks: nr blocks to try to extend by.
  * @revoke_records: number of revoke records to try to extend by.
@@ -745,7 +745,7 @@ static void stop_this_handle(handle_t *handle)
 }
 
 /**
- * int jbd2_journal_restart() - restart a handle .
+ * jbd2__journal_restart() - restart a handle.
  * @handle:  handle to restart
  * @nblocks: nr credits requested
  * @revoke_records: number of revoke record credits requested
@@ -815,7 +815,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
 EXPORT_SYMBOL(jbd2_journal_restart);
 
 /**
- * void jbd2_journal_lock_updates () - establish a transaction barrier.
+ * jbd2_journal_lock_updates() - establish a transaction barrier.
  * @journal:  Journal to establish a barrier on.
  *
  * This locks out any further updates from being started, and blocks
@@ -874,7 +874,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
 }
 
 /**
- * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
+ * jbd2_journal_unlock_updates() - release barrier
  * @journal:  Journal to release the barrier on.
  *
  * Release a transaction barrier obtained with jbd2_journal_lock_updates().
@@ -1182,7 +1182,8 @@ out:
 }
 
 /**
- * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
+ * jbd2_journal_get_write_access() - notify intent to modify a buffer
+ *                                  for metadata (not data) update.
  * @handle: transaction to add buffer modifications to
  * @bh:     bh to be used for metadata writes
  *
@@ -1226,7 +1227,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
  * unlocked buffer beforehand. */
 
 /**
- * int jbd2_journal_get_create_access () - notify intent to use newly created bh
+ * jbd2_journal_get_create_access() - notify intent to use newly created bh
  * @handle: transaction to new buffer to
  * @bh: new buffer.
  *
@@ -1306,7 +1307,7 @@ out:
 }
 
 /**
- * int jbd2_journal_get_undo_access() -  Notify intent to modify metadata with
+ * jbd2_journal_get_undo_access() - Notify intent to modify metadata with
  *     non-rewindable consequences
  * @handle: transaction
  * @bh: buffer to undo
@@ -1383,7 +1384,7 @@ out:
 }
 
 /**
- * void jbd2_journal_set_triggers() - Add triggers for commit writeout
+ * jbd2_journal_set_triggers() - Add triggers for commit writeout
  * @bh: buffer to trigger on
  * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
  *
@@ -1425,7 +1426,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
 }
 
 /**
- * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
+ * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
  * @bh: buffer to mark
  *
@@ -1593,7 +1594,7 @@ out:
 }
 
 /**
- * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
+ * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
  * @handle: transaction handle
  * @bh:     bh to 'forget'
  *
@@ -1762,7 +1763,7 @@ drop:
 }
 
 /**
- * int jbd2_journal_stop() - complete a transaction
+ * jbd2_journal_stop() - complete a transaction
  * @handle: transaction to complete.
  *
  * All done for a particular handle.
@@ -2080,7 +2081,7 @@ out:
 }
 
 /**
- * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
+ * jbd2_journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
  * @page: to try and free
  *
@@ -2411,7 +2412,7 @@ zap_buffer_unlocked:
 }
 
 /**
- * void jbd2_journal_invalidatepage()
+ * jbd2_journal_invalidatepage()
  * @journal: journal to use for flush...
  * @page:    page to flush
  * @offset:  start of the range to invalidate
index fc34361..7124c2e 100644 (file)
@@ -959,7 +959,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
                          size_t len, loff_t *ppos)
 {
        struct simple_attr *attr;
-       u64 val;
+       unsigned long long val;
        size_t size;
        ssize_t ret;
 
@@ -977,7 +977,9 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
                goto out;
 
        attr->set_buf[size] = '\0';
-       val = simple_strtoll(attr->set_buf, NULL, 0);
+       ret = kstrtoull(attr->set_buf, 0, &val);
+       if (ret)
+               goto out;
        ret = attr->set(attr->data, val);
        if (ret == 0)
                ret = len; /* on success, claim we got the whole input */
index 88e1763..e2a488d 100644 (file)
@@ -205,3 +205,12 @@ config NFS_DISABLE_UDP_SUPPORT
         Choose Y here to disable the use of NFS over UDP. NFS over UDP
         on modern networks (1Gb+) can lead to data corruption caused by
         fragmentation during high loads.
+
+config NFS_V4_2_READ_PLUS
+       bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation"
+       depends on NFS_V4_2
+       default n
+       help
+        This is intended for developers only. The READ_PLUS operation has
+        been shown to have issues under specific conditions and should not
+        be used in production.
index a163533..24bf579 100644 (file)
@@ -838,7 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
        struct nfs_pgio_mirror *pgm;
        struct nfs4_ff_layout_mirror *mirror;
        struct nfs4_pnfs_ds *ds;
-       u32 ds_idx, i;
+       u32 ds_idx;
 
 retry:
        ff_layout_pg_check_layout(pgio, req);
@@ -864,11 +864,9 @@ retry:
                goto retry;
        }
 
-       for (i = 0; i < pgio->pg_mirror_count; i++) {
-               mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
-               pgm = &pgio->pg_mirrors[i];
-               pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
-       }
+       mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
+       pgm = &pgio->pg_mirrors[0];
+       pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
 
        pgio->pg_mirror_idx = ds_idx;
 
@@ -985,6 +983,21 @@ out:
        return 1;
 }
 
+static u32
+ff_layout_pg_set_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+       u32 old = desc->pg_mirror_idx;
+
+       desc->pg_mirror_idx = idx;
+       return old;
+}
+
+static struct nfs_pgio_mirror *
+ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+       return &desc->pg_mirrors[idx];
+}
+
 static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
        .pg_init = ff_layout_pg_init_read,
        .pg_test = pnfs_generic_pg_test,
@@ -998,6 +1011,8 @@ static const struct nfs_pageio_ops ff_layout_pg_write_ops = {
        .pg_doio = pnfs_generic_pg_writepages,
        .pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
        .pg_cleanup = pnfs_generic_pg_cleanup,
+       .pg_get_mirror = ff_layout_pg_get_mirror_write,
+       .pg_set_mirror = ff_layout_pg_set_mirror_write,
 };
 
 static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
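
The new pg_get_mirror/pg_set_mirror ops let the flexfiles write path address mirrors by index while the generic pageio code, which only ever has a single active mirror, stays unchanged; the fs/nfs/pagelist.c hunks further down add the fallbacks used when a layout driver leaves the ops unset. The optional-op shape in isolation (types hypothetical):

static struct mirror *get_mirror(struct pg_desc *d, u32 idx)
{
        if (d->ops->get_mirror)                 /* driver override */
                return d->ops->get_mirror(d, idx);
        return &d->mirrors[0];                  /* generic single-mirror case */
}
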
index 2b2211d..4fc61e3 100644 (file)
@@ -1241,12 +1241,13 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
                .rpc_resp       = &res,
        };
        u32 xdrlen;
-       int ret, np;
+       int ret, np, i;
 
 
+       ret = -ENOMEM;
        res.scratch = alloc_page(GFP_KERNEL);
        if (!res.scratch)
-               return -ENOMEM;
+               goto out;
 
        xdrlen = nfs42_listxattr_xdrsize(buflen);
        if (xdrlen > server->lxasize)
@@ -1254,9 +1255,12 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
        np = xdrlen / PAGE_SIZE + 1;
 
        pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL);
-       if (pages == NULL) {
-               __free_page(res.scratch);
-               return -ENOMEM;
+       if (!pages)
+               goto out_free_scratch;
+       for (i = 0; i < np; i++) {
+               pages[i] = alloc_page(GFP_KERNEL);
+               if (!pages[i])
+                       goto out_free_pages;
        }
 
        arg.xattr_pages = pages;
@@ -1271,14 +1275,15 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
                *eofp = res.eof;
        }
 
+out_free_pages:
        while (--np >= 0) {
                if (pages[np])
                        __free_page(pages[np]);
        }
-
-       __free_page(res.scratch);
        kfree(pages);
-
+out_free_scratch:
+       __free_page(res.scratch);
+out:
        return ret;
 
 }
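
With XDRBUF_SPARSE_PAGES dropped from the listxattrs encoder in the next hunk, the transport no longer fills in missing reply pages, so _nfs42_proc_listxattrs() must allocate the whole page array up front; the error paths collapse into one goto ladder that unwinds in reverse allocation order. The ladder shape, condensed:

        ret = -ENOMEM;
        scratch = alloc_page(GFP_KERNEL);
        if (!scratch)
                goto out;
        pages = kcalloc(np, sizeof(*pages), GFP_KERNEL);
        if (!pages)
                goto out_free_scratch;
        for (i = 0; i < np; i++) {
                pages[i] = alloc_page(GFP_KERNEL);
                if (!pages[i])
                        goto out_free_pages;    /* frees those already allocated */
        }
        /* ... issue the RPC ... */
out_free_pages:
        while (--np >= 0)
                if (pages[np])
                        __free_page(pages[np]);
        kfree(pages);
out_free_scratch:
        __free_page(scratch);
out:
        return ret;
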
index 6e060a8..8432bd6 100644 (file)
@@ -1528,7 +1528,6 @@ static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
 
        rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count,
            hdr.replen);
-       req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
 
        encode_nops(&hdr);
 }
index 9d354de..57b3821 100644 (file)
@@ -377,10 +377,10 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
                goto out_stateowner;
 
        set_bit(NFS_SRV_SSC_COPY_STATE, &ctx->state->flags);
-       set_bit(NFS_OPEN_STATE, &ctx->state->flags);
        memcpy(&ctx->state->open_stateid.other, &stateid->other,
               NFS4_STATEID_OTHER_SIZE);
        update_open_stateid(ctx->state, stateid, NULL, filep->f_mode);
+       set_bit(NFS_OPEN_STATE, &ctx->state->flags);
 
        nfs_file_set_open_context(filep, ctx);
        put_nfs_open_context(ctx);
index 9e0ca9b..e894686 100644 (file)
@@ -5309,7 +5309,7 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
                                    nfs4_read_done_cb(task, hdr);
 }
 
-#ifdef CONFIG_NFS_V4_2
+#if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS
 static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
 {
        if (server->caps & NFS_CAP_READ_PLUS)
index 6985cac..78c9c4b 100644 (file)
 static struct kmem_cache *nfs_page_cachep;
 static const struct rpc_call_ops nfs_pgio_common_ops;
 
+static struct nfs_pgio_mirror *
+nfs_pgio_get_mirror(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+       if (desc->pg_ops->pg_get_mirror)
+               return desc->pg_ops->pg_get_mirror(desc, idx);
+       return &desc->pg_mirrors[0];
+}
+
 struct nfs_pgio_mirror *
 nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc)
 {
-       return &desc->pg_mirrors[desc->pg_mirror_idx];
+       return nfs_pgio_get_mirror(desc, desc->pg_mirror_idx);
 }
 EXPORT_SYMBOL_GPL(nfs_pgio_current_mirror);
 
+static u32
+nfs_pgio_set_current_mirror(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+       if (desc->pg_ops->pg_set_mirror)
+               return desc->pg_ops->pg_set_mirror(desc, idx);
+       return desc->pg_mirror_idx;
+}
+
 void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
                       struct nfs_pgio_header *hdr,
                       void (*release)(struct nfs_pgio_header *hdr))
@@ -1259,7 +1275,7 @@ static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc)
                return;
 
        for (midx = 0; midx < desc->pg_mirror_count; midx++) {
-               mirror = &desc->pg_mirrors[midx];
+               mirror = nfs_pgio_get_mirror(desc, midx);
                desc->pg_completion_ops->error_cleanup(&mirror->pg_list,
                                desc->pg_error);
        }
@@ -1293,12 +1309,12 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                        goto out_failed;
                }
 
-               desc->pg_mirror_idx = midx;
+               nfs_pgio_set_current_mirror(desc, midx);
                if (!nfs_pageio_add_request_mirror(desc, dupreq))
                        goto out_cleanup_subreq;
        }
 
-       desc->pg_mirror_idx = 0;
+       nfs_pgio_set_current_mirror(desc, 0);
        if (!nfs_pageio_add_request_mirror(desc, req))
                goto out_failed;
 
@@ -1320,10 +1336,12 @@ out_failed:
 static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
                                       u32 mirror_idx)
 {
-       struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx];
-       u32 restore_idx = desc->pg_mirror_idx;
+       struct nfs_pgio_mirror *mirror;
+       u32 restore_idx;
+
+       restore_idx = nfs_pgio_set_current_mirror(desc, mirror_idx);
+       mirror = nfs_pgio_current_mirror(desc);
 
-       desc->pg_mirror_idx = mirror_idx;
        for (;;) {
                nfs_pageio_doio(desc);
                if (desc->pg_error < 0 || !mirror->pg_recoalesce)
@@ -1331,7 +1349,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
                if (!nfs_do_recoalesce(desc))
                        break;
        }
-       desc->pg_mirror_idx = restore_idx;
+       nfs_pgio_set_current_mirror(desc, restore_idx);
 }
 
 /*
@@ -1405,7 +1423,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
        u32 midx;
 
        for (midx = 0; midx < desc->pg_mirror_count; midx++) {
-               mirror = &desc->pg_mirrors[midx];
+               mirror = nfs_pgio_get_mirror(desc, midx);
                if (!list_empty(&mirror->pg_list)) {
                        prev = nfs_list_entry(mirror->pg_list.prev);
                        if (index != prev->wb_index + 1) {
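Taken together, the new pg_get_mirror/pg_set_mirror hooks turn mirror selection into a per-layout policy: generic pagelist code asks the layout driver, and without hooks everything collapses onto mirror 0 while set() merely reports the current index. The set-hook returning the previous value also gives callers the compact save/restore idiom seen in nfs_pageio_complete_mirror() above. A toy model of the pattern (types simplified, names hypothetical):

	#include <stdio.h>

	struct mirror { int id; };
	struct desc;

	struct ops {
		struct mirror *(*get_mirror)(struct desc *, unsigned);
		unsigned (*set_mirror)(struct desc *, unsigned);
	};

	struct desc {
		const struct ops *ops;
		struct mirror mirrors[4];
		unsigned cur;
	};

	/* Fallbacks mirror the patch: without hooks, reads collapse to
	 * mirror 0 and set() just reports the current index. */
	static struct mirror *desc_get_mirror(struct desc *d, unsigned idx)
	{
		if (d->ops && d->ops->get_mirror)
			return d->ops->get_mirror(d, idx);
		return &d->mirrors[0];
	}

	static unsigned desc_set_mirror(struct desc *d, unsigned idx)
	{
		if (d->ops && d->ops->set_mirror)
			return d->ops->set_mirror(d, idx);
		return d->cur;
	}

	/* A driver that really switches mirrors, like the flexfiles hooks. */
	static struct mirror *drv_get(struct desc *d, unsigned idx)
	{
		return &d->mirrors[idx];
	}

	static unsigned drv_set(struct desc *d, unsigned idx)
	{
		unsigned old = d->cur;

		d->cur = idx;
		return old;
	}

	static const struct ops drv_ops = { drv_get, drv_set };

	int main(void)
	{
		struct desc d = { .ops = &drv_ops,
				  .mirrors = { {0}, {1}, {2}, {3} } };
		/* save/restore idiom from nfs_pageio_complete_mirror() */
		unsigned restore = desc_set_mirror(&d, 2);

		printf("mirror %d\n", desc_get_mirror(&d, d.cur)->id);
		desc_set_mirror(&d, restore);
		return 0;
	}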
index a960ec3..8d3ad5e 100644 (file)
@@ -178,6 +178,7 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
        struct inode *inode = d_inode(dentry);
        struct dentry *parent;
        bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED;
+       bool parent_needed, parent_interested;
        __u32 p_mask;
        struct inode *p_inode = NULL;
        struct name_snapshot name;
@@ -193,7 +194,8 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
                return 0;
 
        parent = NULL;
-       if (!parent_watched && !fsnotify_event_needs_parent(inode, mnt, mask))
+       parent_needed = fsnotify_event_needs_parent(inode, mnt, mask);
+       if (!parent_watched && !parent_needed)
                goto notify;
 
        /* Does parent inode care about events on children? */
@@ -205,17 +207,17 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
 
        /*
         * Include parent/name in notification either if some notification
-        * groups require parent info (!parent_watched case) or the parent is
-        * interested in this event.
+        * groups require parent info or the parent is interested in this event.
         */
-       if (!parent_watched || (mask & p_mask & ALL_FSNOTIFY_EVENTS)) {
+       parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS;
+       if (parent_needed || parent_interested) {
                /* When notifying parent, child should be passed as data */
                WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type));
 
                /* Notify both parent and child with child name info */
                take_dentry_name_snapshot(&name, dentry);
                file_name = &name.name;
-               if (parent_watched)
+               if (parent_interested)
                        mask |= FS_EVENT_ON_CHILD;
        }
 
index 72cd69b..cc71ce3 100644 (file)
@@ -16,6 +16,13 @@ static const char *proc_self_get_link(struct dentry *dentry,
        pid_t tgid = task_tgid_nr_ns(current, ns);
        char *name;
 
+       /*
+        * Not currently supported. Once we can inherit all of struct pid,
+        * we can allow this.
+        */
+       if (current->flags & PF_KTHREAD)
+               return ERR_PTR(-EOPNOTSUPP);
+
        if (!tgid)
                return ERR_PTR(-ENOENT);
        /* max length of unsigned int in decimal + NULL term */
index 217aa27..ee5a235 100644 (file)
@@ -1599,11 +1599,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 
        src = *ppos;
        svpfn = src / PM_ENTRY_BYTES;
-       start_vaddr = svpfn << PAGE_SHIFT;
        end_vaddr = mm->task_size;
 
        /* watch out for wraparound */
-       if (svpfn > mm->task_size >> PAGE_SHIFT)
+       start_vaddr = end_vaddr;
+       if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
+               start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
+
+       /* Ensure the address is inside the task */
+       if (start_vaddr > mm->task_size)
                start_vaddr = end_vaddr;
 
        /*
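The rewritten bounds check works because `svpfn << PAGE_SHIFT` silently wraps when the PFN is large, so the old `svpfn > mm->task_size >> PAGE_SHIFT` comparison could be defeated by a wrapped value. The new code performs the shift only when it provably cannot overflow, and otherwise leaves start_vaddr at end_vaddr, which yields an empty read. A small sketch of the guard (assuming 4 KiB pages and ignoring the untagged_addr() step):

	#include <stdio.h>
	#include <limits.h>

	#define PAGE_SHIFT 12	/* assumed 4 KiB pages for the demo */

	/* Shift only when the result fits in unsigned long; otherwise
	 * report "out of range" instead of computing a wrapped address. */
	static unsigned long pfn_to_vaddr(unsigned long pfn, unsigned long limit)
	{
		unsigned long vaddr = limit;	/* sentinel: empty read */

		if (pfn <= (ULONG_MAX >> PAGE_SHIFT))
			vaddr = pfn << PAGE_SHIFT;
		if (vaddr > limit)
			vaddr = limit;
		return vaddr;
	}

	int main(void)
	{
		unsigned long task_size = 1UL << 47; /* typical x86-64 user VA */

		printf("%#lx\n", pfn_to_vaddr(0x1000, task_size));	  /* in range */
		printf("%#lx\n", pfn_to_vaddr(ULONG_MAX / 8, task_size)); /* clamped */
		return 0;
	}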
index 3b20e21..03a369c 100644 (file)
@@ -168,12 +168,14 @@ EXPORT_SYMBOL(seq_read);
 ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
        struct seq_file *m = iocb->ki_filp->private_data;
-       size_t size = iov_iter_count(iter);
        size_t copied = 0;
        size_t n;
        void *p;
        int err = 0;
 
+       if (!iov_iter_count(iter))
+               return 0;
+
        mutex_lock(&m->lock);
 
        /*
@@ -206,36 +208,34 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
                if (!m->buf)
                        goto Enomem;
        }
-       /* if not empty - flush it first */
+       // something left in the buffer - copy it out first
        if (m->count) {
-               n = min(m->count, size);
-               if (copy_to_iter(m->buf + m->from, n, iter) != n)
-                       goto Efault;
+               n = copy_to_iter(m->buf + m->from, m->count, iter);
                m->count -= n;
                m->from += n;
-               size -= n;
                copied += n;
-               if (!size)
+               if (m->count)   // hadn't managed to copy everything
                        goto Done;
        }
-       /* we need at least one record in buffer */
+       // get a non-empty record in the buffer
        m->from = 0;
        p = m->op->start(m, &m->index);
        while (1) {
                err = PTR_ERR(p);
-               if (!p || IS_ERR(p))
+               if (!p || IS_ERR(p))    // EOF or an error
                        break;
                err = m->op->show(m, p);
-               if (err < 0)
+               if (err < 0)            // hard error
                        break;
-               if (unlikely(err))
+               if (unlikely(err))      // ->show() says "skip it"
                        m->count = 0;
-               if (unlikely(!m->count)) {
+               if (unlikely(!m->count)) { // empty record
                        p = m->op->next(m, p, &m->index);
                        continue;
                }
-               if (m->count < m->size)
+               if (!seq_has_overflowed(m)) // got it
                        goto Fill;
+               // need a bigger buffer
                m->op->stop(m, p);
                kvfree(m->buf);
                m->count = 0;
@@ -244,11 +244,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
                        goto Enomem;
                p = m->op->start(m, &m->index);
        }
+       // EOF or an error
        m->op->stop(m, p);
        m->count = 0;
        goto Done;
 Fill:
-       /* they want more? let's try to get some more */
+       // one non-empty record is in the buffer; if they want more,
+       // try to fit more in, but in any case we need to advance
+       // the iterator once for every record shown.
        while (1) {
                size_t offs = m->count;
                loff_t pos = m->index;

                p = m->op->next(m, p, &m->index);
                if (pos == m->index) {
                        pr_info_ratelimited("buggy .next function %ps did not update position index\n",
                                            m->op->next);
                                            m->op->next);
                        m->index++;
                }
-               if (!p || IS_ERR(p)) {
-                       err = PTR_ERR(p);
+               if (!p || IS_ERR(p))    // no next record for us
                        break;
-               }
-               if (m->count >= size)
+               if (m->count >= iov_iter_count(iter))
                        break;
                err = m->op->show(m, p);
-               if (seq_has_overflowed(m) || err) {
+               if (err > 0) {          // ->show() says "skip it"
                        m->count = offs;
-                       if (likely(err <= 0))
-                               break;
+               } else if (err || seq_has_overflowed(m)) {
+                       m->count = offs;
+                       break;
                }
        }
        m->op->stop(m, p);
-       n = min(m->count, size);
-       if (copy_to_iter(m->buf, n, iter) != n)
-               goto Efault;
+       n = copy_to_iter(m->buf, m->count, iter);
        copied += n;
        m->count -= n;
        m->from = n;
 Done:
-       if (!copied)
-               copied = err;
-       else {
+       if (unlikely(!copied)) {
+               copied = m->count ? -EFAULT : err;
+       } else {
                iocb->ki_pos += copied;
                m->read_pos += copied;
        }
@@ -291,9 +291,6 @@ Done:
 Enomem:
        err = -ENOMEM;
        goto Done;
-Efault:
-       err = -EFAULT;
-       goto Done;
 }
 EXPORT_SYMBOL(seq_read_iter);
 
index bb128db..d6ef69a 100644 (file)
@@ -515,7 +515,7 @@ xfs_attr_copy_value(
  *========================================================================*/
 
 /*
- * Query whether the requested number of additional bytes of extended
+ * Query whether the total requested number of attr fork bytes of extended
  * attribute space will be able to fit inline.
  *
  * Returns zero if not, else the di_forkoff fork offset to be used in the
@@ -535,6 +535,12 @@ xfs_attr_shortform_bytesfit(
        int                     maxforkoff;
        int                     offset;
 
+       /*
+        * Check if the new size could fit at all first:
+        */
+       if (bytes > XFS_LITINO(mp))
+               return 0;
+
        /* rounded down */
        offset = (XFS_LITINO(mp) - bytes) >> 3;
 
index 577a663..beb81c8 100644 (file)
@@ -243,8 +243,8 @@ xfs_rmapbt_key_diff(
        else if (y > x)
                return -1;
 
-       x = be64_to_cpu(kp->rm_offset);
-       y = xfs_rmap_irec_offset_pack(rec);
+       x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
+       y = rec->rm_offset;
        if (x > y)
                return 1;
        else if (y > x)
@@ -275,8 +275,8 @@ xfs_rmapbt_diff_two_keys(
        else if (y > x)
                return -1;
 
-       x = be64_to_cpu(kp1->rm_offset);
-       y = be64_to_cpu(kp2->rm_offset);
+       x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
+       y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
        if (x > y)
                return 1;
        else if (y > x)
@@ -390,8 +390,8 @@ xfs_rmapbt_keys_inorder(
                return 1;
        else if (a > b)
                return 0;
-       a = be64_to_cpu(k1->rmap.rm_offset);
-       b = be64_to_cpu(k2->rmap.rm_offset);
+       a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
+       b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
        if (a <= b)
                return 1;
        return 0;
@@ -420,8 +420,8 @@ xfs_rmapbt_recs_inorder(
                return 1;
        else if (a > b)
                return 0;
-       a = be64_to_cpu(r1->rmap.rm_offset);
-       b = be64_to_cpu(r2->rmap.rm_offset);
+       a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
+       b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
        if (a <= b)
                return 1;
        return 0;
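All four comparators now strip the flag bits out of rm_offset through XFS_RMAP_OFF() before comparing. Without the mask, two keys describing the same file offset could compare unequal merely because one on-disk copy carries flag bits, which corrupts the ordering checks. The idiom in isolation (bit layout hypothetical, in the spirit of the rm_offset packing):

	#include <stdio.h>
	#include <stdint.h>

	/* Hypothetical layout: low 54 bits are the offset, high bits are
	 * per-record flags. */
	#define DEMO_OFF_MASK	((1ULL << 54) - 1)
	#define DEMO_OFF(x)	((x) & DEMO_OFF_MASK)
	#define DEMO_FLAG_UNWR	(1ULL << 63)

	static int key_cmp(uint64_t a, uint64_t b)
	{
		uint64_t x = DEMO_OFF(a), y = DEMO_OFF(b);

		return (x > y) - (x < y);
	}

	int main(void)
	{
		uint64_t k1 = 4096;
		uint64_t k2 = 4096 | DEMO_FLAG_UNWR;	/* same offset, flag set */

		/* Raw comparison claims k2 > k1; masked comparison agrees
		 * they index the same offset. */
		printf("raw: %d, masked: %d\n",
		       (k1 > k2) - (k1 < k2), key_cmp(k1, k2));
		return 0;
	}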
index 412e2ec..fed56d2 100644 (file)
@@ -218,13 +218,13 @@ xchk_bmap_xref_rmap(
         * which doesn't track unwritten state.
         */
        if (owner != XFS_RMAP_OWN_COW &&
-           irec->br_state == XFS_EXT_UNWRITTEN &&
-           !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
+           !!(irec->br_state == XFS_EXT_UNWRITTEN) !=
+           !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
                xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
                                irec->br_startoff);
 
-       if (info->whichfork == XFS_ATTR_FORK &&
-           !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
+       if (!!(info->whichfork == XFS_ATTR_FORK) !=
+           !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
                xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
                                irec->br_startoff);
        if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
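The scrubber previously flagged corruption only when the bmap said unwritten but the rmap flag was clear; the rewritten test uses `!!x != !!y` to normalize both sides to 0/1 and compare them, effectively a boolean XOR that also catches the opposite disagreement (flag set, extent written). The idiom on its own:

	#include <stdio.h>

	#define FLAG_UNWRITTEN 0x4	/* arbitrary demo flag bit */

	static int states_disagree(int ext_unwritten, unsigned flags)
	{
		/* !! collapses any nonzero value to 1, so this is XOR on
		 * truth values and fires on a mismatch in either direction. */
		return !!ext_unwritten != !!(flags & FLAG_UNWRITTEN);
	}

	int main(void)
	{
		printf("%d\n", states_disagree(1, 0));              /* 1: old check caught */
		printf("%d\n", states_disagree(0, FLAG_UNWRITTEN)); /* 1: new check adds   */
		printf("%d\n", states_disagree(1, FLAG_UNWRITTEN)); /* 0: consistent       */
		return 0;
	}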
index f52a7b8..debf392 100644 (file)
@@ -452,32 +452,41 @@ xchk_btree_check_minrecs(
        int                     level,
        struct xfs_btree_block  *block)
 {
-       unsigned int            numrecs;
-       int                     ok_level;
-
-       numrecs = be16_to_cpu(block->bb_numrecs);
+       struct xfs_btree_cur    *cur = bs->cur;
+       unsigned int            root_level = cur->bc_nlevels - 1;
+       unsigned int            numrecs = be16_to_cpu(block->bb_numrecs);
 
        /* More records than minrecs means the block is ok. */
-       if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level))
+       if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
                return;
 
        /*
-        * Certain btree blocks /can/ have fewer than minrecs records.  Any
-        * level greater than or equal to the level of the highest dedicated
-        * btree block are allowed to violate this constraint.
-        *
-        * For a btree rooted in a block, the btree root can have fewer than
-        * minrecs records.  If the btree is rooted in an inode and does not
-        * store records in the root, the direct children of the root and the
-        * root itself can have fewer than minrecs records.
+        * For btrees rooted in the inode, it's possible that the root block
+        * contents spilled into a regular ondisk block because there wasn't
+        * enough space in the inode root.  The number of records in that
+        * child block might be less than the standard minrecs, but that's ok
+        * provided that there's only one direct child of the root.
         */
-       ok_level = bs->cur->bc_nlevels - 1;
-       if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
-               ok_level--;
-       if (level >= ok_level)
+       if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+           level == cur->bc_nlevels - 2) {
+               struct xfs_btree_block  *root_block;
+               struct xfs_buf          *root_bp;
+               int                     root_maxrecs;
+
+               root_block = xfs_btree_get_block(cur, root_level, &root_bp);
+               root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
+               if (be16_to_cpu(root_block->bb_numrecs) != 1 ||
+                   numrecs <= root_maxrecs)
+                       xchk_btree_set_corrupt(bs->sc, cur, level);
                return;
+       }
 
-       xchk_btree_set_corrupt(bs->sc, bs->cur, level);
+       /*
+        * Otherwise, only the root level is allowed to have fewer than minrecs
+        * records or keyptrs.
+        */
+       if (level < root_level)
+               xchk_btree_set_corrupt(bs->sc, cur, level);
 }
 
 /*
index 7c43299..b045e95 100644 (file)
@@ -558,14 +558,27 @@ xchk_directory_leaf1_bestfree(
        /* Check all the bestfree entries. */
        for (i = 0; i < bestcount; i++, bestp++) {
                best = be16_to_cpu(*bestp);
-               if (best == NULLDATAOFF)
-                       continue;
                error = xfs_dir3_data_read(sc->tp, sc->ip,
-                               i * args->geo->fsbcount, 0, &dbp);
+                               xfs_dir2_db_to_da(args->geo, i),
+                               XFS_DABUF_MAP_HOLE_OK,
+                               &dbp);
                if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
                                &error))
                        break;
-               xchk_directory_check_freesp(sc, lblk, dbp, best);
+
+               if (!dbp) {
+                       if (best != NULLDATAOFF) {
+                               xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
+                                               lblk);
+                               break;
+                       }
+                       continue;
+               }
+
+               if (best == NULLDATAOFF)
+                       xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+               else
+                       xchk_directory_check_freesp(sc, lblk, dbp, best);
                xfs_trans_brelse(sc->tp, dbp);
                if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                        break;
index 3abb8b9..7b9ff82 100644 (file)
@@ -706,6 +706,23 @@ relock:
        return 0;
 }
 
+/*
+ * Check that the imap we are going to return to the caller spans the entire
+ * range that the caller requested for the IO.
+ */
+static bool
+imap_spans_range(
+       struct xfs_bmbt_irec    *imap,
+       xfs_fileoff_t           offset_fsb,
+       xfs_fileoff_t           end_fsb)
+{
+       if (imap->br_startoff > offset_fsb)
+               return false;
+       if (imap->br_startoff + imap->br_blockcount < end_fsb)
+               return false;
+       return true;
+}
+
 static int
 xfs_direct_write_iomap_begin(
        struct inode            *inode,
@@ -766,6 +783,18 @@ xfs_direct_write_iomap_begin(
        if (imap_needs_alloc(inode, flags, &imap, nimaps))
                goto allocate_blocks;
 
+       /*
+        * NOWAIT IO needs a single map spanning the entire requested range;
+        * otherwise the part of the range not covered by this map triggers
+        * EAGAIN when it is subsequently mapped, aborting an IO that has
+        * already partially completed.
+        */
+       if ((flags & IOMAP_NOWAIT) &&
+           !imap_spans_range(&imap, offset_fsb, end_fsb)) {
+               error = -EAGAIN;
+               goto out_unlock;
+       }
+
        xfs_iunlock(ip, lockmode);
        trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
        return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
index 233dcc8..2a45138 100644 (file)
@@ -55,6 +55,9 @@ struct xfs_iwalk_ag {
        /* Where do we start the traversal? */
        xfs_ino_t                       startino;
 
+       /* What was the last inode number we saw when iterating the inobt? */
+       xfs_ino_t                       lastino;
+
        /* Array of inobt records we cache. */
        struct xfs_inobt_rec_incore     *recs;
 
@@ -301,6 +304,9 @@ xfs_iwalk_ag_start(
        if (XFS_IS_CORRUPT(mp, *has_more != 1))
                return -EFSCORRUPTED;
 
+       iwag->lastino = XFS_AGINO_TO_INO(mp, agno,
+                               irec->ir_startino + XFS_INODES_PER_CHUNK - 1);
+
        /*
         * If the LE lookup yielded an inobt record before the cursor position,
         * skip it and see if there's another one after it.
@@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks(
        struct xfs_mount                *mp = iwag->mp;
        struct xfs_trans                *tp = iwag->tp;
        struct xfs_inobt_rec_incore     *irec;
-       xfs_agino_t                     restart;
+       xfs_agino_t                     next_agino;
        int                             error;
 
+       next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1;
+
        ASSERT(iwag->nr_recs > 0);
 
        /* Delete cursor but remember the last record we cached... */
        xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
        irec = &iwag->recs[iwag->nr_recs - 1];
-       restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
+       ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK);
 
        error = xfs_iwalk_ag_recs(iwag);
        if (error)
@@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks(
        if (error)
                return error;
 
-       return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
+       return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more);
 }
 
 /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
@@ -396,6 +404,7 @@ xfs_iwalk_ag(
 
        while (!error && has_more) {
                struct xfs_inobt_rec_incore     *irec;
+               xfs_ino_t                       rec_fsino;
 
                cond_resched();
                if (xfs_pwork_want_abort(&iwag->pwork))
@@ -407,6 +416,15 @@ xfs_iwalk_ag(
                if (error || !has_more)
                        break;
 
+               /* Make sure that we always move forward. */
+               rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino);
+               if (iwag->lastino != NULLFSINO &&
+                   XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) {
+                       error = -EFSCORRUPTED;
+                       goto out;
+               }
+               iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1;
+
                /* No allocated inodes in this chunk; skip it. */
                if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
                        error = xfs_btree_increment(cur, 0, &has_more);
@@ -535,6 +553,7 @@ xfs_iwalk(
                .trim_start     = 1,
                .skip_empty     = 1,
                .pwork          = XFS_PWORK_SINGLE_THREADED,
+               .lastino        = NULLFSINO,
        };
        xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, startino);
        int                     error;
@@ -623,6 +642,7 @@ xfs_iwalk_threaded(
                iwag->data = data;
                iwag->startino = startino;
                iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
+               iwag->lastino = NULLFSINO;
                xfs_pwork_queue(&pctl, &iwag->pwork);
                startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
                if (flags & XFS_INOBT_WALK_SAME_AG)
@@ -696,6 +716,7 @@ xfs_inobt_walk(
                .startino       = startino,
                .sz_recs        = xfs_inobt_walk_prefetch(inobt_records),
                .pwork          = XFS_PWORK_SINGLE_THREADED,
+               .lastino        = NULLFSINO,
        };
        xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, startino);
        int                     error;
index 150ee5c..7110507 100644 (file)
@@ -194,20 +194,25 @@ xfs_initialize_perag(
                }
 
                pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
-               if (!pag)
+               if (!pag) {
+                       error = -ENOMEM;
                        goto out_unwind_new_pags;
+               }
                pag->pag_agno = index;
                pag->pag_mount = mp;
                spin_lock_init(&pag->pag_ici_lock);
                INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
-               if (xfs_buf_hash_init(pag))
+
+               error = xfs_buf_hash_init(pag);
+               if (error)
                        goto out_free_pag;
                init_waitqueue_head(&pag->pagb_wait);
                spin_lock_init(&pag->pagb_lock);
                pag->pagb_count = 0;
                pag->pagb_tree = RB_ROOT;
 
-               if (radix_tree_preload(GFP_NOFS))
+               error = radix_tree_preload(GFP_NOFS);
+               if (error)
                        goto out_hash_destroy;
 
                spin_lock(&mp->m_perag_lock);
index ff5930b..bec47f2 100644 (file)
@@ -691,21 +691,23 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
                bio->bi_opf |= REQ_FUA;
 
        ret = bio_iov_iter_get_pages(bio, from);
-       if (unlikely(ret)) {
-               bio_io_error(bio);
-               return ret;
-       }
+       if (unlikely(ret))
+               goto out_release;
+
        size = bio->bi_iter.bi_size;
-       task_io_account_write(ret);
+       task_io_account_write(size);
 
        if (iocb->ki_flags & IOCB_HIPRI)
                bio_set_polled(bio, iocb);
 
        ret = submit_bio_wait(bio);
 
+       zonefs_file_write_dio_end_io(iocb, size, ret, 0);
+
+out_release:
+       bio_release_pages(bio, false);
        bio_put(bio);
 
-       zonefs_file_write_dio_end_io(iocb, size, ret, 0);
        if (ret >= 0) {
                iocb->ki_pos += size;
                return size;
index db1b0ae..df60be7 100644 (file)
@@ -1105,7 +1105,7 @@ do {                                                                             \
        KUNIT_ASSERTION(test,                                                  \
                        strcmp(__left, __right) op 0,                          \
                        kunit_binary_str_assert,                               \
-                       KUNIT_INIT_BINARY_ASSERT_STRUCT(test,                  \
+                       KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(test,              \
                                                        assert_type,           \
                                                        #op,                   \
                                                        #left,                 \
index 639cae2..033eb5f 100644 (file)
@@ -1073,12 +1073,15 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
  * file system requests.
  */
 static inline unsigned int blk_max_size_offset(struct request_queue *q,
-                                              sector_t offset)
-{
-       unsigned int chunk_sectors = q->limits.chunk_sectors;
-
-       if (!chunk_sectors)
-               return q->limits.max_sectors;
+                                              sector_t offset,
+                                              unsigned int chunk_sectors)
+{
+       if (!chunk_sectors) {
+               if (q->limits.chunk_sectors)
+                       chunk_sectors = q->limits.chunk_sectors;
+               else
+                       return q->limits.max_sectors;
+       }
 
        if (likely(is_power_of_2(chunk_sectors)))
                chunk_sectors -= offset & (chunk_sectors - 1);
@@ -1101,7 +1104,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
            req_op(rq) == REQ_OP_SECURE_ERASE)
                return blk_queue_get_max_sectors(q, req_op(rq));
 
-       return min(blk_max_size_offset(q, offset),
+       return min(blk_max_size_offset(q, offset, 0),
                        blk_queue_get_max_sectors(q, req_op(rq)));
 }
 
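blk_max_size_offset() caps an I/O so it does not cross a chunk boundary: given the offset within the current chunk, the remaining room is chunk_sectors minus offset modulo chunk_sectors, with the modulo computed by masking when the chunk size is a power of two. The new parameter merely lets callers supply their own chunk size, with 0 keeping the queue's value. The arithmetic on its own (a sketch, not the kernel function):

	#include <stdio.h>

	/* Sectors left before the next chunk boundary; power-of-two chunks
	 * take the mask fast path, as in blk_max_size_offset(). */
	static unsigned room_in_chunk(unsigned long long offset, unsigned chunk)
	{
		if ((chunk & (chunk - 1)) == 0)		/* power of two */
			return chunk - (offset & (chunk - 1));
		return chunk - (unsigned)(offset % chunk);
	}

	int main(void)
	{
		/* 128-sector chunks: an I/O at sector 120 may span 8 sectors
		 * before it would straddle the boundary at 128. */
		printf("%u\n", room_in_chunk(120, 128));	/* 8   */
		printf("%u\n", room_in_chunk(128, 128));	/* 128 */
		return 0;
	}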
index 9903088..2696eb0 100644 (file)
@@ -12,6 +12,9 @@
 
 #define BOOTCONFIG_MAGIC       "#BOOTCONFIG\n"
 #define BOOTCONFIG_MAGIC_LEN   12
+#define BOOTCONFIG_ALIGN_SHIFT 2
+#define BOOTCONFIG_ALIGN       (1 << BOOTCONFIG_ALIGN_SHIFT)
+#define BOOTCONFIG_ALIGN_MASK  (BOOTCONFIG_ALIGN - 1)
 
 /* XBC tree node */
 struct xbc_node {
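The new constants encode the usual shift/align/mask triple: BOOTCONFIG_ALIGN is 4 bytes and BOOTCONFIG_ALIGN_MASK is 3, so a size rounds up to the alignment with `(sz + mask) & ~mask`. A quick check of that arithmetic (demo names, not the bootconfig code):

	#include <stdio.h>

	#define ALIGN_SHIFT	2
	#define ALIGN_SIZE	(1 << ALIGN_SHIFT)	/* 4 */
	#define ALIGN_MASK	(ALIGN_SIZE - 1)	/* 3 */

	/* Round up to the next multiple of ALIGN_SIZE. */
	static unsigned long align_up(unsigned long sz)
	{
		return (sz + ALIGN_MASK) & ~(unsigned long)ALIGN_MASK;
	}

	int main(void)
	{
		printf("%lu %lu %lu\n", align_up(1), align_up(4), align_up(5));
		/* prints: 4 4 8 */
		return 0;
	}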
index e3a0be2..7bb66e1 100644 (file)
@@ -77,4 +77,9 @@
 #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
 #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
 
+#ifdef __GENKSYMS__
+/* genksyms gets confused by _Static_assert */
+#define _Static_assert(expr, ...)
+#endif
+
 #endif /* _LINUX_BUILD_BUG_H */
index dd7233c..98cff1b 100644 (file)
@@ -8,8 +8,10 @@
                     + __clang_patchlevel__)
 
 #if CLANG_VERSION < 100001
+#ifndef __BPF_TRACING__
 # error Sorry, your version of Clang is too old - please use 10.0.1 or newer.
 #endif
+#endif
 
 /* Compiler specific definitions for Clang compiler */
 
index 46c3d69..de51c1b 100644 (file)
@@ -104,6 +104,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
 #endif
 }
 
+#if defined(CONFIG_UM) || defined(CONFIG_IA64)
 /*
  * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
  * extra segments containing the gate DSO contents.  Dumping its
@@ -118,5 +119,26 @@ elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset);
 extern int
 elf_core_write_extra_data(struct coredump_params *cprm);
 extern size_t elf_core_extra_data_size(void);
+#else
+static inline Elf_Half elf_core_extra_phdrs(void)
+{
+       return 0;
+}
+
+static inline int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
+{
+       return 1;
+}
+
+static inline int elf_core_write_extra_data(struct coredump_params *cprm)
+{
+       return 1;
+}
+
+static inline size_t elf_core_extra_data_size(void)
+{
+       return 0;
+}
+#endif
 
 #endif /* _LINUX_ELFCORE_H */
index dc4fd8a..fa0a524 100644 (file)
@@ -41,6 +41,7 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
 int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
                                  __u64 *cnt);
+void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
 
 DECLARE_PER_CPU(int, eventfd_wake_count);
 
@@ -82,6 +83,11 @@ static inline bool eventfd_signal_count(void)
        return false;
 }
 
+static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+
+}
+
 #endif
 
 #endif /* _LINUX_EVENTFD_H */
index 5968df8..41a1bab 100644 (file)
 #define        ZYNQMP_PM_CAPABILITY_WAKEUP     0x4U
 #define        ZYNQMP_PM_CAPABILITY_UNUSABLE   0x8U
 
-/* Feature check status */
-#define PM_FEATURE_INVALID             -1
-#define PM_FEATURE_UNCHECKED           0
-
 /*
  * Firmware FPGA Manager flags
  * XILINX_ZYNQMP_PM_FPGA_FULL: FPGA full reconfiguration
index fbf5b3e..d956987 100644 (file)
@@ -798,7 +798,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu);
 extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
 extern int dmar_disabled;
 extern int intel_iommu_enabled;
-extern int intel_iommu_tboot_noforce;
 extern int intel_iommu_gfx_mapped;
 #else
 static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
index 71535e8..ea5a337 100644 (file)
@@ -384,11 +384,19 @@ extern void irq_domain_associate_many(struct irq_domain *domain,
 extern void irq_domain_disassociate(struct irq_domain *domain,
                                    unsigned int irq);
 
-extern unsigned int irq_create_mapping(struct irq_domain *host,
-                                      irq_hw_number_t hwirq);
+extern unsigned int irq_create_mapping_affinity(struct irq_domain *host,
+                                     irq_hw_number_t hwirq,
+                                     const struct irq_affinity_desc *affinity);
 extern unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec);
 extern void irq_dispose_mapping(unsigned int virq);
 
+static inline unsigned int irq_create_mapping(struct irq_domain *host,
+                                             irq_hw_number_t hwirq)
+{
+       return irq_create_mapping_affinity(host, hwirq, NULL);
+}
+
+
 /**
  * irq_linear_revmap() - Find a linux irq from a hw irq number.
  * @domain: domain owning this hardware interrupt
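This hunk shows the standard way to grow a kernel API without touching its many callers: the exported symbol gains the new parameter (here an affinity descriptor) and the old name becomes a static inline that passes a neutral default, so existing irq_create_mapping() call sites recompile unchanged. The shape of the pattern, reduced to a self-contained sketch with hypothetical names:

	#include <stdio.h>
	#include <stddef.h>

	struct affinity_desc { int node; };	/* stand-in for the real type */

	/* New, extended entry point: all behaviour lives here. */
	static unsigned int create_mapping_affinity(unsigned long hwirq,
						    const struct affinity_desc *aff)
	{
		printf("hwirq %lu, affinity %s\n", hwirq, aff ? "set" : "default");
		return 100 + (unsigned int)hwirq;	/* fake virq */
	}

	/* Old name kept as a thin wrapper with a neutral default, so no
	 * existing caller has to change. */
	static inline unsigned int create_mapping(unsigned long hwirq)
	{
		return create_mapping_affinity(hwirq, NULL);
	}

	int main(void)
	{
		struct affinity_desc aff = { .node = 1 };

		create_mapping(9);			/* legacy caller */
		create_mapping_affinity(9, &aff);	/* new caller    */
		return 0;
	}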
index 1c49fd6..578ff19 100644 (file)
@@ -401,7 +401,7 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
 #define JI_WAIT_DATA (1 << __JI_WAIT_DATA)
 
 /**
- * struct jbd_inode - The jbd_inode type is the structure linking inodes in
+ * struct jbd2_inode - The jbd_inode type is the structure linking inodes in
  * ordered mode present in a transaction so that we can sync them during commit.
  */
 struct jbd2_inode {
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
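(This is the only file section in this listing that retains its "diff --git" header; the others show only the index line.)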
new file mode 100644 (file)
index 0000000..120e5e9
--- /dev/null
+++ b/include/linux/kvm_dirty_ring.h
@@ -0,0 +1,103 @@
+#ifndef KVM_DIRTY_RING_H
+#define KVM_DIRTY_RING_H
+
+#include <linux/kvm.h>
+
+/**
+ * kvm_dirty_ring: KVM internal dirty ring structure
+ *
+ * @dirty_index: free running counter that points to the next slot in
+ *               dirty_ring->dirty_gfns, where a new dirty page should go
+ * @reset_index: free running counter that points to the next dirty page
+ *               in dirty_ring->dirty_gfns for which dirty trap needs to
+ *               be reenabled
+ * @size:        size of the compact list, dirty_ring->dirty_gfns
+ * @soft_limit:  when the number of dirty pages in the list reaches this
+ *               limit, vcpu that owns this ring should exit to userspace
+ *               to allow userspace to harvest all the dirty pages
+ * @dirty_gfns:  the array to keep the dirty gfns
+ * @index:       index of this dirty ring
+ */
+struct kvm_dirty_ring {
+       u32 dirty_index;
+       u32 reset_index;
+       u32 size;
+       u32 soft_limit;
+       struct kvm_dirty_gfn *dirty_gfns;
+       int index;
+};
+
+#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0)
+/*
+ * If KVM_DIRTY_LOG_PAGE_OFFSET is not defined, kvm_dirty_ring.o is
+ * not built either, so define these nop functions for the arch.
+ */
+static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
+{
+       return 0;
+}
+
+static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
+                                      int index, u32 size)
+{
+       return 0;
+}
+
+static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+{
+       return NULL;
+}
+
+static inline int kvm_dirty_ring_reset(struct kvm *kvm,
+                                      struct kvm_dirty_ring *ring)
+{
+       return 0;
+}
+
+static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring,
+                                      u32 slot, u64 offset)
+{
+}
+
+static inline struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring,
+                                                  u32 offset)
+{
+       return NULL;
+}
+
+static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
+{
+}
+
+static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+{
+       return true;
+}
+
+#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+
+u32 kvm_dirty_ring_get_rsvd_entries(void);
+int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm);
+
+/*
+ * called with kvm->slots_lock held, returns the number of
+ * processed pages.
+ */
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
+
+/*
+ * kvm_dirty_ring_push() cannot fail: vcpus exit to userspace to
+ * harvest entries once kvm_dirty_ring_soft_full() reports true, so
+ * the ring never overflows.
+ */
+void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset);
+
+/* for use in vm_operations_struct */
+struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);
+
+void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
+bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);
+
+#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+
+#endif /* KVM_DIRTY_RING_H */
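dirty_index and reset_index are free-running u32 counters, so their difference, computed in modular arithmetic, is the number of un-harvested entries even across wraparound, and the soft-full test is a single subtraction against soft_limit. A sketch of that index arithmetic (constants arbitrary, the GFN payload omitted):

	#include <stdio.h>
	#include <stdint.h>

	struct ring {
		uint32_t dirty_index;	/* producer: next slot to fill   */
		uint32_t reset_index;	/* consumer: next slot to reap   */
		uint32_t size;		/* number of slots, power of two */
		uint32_t soft_limit;
	};

	/* Free-running counters: subtraction stays correct across u32 wrap. */
	static uint32_t ring_used(const struct ring *r)
	{
		return r->dirty_index - r->reset_index;
	}

	static int ring_soft_full(const struct ring *r)
	{
		return ring_used(r) >= r->soft_limit;
	}

	static void ring_push(struct ring *r)
	{
		uint32_t slot = r->dirty_index & (r->size - 1);

		(void)slot;	/* a real ring would record a dirty GFN here */
		r->dirty_index++;
	}

	int main(void)
	{
		struct ring r = { .dirty_index = 0xfffffff0u,
				  .reset_index = 0xfffffff0u,
				  .size = 4096, .soft_limit = 4096 - 64 };

		for (int i = 0; i < 100; i++)		/* push across the wrap */
			ring_push(&r);
		printf("used=%u soft_full=%d\n", ring_used(&r), ring_soft_full(&r));
		return 0;
	}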
index 7f2e2a0..f3b1013 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/kvm_types.h>
 
 #include <asm/kvm_host.h>
+#include <linux/kvm_dirty_ring.h>
 
 #ifndef KVM_MAX_VCPU_ID
 #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
@@ -319,6 +320,7 @@ struct kvm_vcpu {
        bool preempted;
        bool ready;
        struct kvm_vcpu_arch arch;
+       struct kvm_dirty_ring dirty_ring;
 };
 
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
@@ -349,6 +351,11 @@ struct kvm_memory_slot {
        u16 as_id;
 };
 
+static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot)
+{
+       return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
+}
+
 static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
 {
        return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
@@ -505,6 +512,7 @@ struct kvm {
        struct srcu_struct irq_srcu;
        pid_t userspace_pid;
        unsigned int max_halt_poll_ns;
+       u32 dirty_ring_size;
 };
 
 #define kvm_err(fmt, ...) \
@@ -792,13 +800,12 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                        offset_in_page(__gpa), v);                      \
 })
 
-int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
+void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
@@ -1478,4 +1485,14 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
 }
 #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */
 
+/*
+ * This defines how many reserved entries we want to keep before we
+ * kick the vcpu out to userspace, to avoid the dirty ring filling
+ * up.  The value can be tuned higher if e.g. PML is enabled on the
+ * host.
+ */
+#define  KVM_DIRTY_RING_RSVD_ENTRIES  64
+
+/* Max number of entries allowed for each kvm dirty ring */
+#define  KVM_DIRTY_RING_MAX_ENTRIES  65536
+
 #endif
index a80c59a..922a7f6 100644 (file)
@@ -282,20 +282,6 @@ struct mem_cgroup {
 
        MEMCG_PADDING(_pad1_);
 
-       /*
-        * set > 0 if pages under this cgroup are moving to other cgroup.
-        */
-       atomic_t                moving_account;
-       struct task_struct      *move_lock_task;
-
-       /* Legacy local VM stats and events */
-       struct memcg_vmstats_percpu __percpu *vmstats_local;
-
-       /* Subtree VM stats and events (batched updates) */
-       struct memcg_vmstats_percpu __percpu *vmstats_percpu;
-
-       MEMCG_PADDING(_pad2_);
-
        atomic_long_t           vmstats[MEMCG_NR_STAT];
        atomic_long_t           vmevents[NR_VM_EVENT_ITEMS];
 
@@ -317,6 +303,20 @@ struct mem_cgroup {
        struct list_head objcg_list; /* list of inherited objcgs */
 #endif
 
+       MEMCG_PADDING(_pad2_);
+
+       /*
+        * set > 0 if pages under this cgroup are moving to other cgroup.
+        */
+       atomic_t                moving_account;
+       struct task_struct      *move_lock_task;
+
+       /* Legacy local VM stats and events */
+       struct memcg_vmstats_percpu __percpu *vmstats_local;
+
+       /* Subtree VM stats and events (batched updates) */
+       struct memcg_vmstats_percpu __percpu *vmstats_percpu;
+
 #ifdef CONFIG_CGROUP_WRITEBACK
        struct list_head cgwb_list;
        struct wb_domain cgwb_domain;
index d65c6fd..551093b 100644 (file)
@@ -281,20 +281,6 @@ static inline bool movable_node_is_enabled(void)
 }
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
 
-#ifdef CONFIG_NUMA
-extern int memory_add_physaddr_to_nid(u64 start);
-extern int phys_to_target_node(u64 start);
-#else
-static inline int memory_add_physaddr_to_nid(u64 start)
-{
-       return 0;
-}
-static inline int phys_to_target_node(u64 start)
-{
-       return 0;
-}
-#endif
-
 #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
 /*
  * pgdat resizing functions
index a092346..2333524 100644 (file)
@@ -1223,6 +1223,11 @@ enum mlx5_fc_bulk_alloc_bitmask {
 
 #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum))
 
+enum {
+       MLX5_STEERING_FORMAT_CONNECTX_5   = 0,
+       MLX5_STEERING_FORMAT_CONNECTX_6DX = 1,
+};
+
 struct mlx5_ifc_cmd_hca_cap_bits {
        u8         reserved_at_0[0x30];
        u8         vhca_id[0x10];
@@ -1521,7 +1526,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
        u8         general_obj_types[0x40];
 
-       u8         reserved_at_440[0x20];
+       u8         reserved_at_440[0x4];
+       u8         steering_format_version[0x4];
+       u8         create_qp_start_hint[0x18];
 
        u8         reserved_at_460[0x3];
        u8         log_max_uctx[0x5];
index 964b494..7c3da0e 100644 (file)
@@ -2813,9 +2813,21 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
                     struct net_device *sb_dev);
 u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
                       struct net_device *sb_dev);
+
 int dev_queue_xmit(struct sk_buff *skb);
 int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
-int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
+int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
+
+static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
+{
+       int ret;
+
+       ret = __dev_direct_xmit(skb, queue_id);
+       if (!dev_xmit_complete(ret))
+               kfree_skb(skb);
+       return ret;
+}
+
 int register_netdevice(struct net_device *dev);
 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
 void unregister_netdevice_many(struct list_head *head);
@@ -3137,6 +3149,11 @@ static inline bool dev_validate_header(const struct net_device *dev,
        return false;
 }
 
+static inline bool dev_has_header(const struct net_device *dev)
+{
+       return dev->header_ops && dev->header_ops->create;
+}
+
 typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr,
                           int len, int size);
 int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
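Splitting dev_direct_xmit() this way changes the ownership contract at the common entry point: __dev_direct_xmit() hands the skb back on a non-completed status so the caller may retry it, while the inline wrapper consumes the skb on every outcome, freeing it itself when the transmit did not complete. A compact model of the two-tier contract (toy types, not the netdev API):

	#include <stdio.h>
	#include <stdlib.h>

	struct pkt { int len; };

	enum { TX_OK, TX_BUSY };

	/* Low-level variant: on TX_BUSY the caller still owns the packet
	 * and may retry or recycle it; on success the "device" consumes it. */
	static int low_level_xmit(struct pkt *p, int busy)
	{
		if (busy)
			return TX_BUSY;
		free(p);	/* device consumed the packet */
		return TX_OK;
	}

	/* Convenience wrapper: consumes the packet on every outcome, so
	 * fire-and-forget callers can never leak it. */
	static int xmit(struct pkt *p, int busy)
	{
		int ret = low_level_xmit(p, busy);

		if (ret != TX_OK)
			free(p);	/* kfree_skb() in the real code */
		return ret;
	}

	int main(void)
	{
		struct pkt *p = malloc(sizeof(*p));

		if (xmit(p, 1) != TX_OK)
			printf("dropped, wrapper already freed the packet\n");
		/* p must not be touched here in either case */
		return 0;
	}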
index 5deb099..8ebb641 100644 (file)
@@ -227,7 +227,7 @@ struct xt_table {
        unsigned int valid_hooks;
 
        /* Man behind the curtain... */
-       struct xt_table_info *private;
+       struct xt_table_info __rcu *private;
 
        /* Set this to THIS_MODULE if you are a module, otherwise NULL */
        struct module *me;
@@ -448,6 +448,9 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
 
 struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
 
+struct xt_table_info
+*xt_table_get_private_protected(const struct xt_table *table);
+
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
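Annotating ->private as __rcu means every access must go through an RCU accessor, which the new xt_table_get_private_protected() helper centralizes for update-side code that holds the table lock. The toy below models the acquire/release discipline with C11 atomics; it is an analogy for the pointer-publication rules, not the kernel's RCU implementation, and all names are hypothetical:

	#include <stdio.h>
	#include <stdatomic.h>
	#include <pthread.h>

	struct table_info { int entries; };

	static _Atomic(struct table_info *) table_private;
	static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Update side: caller must hold the lock, like
	 * xt_table_get_private_protected() (which would assert it). */
	static struct table_info *get_private_protected(void)
	{
		return atomic_load_explicit(&table_private, memory_order_relaxed);
	}

	/* Read side: no lock, just an acquire load (rcu_dereference). */
	static struct table_info *get_private_rcu(void)
	{
		return atomic_load_explicit(&table_private, memory_order_acquire);
	}

	/* Writer publishes with release semantics (rcu_assign_pointer). */
	static void set_private(struct table_info *ti)
	{
		pthread_mutex_lock(&table_lock);
		atomic_store_explicit(&table_private, ti, memory_order_release);
		pthread_mutex_unlock(&table_lock);
	}

	int main(void)
	{
		static struct table_info ti = { .entries = 3 };
		struct table_info *p;

		set_private(&ti);

		pthread_mutex_lock(&table_lock);
		p = get_private_protected();	/* update side, lock held */
		pthread_mutex_unlock(&table_lock);

		printf("%d %d\n", p->entries, get_private_rcu()->entries);
		return 0;
	}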
index c32c152..f0373a6 100644 (file)
@@ -55,6 +55,7 @@ struct nfs_page {
        unsigned short          wb_nio;         /* Number of I/O attempts */
 };
 
+struct nfs_pgio_mirror;
 struct nfs_pageio_descriptor;
 struct nfs_pageio_ops {
        void    (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
@@ -64,6 +65,9 @@ struct nfs_pageio_ops {
        unsigned int    (*pg_get_mirror_count)(struct nfs_pageio_descriptor *,
                                       struct nfs_page *);
        void    (*pg_cleanup)(struct nfs_pageio_descriptor *);
+       struct nfs_pgio_mirror *
+               (*pg_get_mirror)(struct nfs_pageio_descriptor *, u32);
+       u32     (*pg_set_mirror)(struct nfs_pageio_descriptor *, u32);
 };
 
 struct nfs_rw_ops {
index 8cb33cc..cb44cfe 100644 (file)
 #endif
 
 #ifdef CONFIG_NUMA
+#include <linux/printk.h>
+#include <asm/sparsemem.h>
+
 /* Generic implementation available */
 int numa_map_to_online_node(int node);
-#else
+
+#ifndef memory_add_physaddr_to_nid
+static inline int memory_add_physaddr_to_nid(u64 start)
+{
+       pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
+                       start);
+       return 0;
+}
+#endif
+#ifndef phys_to_target_node
+static inline int phys_to_target_node(u64 start)
+{
+       pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
+                       start);
+       return 0;
+}
+#endif
+#else /* !CONFIG_NUMA */
 static inline int numa_map_to_online_node(int node)
 {
        return NUMA_NO_NODE;
 }
+static inline int memory_add_physaddr_to_nid(u64 start)
+{
+       return 0;
+}
+static inline int phys_to_target_node(u64 start)
+{
+       return 0;
+}
 #endif
 
 #endif /* _LINUX_NUMA_H */
index e1e19c1..d5570de 100644 (file)
@@ -906,6 +906,8 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
        xas_set(&xas, rac->_index);
        rcu_read_lock();
        xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
+               if (xas_retry(&xas, page))
+                       continue;
                VM_BUG_ON_PAGE(!PageLocked(page), page);
                VM_BUG_ON_PAGE(PageTail(page), page);
                array[i++] = page;
index 71125a4..e237004 100644 (file)
@@ -1427,6 +1427,19 @@ typedef unsigned int pgtbl_mod_mask;
 
 #endif /* !__ASSEMBLY__ */
 
+#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+/*
+ * ZSMALLOC needs to know the highest PFN on 32-bit architectures
+ * with physical address space extension, but falls back to
+ * BITS_PER_LONG otherwise.
+ */
+#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition
+#else
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#endif
+#endif
+
 #ifndef has_transparent_hugepage
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define has_transparent_hugepage() 1
index c59999c..240dce5 100644 (file)
@@ -50,6 +50,7 @@ struct sysc_regbits {
        s8 emufree_shift;
 };
 
+#define SYSC_MODULE_QUIRK_ENA_RESETDONE        BIT(25)
 #define SYSC_MODULE_QUIRK_PRUSS                BIT(24)
 #define SYSC_MODULE_QUIRK_DSS_RESET    BIT(23)
 #define SYSC_MODULE_QUIRK_RTC_UNLOCK   BIT(22)
index 4b708f4..b492ae0 100644 (file)
@@ -387,6 +387,27 @@ static inline int pm_runtime_get_sync(struct device *dev)
 }
 
 /**
+ * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it.
+ * @dev: Target device.
+ *
+ * Resume @dev synchronously and if that is successful, increment its runtime
+ * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been
+ * incremented or a negative error code otherwise.
+ */
+static inline int pm_runtime_resume_and_get(struct device *dev)
+{
+       int ret;
+
+       ret = __pm_runtime_resume(dev, RPM_GET_PUT);
+       if (ret < 0) {
+               pm_runtime_put_noidle(dev);
+               return ret;
+       }
+
+       return 0;
+}
+
+/**
  * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0.
  * @dev: Target device.
  *
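The point of the new helper is that pm_runtime_get_sync() bumps the usage counter even when the resume fails, so every error path must remember a compensating pm_runtime_put_noidle(); pm_runtime_resume_and_get() folds that correction in, leaving the counter balanced on failure. A toy model of the counter bookkeeping (illustrative names and error values only):

	#include <stdio.h>

	static int usage_count;
	static int resume_should_fail;

	static int toy_resume(void)
	{
		return resume_should_fail ? -5 /* stands in for -EIO */ : 0;
	}

	/* pm_runtime_get_sync() semantics: counter rises even on failure. */
	static int toy_get_sync(void)
	{
		usage_count++;
		return toy_resume();
	}

	/* pm_runtime_resume_and_get() semantics: a failed resume leaves
	 * the counter exactly as it found it. */
	static int toy_resume_and_get(void)
	{
		int ret = toy_get_sync();

		if (ret < 0)
			usage_count--;	/* the pm_runtime_put_noidle() step */
		return ret;
	}

	int main(void)
	{
		resume_should_fail = 1;

		toy_get_sync();
		printf("after failed get_sync:       count=%d (leaked)\n", usage_count);
		usage_count = 0;

		toy_resume_and_get();
		printf("after failed resume_and_get: count=%d (balanced)\n", usage_count);
		return 0;
	}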
index 063cd12..76cd21f 100644 (file)
@@ -552,7 +552,6 @@ struct sched_dl_entity {
         * overruns.
         */
        unsigned int                    dl_throttled      : 1;
-       unsigned int                    dl_boosted        : 1;
        unsigned int                    dl_yielded        : 1;
        unsigned int                    dl_non_contending : 1;
        unsigned int                    dl_overrun        : 1;
@@ -571,6 +570,15 @@ struct sched_dl_entity {
         * time.
         */
        struct hrtimer inactive_timer;
+
+#ifdef CONFIG_RT_MUTEXES
+       /*
+        * Priority Inheritance. When a DEADLINE scheduling entity is boosted
+        * pi_se points to the donor, otherwise points to the dl_se it belongs
+        * to (the original one/itself).
+        */
+       struct sched_dl_entity *pi_se;
+#endif
 };
 
 #ifdef CONFIG_UCLAMP_TASK
@@ -770,7 +778,6 @@ struct task_struct {
        unsigned                        sched_reset_on_fork:1;
        unsigned                        sched_contributes_to_load:1;
        unsigned                        sched_migrated:1;
-       unsigned                        sched_remote_wakeup:1;
 #ifdef CONFIG_PSI
        unsigned                        sched_psi_wake_requeue:1;
 #endif
@@ -780,6 +787,21 @@ struct task_struct {
 
        /* Unserialized, strictly 'current' */
 
+       /*
+        * This field must not be in the scheduler word above due to wakelist
+        * queueing no longer being serialized by p->on_cpu. However:
+        *
+        * p->XXX = X;                  ttwu()
+        * schedule()                     if (p->on_rq && ..) // false
+        *   smp_mb__after_spinlock();    if (smp_load_acquire(&p->on_cpu) && //true
+        *   deactivate_task()                ttwu_queue_wakelist())
+        *     p->on_rq = 0;                    p->sched_remote_wakeup = Y;
+        *
+        * guarantees all stores of 'current' are visible before
+        * ->sched_remote_wakeup gets used, so it can be in this word.
+        */
+       unsigned                        sched_remote_wakeup:1;
+
        /* Bit to tell LSMs we're in execve(): */
        unsigned                        in_execve:1;
        unsigned                        in_iowait:1;
index bc27254..3964262 100644 (file)
@@ -869,7 +869,7 @@ static inline int security_inode_killpriv(struct dentry *dentry)
 
 static inline int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc)
 {
-       return -EOPNOTSUPP;
+       return cap_inode_getsecurity(inode, name, buffer, alloc);
 }
 
 static inline int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags)
index 99380c0..b390fda 100644 (file)
@@ -734,6 +734,25 @@ static inline struct spi_controller *spi_alloc_slave(struct device *host,
        return __spi_alloc_controller(host, size, true);
 }
 
+struct spi_controller *__devm_spi_alloc_controller(struct device *dev,
+                                                  unsigned int size,
+                                                  bool slave);
+
+static inline struct spi_controller *devm_spi_alloc_master(struct device *dev,
+                                                          unsigned int size)
+{
+       return __devm_spi_alloc_controller(dev, size, false);
+}
+
+static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev,
+                                                         unsigned int size)
+{
+       if (!IS_ENABLED(CONFIG_SPI_SLAVE))
+               return NULL;
+
+       return __devm_spi_alloc_controller(dev, size, true);
+}
+
 extern int spi_register_controller(struct spi_controller *ctlr);
 extern int devm_spi_register_controller(struct device *dev,
                                        struct spi_controller *ctlr);
index 628e289..15ca6b4 100644 (file)
@@ -170,6 +170,7 @@ struct plat_stmmacenet_data {
        int unicast_filter_entries;
        int tx_fifo_size;
        int rx_fifo_size;
+       u32 addr64;
        u32 rx_queues_to_use;
        u32 tx_queues_to_use;
        u8 rx_sched_algorithm;
index 3bb7226..fbdc657 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/dma-direction.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/limits.h>
 
 struct device;
 struct page;
index a99e9b8..eb33d94 100644 (file)
@@ -306,6 +306,10 @@ struct tty_struct {
        struct termiox *termiox;        /* May be NULL for unsupported */
        char name[64];
        struct pid *pgrp;               /* Protected by ctrl lock */
+       /*
+        * Writes are protected by both the ctrl lock and the legacy mutex;
+        * readers must hold at least one of them.
+        */
        struct pid *session;
        unsigned long flags;
        int count;
index 27fb99c..fe10e85 100644 (file)
@@ -22,6 +22,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int
 #define WQ_FLAG_BOOKMARK       0x04
 #define WQ_FLAG_CUSTOM         0x08
 #define WQ_FLAG_DONE           0x10
+#define WQ_FLAG_PRIORITY       0x20
 
 /*
  * A single wait-queue entry structure:
@@ -164,11 +165,20 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
 
 extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 
 static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-       list_add(&wq_entry->entry, &wq_head->head);
+       struct list_head *head = &wq_head->head;
+       struct wait_queue_entry *wq;
+
+       list_for_each_entry(wq, &wq_head->head, entry) {
+               if (!(wq->flags & WQ_FLAG_PRIORITY))
+                       break;
+               head = &wq->entry;
+       }
+       list_add(&wq_entry->entry, head);
 }
 
 /*
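__add_wait_queue() now walks past any WQ_FLAG_PRIORITY entries before inserting, so priority waiters stay clustered at the head of the queue and get woken first by the in-order wakeup walk; new entries, priority or not, land immediately after that cluster. A list-insertion sketch of the same ordering rule (singly linked list instead of list_head, for brevity):

	#include <stdio.h>

	#define FLAG_PRIORITY 0x20

	struct waiter {
		int flags;
		const char *name;
		struct waiter *next;
	};

	/* Insert after the last priority entry: priority waiters cluster
	 * at the head, everyone else queues behind them. */
	static void add_waiter(struct waiter **head, struct waiter *w)
	{
		struct waiter **pos = head;

		while (*pos && ((*pos)->flags & FLAG_PRIORITY))
			pos = &(*pos)->next;
		w->next = *pos;
		*pos = w;
	}

	int main(void)
	{
		struct waiter a = { 0, "normal-a", NULL };
		struct waiter b = { FLAG_PRIORITY, "priority", NULL };
		struct waiter c = { 0, "normal-b", NULL };
		struct waiter *head = NULL;

		add_waiter(&head, &a);
		add_waiter(&head, &b);
		add_waiter(&head, &c);
		for (struct waiter *w = head; w; w = w->next)
			printf("%s\n", w->name); /* priority, normal-b, normal-a */
		return 0;
	}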
index 0fdbf65..4807ca4 100644 (file)
@@ -20,7 +20,6 @@
  * zsmalloc mapping modes
  *
  * NOTE: These only make a difference when a mapped object spans pages.
- * They also have no effect when ZSMALLOC_PGTABLE_MAPPING is selected.
  */
 enum zs_mapmode {
        ZS_MM_RW, /* normal read-write mapping */
index 7d132cc..adc3da7 100644 (file)
 #define bond_for_each_slave_rcu(bond, pos, iter) \
        netdev_for_each_lower_private_rcu((bond)->dev, pos, iter)
 
-#ifdef CONFIG_XFRM_OFFLOAD
 #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \
                            NETIF_F_GSO_ESP)
-#endif /* CONFIG_XFRM_OFFLOAD */
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 extern atomic_t netpoll_block_tx;
@@ -185,6 +183,11 @@ struct slave {
        struct rtnl_link_stats64 slave_stats;
 };
 
+static inline struct slave *to_slave(struct kobject *kobj)
+{
+       return container_of(kobj, struct slave, kobj);
+}
+
 struct bond_up_slave {
        unsigned int    count;
        struct rcu_head rcu;
@@ -750,6 +753,9 @@ extern struct bond_parm_tbl ad_select_tbl[];
 /* exported from bond_netlink.c */
 extern struct rtnl_link_ops bond_link_ops;
 
+/* exported from bond_sysfs_slave.c */
+extern const struct sysfs_ops slave_sysfs_ops;
+
 static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
 {
        atomic_long_inc(&dev->tx_dropped);
index e1eaf17..563457f 100644 (file)
@@ -107,7 +107,7 @@ static inline int IP_ECN_set_ect1(struct iphdr *iph)
        if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
                return 0;
 
-       check += (__force u16)htons(0x100);
+       check += (__force u16)htons(0x1);
 
        iph->check = (__force __sum16)(check + (check>=0xFFFF));
        iph->tos ^= INET_ECN_MASK;
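The one-byte fix is plain RFC 1624 incremental-checksum arithmetic: tos is the low byte of the first 16-bit word of the IPv4 header, and flipping ECT(0) (binary 10) to ECT(1) (binary 01) changes that word by 1, so the checksum adjustment must be htons(0x1); the old htons(0x100) compensated for a change in the high byte instead. A host-order demonstration of the delta:

	#include <stdio.h>
	#include <stdint.h>

	#define INET_ECN_MASK	3	/* low two bits of tos */
	#define INET_ECN_ECT_0	2
	#define INET_ECN_ECT_1	1

	int main(void)
	{
		uint8_t ver_ihl = 0x45, tos = INET_ECN_ECT_0;
		/* first 16-bit header word, as it enters the checksum */
		uint16_t word_before = (uint16_t)(ver_ihl << 8 | tos);
		uint16_t word_after  = (uint16_t)(ver_ihl << 8 | (tos ^ INET_ECN_MASK));

		/* ECT(0) -> ECT(1) changes the word by 1, not by 0x100: */
		printf("delta = %d\n", word_before - word_after);	/* 1 */
		return 0;
	}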
index 9256097..ca6a3ea 100644 (file)
@@ -247,8 +247,9 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
                         unsigned long high_limit);
 int inet_hashinfo2_init_mod(struct inet_hashinfo *h);
 
-bool inet_ehash_insert(struct sock *sk, struct sock *osk);
-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
+bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk);
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk,
+                        bool *found_dup_sk);
 int __inet_hash(struct sock *sk, struct sock *osk);
 int inet_hash(struct sock *sk);
 void inet_unhash(struct sock *sk);
index 02ccd32..6162067 100644 (file)
@@ -478,9 +478,11 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
                                           const void *from, int len,
                                           __be16 flags)
 {
-       memcpy(ip_tunnel_info_opts(info), from, len);
        info->options_len = len;
-       info->key.tun_flags |= flags;
+       if (len > 0) {
+               memcpy(ip_tunnel_info_opts(info), from, len);
+               info->key.tun_flags |= flags;
+       }
 }
 
 static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
@@ -526,7 +528,6 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
                                           __be16 flags)
 {
        info->options_len = 0;
-       info->key.tun_flags |= flags;
 }
 
 #endif /* CONFIG_INET */
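
After this change a caller may pass len == 0 together with a flags value and the flag no longer leaks into tun_flags, so readers keyed on the flag can trust that option bytes actually exist. Calling-pattern sketch (TUNNEL_GENEVE_OPT is an existing flag; opts/opts_len stand in for caller state):

	/* A zero length now leaves tun_flags untouched instead of
	 * advertising empty options. */
	ip_tunnel_info_opts_set(info, opts, opts_len, TUNNEL_GENEVE_OPT);
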
index a21e8b1..851029e 100644 (file)
@@ -108,5 +108,35 @@ out_rcu_unlock:
        rcu_read_unlock();
        inet_frag_put(&fq->q);
 }
+
+/* Check if the upper layer header is truncated in the first fragment. */
+static inline bool
+ipv6frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp)
+{
+       u8 nexthdr = *nexthdrp;
+       __be16 frag_off;
+       int offset;
+
+       offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off);
+       if (offset < 0 || (frag_off & htons(IP6_OFFSET)))
+               return false;
+       switch (nexthdr) {
+       case NEXTHDR_TCP:
+               offset += sizeof(struct tcphdr);
+               break;
+       case NEXTHDR_UDP:
+               offset += sizeof(struct udphdr);
+               break;
+       case NEXTHDR_ICMP:
+               offset += sizeof(struct icmp6hdr);
+               break;
+       default:
+               offset += 1;
+       }
+       if (offset > skb->len)
+               return true;
+       return false;
+}
+
 #endif
 #endif
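
The helper is meant to run on the first fragment (offset zero), before any reassembly state is created. A sketch of the call-site shape, condensed into a hypothetical wrapper; the real callers in the IPv6 input path also bump MIB counters:

	static int check_first_frag(struct sk_buff *skb, struct frag_hdr *fhdr)
	{
		u8 nexthdr = fhdr->nexthdr;

		if (!(fhdr->frag_off & htons(IP6_OFFSET)) &&
		    ipv6frag_thdr_truncated(skb, skb_transport_offset(skb) +
					    sizeof(struct frag_hdr), &nexthdr)) {
			icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
			return -1;
		}
		return 0;
	}
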
index 81ee175..22ced13 100644 (file)
@@ -204,6 +204,7 @@ struct neigh_table {
        int                     (*pconstructor)(struct pneigh_entry *);
        void                    (*pdestructor)(struct pneigh_entry *);
        void                    (*proxy_redo)(struct sk_buff *skb);
+       int                     (*is_multicast)(const void *pkey);
        bool                    (*allow_add)(const struct net_device *dev,
                                             struct netlink_ext_ack *extack);
        char                    *id;
index 55b4cad..c1c0a4f 100644 (file)
@@ -1524,4 +1524,8 @@ void __init nft_chain_route_init(void);
 void nft_chain_route_fini(void);
 
 void nf_tables_trans_destroy_flush_work(void);
+
+int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
+__be64 nf_jiffies64_to_msecs(u64 input);
+
 #endif /* _NET_NF_TABLES_H */
index ea7d1d7..1d34fe1 100644 (file)
@@ -37,6 +37,7 @@ void nft_offload_update_dependency(struct nft_offload_ctx *ctx,
 
 struct nft_flow_key {
        struct flow_dissector_key_basic                 basic;
+       struct flow_dissector_key_control               control;
        union {
                struct flow_dissector_key_ipv4_addrs    ipv4;
                struct flow_dissector_key_ipv6_addrs    ipv6;
@@ -62,6 +63,9 @@ struct nft_flow_rule {
 
 #define NFT_OFFLOAD_F_ACTION   (1 << 0)
 
+void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
+                                enum flow_dissector_key_id addr_type);
+
 struct nft_rule;
 struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule);
 void nft_flow_rule_destroy(struct nft_flow_rule *flow);
@@ -74,6 +78,9 @@ int nft_flow_rule_offload_commit(struct net *net);
                offsetof(struct nft_flow_key, __base.__field);          \
        (__reg)->len            = __len;                                \
        (__reg)->key            = __key;                                \
+
+#define NFT_OFFLOAD_MATCH_EXACT(__key, __base, __field, __len, __reg)  \
+       NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg)         \
        memset(&(__reg)->mask, 0xff, (__reg)->len);
 
 int nft_chain_offload_priority(struct nft_base_chain *basechain);
index baf1e99..2bdd802 100644 (file)
@@ -199,6 +199,12 @@ enum tls_context_flags {
         * to be atomic.
         */
        TLS_TX_SYNC_SCHED = 1,
+       /* tls_dev_del was called for the RX side, device state was released,
+        * but tls_ctx->netdev might still be kept, because TX-side driver
+        * resources might not be released yet. Used to prevent the second
+        * tls_dev_del call in tls_device_down if it happens simultaneously.
+        */
+       TLS_RX_DEV_CLOSED = 2,
 };
 
 struct cipher_context {
@@ -300,7 +306,8 @@ enum tls_offload_sync_type {
 #define TLS_DEVICE_RESYNC_ASYNC_LOGMAX         13
 struct tls_offload_resync_async {
        atomic64_t req;
-       u32 loglen;
+       u16 loglen;
+       u16 rcd_delta;
        u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX];
 };
 
@@ -471,6 +478,18 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len)
        return (i == -1);
 }
 
+static inline void tls_bigint_subtract(unsigned char *seq, int n)

+{
+       u64 rcd_sn;
+       __be64 *p;
+
+       BUILD_BUG_ON(TLS_MAX_REC_SEQ_SIZE != 8);
+
+       p = (__be64 *)seq;
+       rcd_sn = be64_to_cpu(*p);
+       *p = cpu_to_be64(rcd_sn - n);
+}
+
 static inline struct tls_context *tls_get_ctx(const struct sock *sk)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
@@ -639,6 +658,7 @@ tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len)
        atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) |
                     ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC);
        rx_ctx->resync_async->loglen = 0;
+       rx_ctx->resync_async->rcd_delta = 0;
 }
 
 static inline void
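
rcd_delta counts the records that raced past while an async resync request was in flight; tls_bigint_subtract() rolls the big-endian record sequence back by that amount. An editorial usage sketch, with names taken from the structures in this hunk; the exact driver-side caller is not shown in this section:

	/* Rewind the expected record sequence before abandoning the
	 * async resync request. */
	tls_bigint_subtract((unsigned char *)tls_ctx->rx.rec_seq,
			    rx_ctx->resync_async->rcd_delta);
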
index 3814fb6..9dab2bc 100644 (file)
@@ -240,8 +240,6 @@ struct xdp_attachment_info {
 };
 
 struct netdev_bpf;
-bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
-                            struct netdev_bpf *bpf);
 void xdp_attachment_setup(struct xdp_attachment_info *info,
                          struct netdev_bpf *bpf);
 
index 1a9559c..4f4e93b 100644 (file)
@@ -31,6 +31,7 @@ struct xdp_umem {
        struct page **pgs;
        int id;
        struct list_head xsk_dma_list;
+       struct work_struct work;
 };
 
 struct xsk_map {
index c25fb86..b3bbd10 100644 (file)
@@ -132,6 +132,9 @@ struct iscsi_task {
        void                    *dd_data;       /* driver/transport data */
 };
 
+/* invalid iscsi_task pointer */
+#define        INVALID_SCSI_TASK       (struct iscsi_task *)-1l
+
 static inline int iscsi_task_has_unsol_data(struct iscsi_task *task)
 {
        return task->unsol_r2t.data_length > task->unsol_r2t.sent;
index 1e9db95..49b46df 100644 (file)
@@ -618,6 +618,9 @@ struct ocelot {
        /* Keep track of the vlan port masks */
        u32                             vlan_mask[VLAN_N_VID];
 
+       /* Switches like VSC9959 have flooding per traffic class */
+       int                             num_flooding_pgids;
+
        /* In tables like ANA:PORT and the ANA:PGID:PGID mask,
         * the CPU is located after the physical ports (at the
         * num_phys_ports index).
diff --git a/include/sound/rt1015.h b/include/sound/rt1015.h
new file mode 100644 (file)
index 0000000..70a7538
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/sound/rt1015.h -- Platform data for RT1015
+ *
+ * Copyright 2020 Realtek Microelectronics
+ */
+
+#ifndef __LINUX_SND_RT1015_H
+#define __LINUX_SND_RT1015_H
+
+struct rt1015_platform_data {
+       unsigned int power_up_delay_ms;
+};
+
+#endif
index 26cfb0f..49d7d0f 100644 (file)
@@ -399,6 +399,69 @@ TRACE_EVENT(kvm_halt_poll_ns,
 #define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \
        trace_kvm_halt_poll_ns(false, vcpu_id, new, old)
 
+TRACE_EVENT(kvm_dirty_ring_push,
+       TP_PROTO(struct kvm_dirty_ring *ring, u32 slot, u64 offset),
+       TP_ARGS(ring, slot, offset),
+
+       TP_STRUCT__entry(
+               __field(int, index)
+               __field(u32, dirty_index)
+               __field(u32, reset_index)
+               __field(u32, slot)
+               __field(u64, offset)
+       ),
+
+       TP_fast_assign(
+               __entry->index          = ring->index;
+               __entry->dirty_index    = ring->dirty_index;
+               __entry->reset_index    = ring->reset_index;
+               __entry->slot           = slot;
+               __entry->offset         = offset;
+       ),
+
+       TP_printk("ring %d: dirty 0x%x reset 0x%x "
+                 "slot %u offset 0x%llx (used %u)",
+                 __entry->index, __entry->dirty_index,
+                 __entry->reset_index,  __entry->slot, __entry->offset,
+                 __entry->dirty_index - __entry->reset_index)
+);
+
+TRACE_EVENT(kvm_dirty_ring_reset,
+       TP_PROTO(struct kvm_dirty_ring *ring),
+       TP_ARGS(ring),
+
+       TP_STRUCT__entry(
+               __field(int, index)
+               __field(u32, dirty_index)
+               __field(u32, reset_index)
+       ),
+
+       TP_fast_assign(
+               __entry->index          = ring->index;
+               __entry->dirty_index    = ring->dirty_index;
+               __entry->reset_index    = ring->reset_index;
+       ),
+
+       TP_printk("ring %d: dirty 0x%x reset 0x%x (used %u)",
+                 __entry->index, __entry->dirty_index, __entry->reset_index,
+                 __entry->dirty_index - __entry->reset_index)
+);
+
+TRACE_EVENT(kvm_dirty_ring_exit,
+       TP_PROTO(struct kvm_vcpu *vcpu),
+       TP_ARGS(vcpu),
+
+       TP_STRUCT__entry(
+           __field(int, vcpu_id)
+       ),
+
+       TP_fast_assign(
+           __entry->vcpu_id = vcpu->vcpu_id;
+       ),
+
+       TP_printk("vcpu %d", __entry->vcpu_id)
+);
+
 #endif /* _TRACE_KVM_MAIN_H */
 
 /* This part must be outside protection */
index 2477014..2a03263 100644 (file)
@@ -68,7 +68,8 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class,
 
        TP_fast_assign(
                __entry->task_id = task->tk_pid;
-               __entry->client_id = task->tk_client->cl_clid;
+               __entry->client_id = task->tk_client ?
+                                    task->tk_client->cl_clid : -1;
                __entry->head_base = xdr->head[0].iov_base;
                __entry->head_len = xdr->head[0].iov_len;
                __entry->tail_base = xdr->tail[0].iov_base;
index e7cbccc..57d7953 100644 (file)
@@ -190,7 +190,7 @@ TRACE_EVENT(inode_foreign_history,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
+               strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
                __entry->ino            = inode->i_ino;
                __entry->cgroup_ino     = __trace_wbc_assign_cgroup(wbc);
                __entry->history        = history;
@@ -219,7 +219,7 @@ TRACE_EVENT(inode_switch_wbs,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name,  bdi_dev_name(old_wb->bdi), 32);
+               strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32);
                __entry->ino            = inode->i_ino;
                __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb);
                __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb);
@@ -252,7 +252,7 @@ TRACE_EVENT(track_foreign_dirty,
                struct address_space *mapping = page_mapping(page);
                struct inode *inode = mapping ? mapping->host : NULL;
 
-               strncpy(__entry->name,  bdi_dev_name(wb->bdi), 32);
+               strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
                __entry->bdi_id         = wb->bdi->id;
                __entry->ino            = inode ? inode->i_ino : 0;
                __entry->memcg_id       = wb->memcg_css->id;
@@ -285,7 +285,7 @@ TRACE_EVENT(flush_foreign,
        ),
 
        TP_fast_assign(
-               strncpy(__entry->name,  bdi_dev_name(wb->bdi), 32);
+               strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
                __entry->cgroup_ino     = __trace_wb_assign_cgroup(wb);
                __entry->frn_bdi_id     = frn_bdi_id;
                __entry->frn_memcg_id   = frn_memcg_id;
index e6ceac3..556216d 100644 (file)
@@ -3897,8 +3897,8 @@ union bpf_attr {
        FN(seq_printf_btf),             \
        FN(skb_cgroup_classid),         \
        FN(redirect_neigh),             \
-       FN(bpf_per_cpu_ptr),            \
-       FN(bpf_this_cpu_ptr),           \
+       FN(per_cpu_ptr),                \
+       FN(this_cpu_ptr),               \
        FN(redirect_peer),              \
        /* */
 
index 0113bc4..5203f54 100644 (file)
@@ -526,6 +526,8 @@ enum devlink_attr {
        DEVLINK_ATTR_RELOAD_STATS_LIMIT,        /* u8 */
        DEVLINK_ATTR_RELOAD_STATS_VALUE,        /* u32 */
        DEVLINK_ATTR_REMOTE_RELOAD_STATS,       /* nested */
+       DEVLINK_ATTR_RELOAD_ACTION_INFO,        /* nested */
+       DEVLINK_ATTR_RELOAD_ACTION_STATS,       /* nested */
 
        /* add new attributes above here, update the policy in devlink.c */
 
index ca41220..374c678 100644 (file)
@@ -250,6 +250,8 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_ARM_NISV         28
 #define KVM_EXIT_X86_RDMSR        29
 #define KVM_EXIT_X86_WRMSR        30
+#define KVM_EXIT_DIRTY_RING_FULL  31
+#define KVM_EXIT_AP_RESET_HOLD    32
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -572,6 +574,7 @@ struct kvm_vapic_addr {
 #define KVM_MP_STATE_CHECK_STOP        6
 #define KVM_MP_STATE_OPERATING         7
 #define KVM_MP_STATE_LOAD              8
+#define KVM_MP_STATE_AP_RESET_HOLD     9
 
 struct kvm_mp_state {
        __u32 mp_state;
@@ -1053,6 +1056,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
+#define KVM_CAP_SYS_HYPERV_CPUID 191
+#define KVM_CAP_DIRTY_LOG_RING 192
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1511,7 +1516,7 @@ struct kvm_enc_region {
 /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */
 #define KVM_CLEAR_DIRTY_LOG          _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
 
-/* Available with KVM_CAP_HYPERV_CPUID */
+/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */
 #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2)
 
 /* Available with KVM_CAP_ARM_SVE */
@@ -1557,6 +1562,9 @@ struct kvm_pv_cmd {
 /* Available with KVM_CAP_X86_MSR_FILTER */
 #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO,  0xc6, struct kvm_msr_filter)
 
+/* Available with KVM_CAP_DIRTY_LOG_RING */
+#define KVM_RESET_DIRTY_RINGS          _IO(KVMIO, 0xc7)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
        /* Guest initialization commands */
@@ -1710,4 +1718,52 @@ struct kvm_hyperv_eventfd {
 #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE    (1 << 0)
 #define KVM_DIRTY_LOG_INITIALLY_SET            (1 << 1)
 
+/*
+ * Arch needs to define the macro after implementing the dirty ring
+ * feature.  KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the
+ * starting page offset of the dirty ring structures.
+ */
+#ifndef KVM_DIRTY_LOG_PAGE_OFFSET
+#define KVM_DIRTY_LOG_PAGE_OFFSET 0
+#endif
+
+/*
+ * KVM dirty GFN flags, defined as:
+ *
+ * |---------------+---------------+--------------|
+ * | bit 1 (reset) | bit 0 (dirty) | Status       |
+ * |---------------+---------------+--------------|
+ * |             0 |             0 | Invalid GFN  |
+ * |             0 |             1 | Dirty GFN    |
+ * |             1 |             X | GFN to reset |
+ * |---------------+---------------+--------------|
+ *
+ * Lifecycle of a dirty GFN goes like:
+ *
+ *      dirtied         harvested        reset
+ * 00 -----------> 01 -------------> 1X -------+
+ *  ^                                          |
+ *  |                                          |
+ *  +------------------------------------------+
+ *
+ * The userspace program is only responsible for the 01->1X state
+ * conversion after harvesting an entry.  Also, it must not skip any
+ * dirty bits, so that dirty bits are always harvested in sequence.
+ */
+#define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
+#define KVM_DIRTY_GFN_F_RESET           BIT(1)
+#define KVM_DIRTY_GFN_F_MASK            0x3
+
+/*
+ * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of
+ * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn.  The
+ * size of the gfn buffer is decided by the first argument when
+ * enabling KVM_CAP_DIRTY_LOG_RING.
+ */
+struct kvm_dirty_gfn {
+       __u32 flags;
+       __u32 slot;
+       __u64 offset;
+};
+
 #endif /* __LINUX_KVM_H */
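
A userspace sketch of the 01 -> 1X harvest step described in the comment above. The collect() consumer, the ring size and the mmap bookkeeping are assumptions for illustration; a real consumer also needs load-acquire/store-release on the flags field, and the arch must define a nonzero KVM_DIRTY_LOG_PAGE_OFFSET:

	#include <linux/kvm.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>

	extern void collect(uint32_t slot, uint64_t offset); /* hypothetical */

	static void harvest_ring(int vcpu_fd, int vm_fd, uint32_t nents,
				 long page_size, uint32_t *next)
	{
		/* Mapping shown inline for brevity; do it once in practice. */
		struct kvm_dirty_gfn *ring =
			mmap(NULL, nents * sizeof(*ring),
			     PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd,
			     KVM_DIRTY_LOG_PAGE_OFFSET * page_size);
		struct kvm_dirty_gfn *e;

		for (;;) {
			e = &ring[*next % nents];
			if (!(e->flags & KVM_DIRTY_GFN_F_DIRTY))
				break;			/* nothing more to harvest */
			collect(e->slot, e->offset);
			e->flags |= KVM_DIRTY_GFN_F_RESET;	/* 01 -> 11 */
			(*next)++;
		}
		ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0);
	}
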
index 8300cc2..8d16744 100644 (file)
@@ -1058,4 +1058,6 @@ enum ovs_dec_ttl_attr {
        __OVS_DEC_TTL_ATTR_MAX
 };
 
+#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
+
 #endif /* _LINUX_OPENVSWITCH_H */
index 82cc58f..1500a0f 100644 (file)
@@ -171,9 +171,12 @@ struct statx {
  * be of use to ordinary userspace programs such as GUIs or ls rather than
  * specialised tools.
  *
- * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
+ * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags
  * semantically.  Where possible, the numerical value is picked to correspond
- * also.
+ * also.  Note that the DAX attribute indicates that the file is in the CPU
+ * direct access state.  It does not correspond to the per-inode flag that
+ * some filesystems support.
+ *
  */
 #define STATX_ATTR_COMPRESSED          0x00000004 /* [I] File is compressed by the fs */
 #define STATX_ATTR_IMMUTABLE           0x00000010 /* [I] File is marked immutable */
@@ -183,7 +186,7 @@ struct statx {
 #define STATX_ATTR_AUTOMOUNT           0x00001000 /* Dir: Automount trigger */
 #define STATX_ATTR_MOUNT_ROOT          0x00002000 /* Root of a mount */
 #define STATX_ATTR_VERITY              0x00100000 /* [I] Verity protected file */
-#define STATX_ATTR_DAX                 0x00002000 /* [I] File is DAX */
+#define STATX_ATTR_DAX                 0x00200000 /* File is currently in DAX state */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
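
The DAX attribute moves from 0x00002000, which collided with STATX_ATTR_MOUNT_ROOT, to its own bit 0x00200000. A userspace probe sketch, assuming a recent glibc with the statx(2) wrapper and matching kernel headers:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <sys/stat.h>

	static int is_dax(const char *path)
	{
		struct statx stx;

		if (statx(AT_FDCWD, path, 0, STATX_BASIC_STATS, &stx) != 0)
			return -1;
		/* Only trust bits the filesystem says it reports. */
		if (!(stx.stx_attributes_mask & STATX_ATTR_DAX))
			return 0;
		return !!(stx.stx_attributes & STATX_ATTR_DAX);
	}
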
index 9bc5bc0..b9c937b 100644 (file)
@@ -198,6 +198,23 @@ void gnttab_free_auto_xlat_frames(void);
 int gnttab_alloc_pages(int nr_pages, struct page **pages);
 void gnttab_free_pages(int nr_pages, struct page **pages);
 
+struct gnttab_page_cache {
+       spinlock_t              lock;
+#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
+       struct page             *pages;
+#else
+       struct list_head        pages;
+#endif
+       unsigned int            num_pages;
+};
+
+void gnttab_page_cache_init(struct gnttab_page_cache *cache);
+int gnttab_page_cache_get(struct gnttab_page_cache *cache, struct page **page);
+void gnttab_page_cache_put(struct gnttab_page_cache *cache, struct page **page,
+                          unsigned int num);
+void gnttab_page_cache_shrink(struct gnttab_page_cache *cache,
+                             unsigned int num);
+
 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC
 struct gnttab_dma_alloc_args {
        /* Device for which DMA memory will be/was allocated. */
index c944691..0872a5a 100644 (file)
@@ -47,6 +47,10 @@ config CLANG_VERSION
        int
        default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
 
+config LLD_VERSION
+       int
+       default $(shell,$(srctree)/scripts/lld-version.sh $(LD))
+
 config CC_CAN_LINK
        bool
        default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT
@@ -719,7 +723,7 @@ config LOG_CPU_MAX_BUF_SHIFT
          with more CPUs. Therefore this value is used only when the sum of
          contributions is greater than the half of the default kernel ring
          buffer as defined by LOG_BUF_SHIFT. The default values are set
-         so that more than 64 CPUs are needed to trigger the allocation.
+         so that more than 16 CPUs are needed to trigger the allocation.
 
          Also this option is ignored when "log_buf_len" kernel parameter is
          used as it forces an exact (power of two) size of the ring buffer.
@@ -1348,6 +1352,12 @@ config LD_DEAD_CODE_DATA_ELIMINATION
          present. This option is not well tested yet, so use at your
          own risk.
 
+config LD_ORPHAN_WARN
+       def_bool y
+       depends on ARCH_WANT_LD_ORPHAN_WARN
+       depends on !LD_IS_LLD || LLD_VERSION >= 110000
+       depends on $(ld-option,--orphan-handling=warn)
+
 config SYSCTL
        bool
 
index 1f97c03..55b74d7 100644 (file)
@@ -535,7 +535,7 @@ extern unsigned long __initramfs_size;
 #include <linux/initrd.h>
 #include <linux/kexec.h>
 
-void __weak free_initrd_mem(unsigned long start, unsigned long end)
+void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
 {
 #ifdef CONFIG_ARCH_KEEP_MEMBLOCK
        unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE);
index 20baced..32b2a8a 100644 (file)
@@ -288,8 +288,8 @@ static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum)
 
 found:
        hdr = (u32 *)(data - 8);
-       size = hdr[0];
-       csum = hdr[1];
+       size = le32_to_cpu(hdr[0]);
+       csum = le32_to_cpu(hdr[1]);
 
        data = ((void *)hdr) - size;
        if ((unsigned long)data < initrd_start) {
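
With the kernel now decoding the footer's size and checksum as little-endian, any producer must emit them the same way regardless of host endianness. A generator-side sketch that mirrors the bootconfig tool's footer semantics (not copied from it); the magic string "#BOOTCONFIG\n" is the real 12-byte magic:

	#include <endian.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Footer layout appended to the initrd: le32 size, le32 checksum,
	 * then the magic string. */
	static void write_footer(FILE *f, uint32_t size, uint32_t csum)
	{
		uint32_t size_le = htole32(size);
		uint32_t csum_le = htole32(csum);

		fwrite(&size_le, sizeof(size_le), 1, f);
		fwrite(&csum_le, sizeof(csum_le), 1, f);
		fwrite("#BOOTCONFIG\n", 12, 1, f);
	}
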
index af601b9..6c9f199 100644 (file)
@@ -97,7 +97,6 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
-obj-$(CONFIG_ELFCORE) += elfcore.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_TRACE_CLOCK) += trace/
index 25520f5..deda118 100644 (file)
@@ -717,9 +717,9 @@ bpf_base_func_proto(enum bpf_func_id func_id)
                return &bpf_snprintf_btf_proto;
        case BPF_FUNC_jiffies64:
                return &bpf_jiffies64_proto;
-       case BPF_FUNC_bpf_per_cpu_ptr:
+       case BPF_FUNC_per_cpu_ptr:
                return &bpf_per_cpu_ptr_proto;
-       case BPF_FUNC_bpf_this_cpu_ptr:
+       case BPF_FUNC_this_cpu_ptr:
                return &bpf_this_cpu_ptr_proto;
        default:
                break;
index 6200519..53fe6ef 100644 (file)
@@ -1298,9 +1298,7 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
 
 static bool __reg64_bound_s32(s64 a)
 {
-       if (a > S32_MIN && a < S32_MAX)
-               return true;
-       return false;
+       return a > S32_MIN && a < S32_MAX;
 }
 
 static bool __reg64_bound_u32(u64 a)
@@ -1314,10 +1312,10 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
 {
        __mark_reg32_unbounded(reg);
 
-       if (__reg64_bound_s32(reg->smin_value))
+       if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
                reg->s32_min_value = (s32)reg->smin_value;
-       if (__reg64_bound_s32(reg->smax_value))
                reg->s32_max_value = (s32)reg->smax_value;
+       }
        if (__reg64_bound_u32(reg->umin_value))
                reg->u32_min_value = (u32)reg->umin_value;
        if (__reg64_bound_u32(reg->umax_value))
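
Why the signed hunk above now requires both 64-bit bounds to fit before either is copied: a 64-bit range that straddles the 32-bit boundary says nothing about the truncated value. An editorial worked example:

	/* Suppose:
	 *   smin_value = -2           (fits in s32)
	 *   smax_value = 0x100000005  (does not fit)
	 *
	 * The old code copied only the fitting bound, claiming
	 * s32_min = -2.  But 0x80000000 lies in [-2, 0x100000005] and
	 * truncates to s32 -2147483648, violating that claimed lower
	 * bound.  Hence both bounds must fit before propagating.
	 */
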
@@ -4895,6 +4893,8 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
 
        ret_reg->smax_value = meta->msize_max_value;
        ret_reg->s32_max_value = meta->msize_max_value;
+       ret_reg->smin_value = -MAX_ERRNO;
+       ret_reg->s32_min_value = -MAX_ERRNO;
        __reg_deduce_bounds(ret_reg);
        __reg_bound_offset(ret_reg);
        __update_reg_bounds(ret_reg);
@@ -7786,9 +7786,11 @@ static int check_return_code(struct bpf_verifier_env *env)
        struct tnum range = tnum_range(0, 1);
        enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
        int err;
+       const bool is_subprog = env->cur_state->frame[0]->subprogno;
 
        /* LSM and struct_ops func-ptr's return type could be "void" */
-       if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+       if (!is_subprog &&
+           (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
             prog_type == BPF_PROG_TYPE_LSM) &&
            !prog->aux->attach_func_proto->type)
                return 0;
@@ -7808,6 +7810,16 @@ static int check_return_code(struct bpf_verifier_env *env)
                return -EACCES;
        }
 
+       reg = cur_regs(env) + BPF_REG_0;
+       if (is_subprog) {
+               if (reg->type != SCALAR_VALUE) {
+                       verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
+                               reg_type_str[reg->type]);
+                       return -EINVAL;
+               }
+               return 0;
+       }
+
        switch (prog_type) {
        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
                if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
@@ -7861,7 +7873,6 @@ static int check_return_code(struct bpf_verifier_env *env)
                return 0;
        }
 
-       reg = cur_regs(env) + BPF_REG_0;
        if (reg->type != SCALAR_VALUE) {
                verbose(env, "At program exit the register R0 is not a known value (%s)\n",
                        reg_type_str[reg->type]);
@@ -9572,12 +9583,13 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
                               struct bpf_insn *insn,
                               struct bpf_insn_aux_data *aux)
 {
-       u32 datasec_id, type, id = insn->imm;
        const struct btf_var_secinfo *vsi;
        const struct btf_type *datasec;
        const struct btf_type *t;
        const char *sym_name;
        bool percpu = false;
+       u32 type, id = insn->imm;
+       s32 datasec_id;
        u64 addr;
        int i;
 
index 6ff2578..2b8d7a5 100644 (file)
@@ -815,6 +815,10 @@ void __init cpuhp_threads_init(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+#ifndef arch_clear_mm_cpumask_cpu
+#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
+#endif
+
 /**
  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
  * @cpu: a CPU id
@@ -850,7 +854,7 @@ void clear_tasks_mm_cpumask(int cpu)
                t = find_lock_task_mm(p);
                if (!t)
                        continue;
-               cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
+               arch_clear_mm_cpumask_cpu(cpu, t->mm);
                task_unlock(t);
        }
        rcu_read_unlock();
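
The new arch_clear_mm_cpumask_cpu() hook defaults to a plain cpumask_clear_cpu(); an architecture that must serialize the clear against its own mm-context bookkeeping can override it. A hypothetical override sketch (the helper name is invented for illustration):

	/* In an arch header, before the generic definition is pulled in: */
	#define arch_clear_mm_cpumask_cpu(cpu, mm)			\
		do {							\
			arch_sync_mm_context(cpu, mm);	/* hypothetical */ \
			cpumask_clear_cpu(cpu, mm_cpumask(mm));		\
		} while (0)
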
diff --git a/kernel/elfcore.c b/kernel/elfcore.c
deleted file mode 100644 (file)
index 57fb4dc..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/elf.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/binfmts.h>
-#include <linux/elfcore.h>
-
-Elf_Half __weak elf_core_extra_phdrs(void)
-{
-       return 0;
-}
-
-int __weak elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
-{
-       return 1;
-}
-
-int __weak elf_core_write_extra_data(struct coredump_params *cprm)
-{
-       return 1;
-}
-
-size_t __weak elf_core_extra_data_size(void)
-{
-       return 0;
-}
index 63b3491..b0b1ad9 100644 (file)
@@ -253,7 +253,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
 
        if (copy_from_user(buf, buffer, count)) {
                ret = -EFAULT;
-               goto out;
+               goto out_free;
        }
        buf[count] = '\0';
        sym = strstrip(buf);
@@ -307,8 +307,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
                ret = count;
        }
 out:
-       kfree(buf);
        mutex_unlock(&fei_lock);
+out_free:
+       kfree(buf);
        return ret;
 }
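
The relabelled exit path restores the usual rule that cleanup labels undo acquisitions in reverse order, so a failure before mutex_lock() can no longer fall into mutex_unlock(). The resulting shape, condensed from the hunk above:

	buf = kmalloc(count + 1, GFP_KERNEL);
	if (copy_from_user(buf, buffer, count)) {
		ret = -EFAULT;
		goto out_free;		/* lock not taken yet */
	}
	mutex_lock(&fei_lock);
	/* ... main work, setting ret ... */
	out:
		mutex_unlock(&fei_lock);
	out_free:
		kfree(buf);
		return ret;
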
 
index cf8b374..e4ca696 100644 (file)
@@ -624,17 +624,19 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
 EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
 
 /**
- * irq_create_mapping() - Map a hardware interrupt into linux irq space
+ * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
  * @domain: domain owning this hardware interrupt or NULL for default domain
  * @hwirq: hardware irq number in that domain space
+ * @affinity: irq affinity
  *
  * Only one mapping per hardware interrupt is permitted. Returns a linux
  * irq number.
  * If the sense/trigger is to be specified, set_irq_type() should be called
  * on the number returned from that call.
  */
-unsigned int irq_create_mapping(struct irq_domain *domain,
-                               irq_hw_number_t hwirq)
+unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+                                      irq_hw_number_t hwirq,
+                                      const struct irq_affinity_desc *affinity)
 {
        struct device_node *of_node;
        int virq;
@@ -660,7 +662,8 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
        }
 
        /* Allocate a virtual interrupt number */
-       virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), NULL);
+       virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node),
+                                     affinity);
        if (virq <= 0) {
                pr_debug("-> virq allocation failed\n");
                return 0;
@@ -676,7 +679,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
 
        return virq;
 }
-EXPORT_SYMBOL_GPL(irq_create_mapping);
+EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
 
 /**
  * irq_create_strict_mappings() - Map a range of hw irqs to fixed linux irqs
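
Renaming the exported function implies a header-side shim so the many existing irq_create_mapping() callers keep building. Sketched here, since the header change is not visible in this section, the series keeps the old name as a static inline that forwards a NULL affinity:

	static inline unsigned int irq_create_mapping(struct irq_domain *domain,
						      irq_hw_number_t hwirq)
	{
		return irq_create_mapping_affinity(domain, hwirq, NULL);
	}
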
index d9fb9e1..c1418b4 100644 (file)
@@ -108,19 +108,21 @@ static inline void lockdep_lock(void)
 {
        DEBUG_LOCKS_WARN_ON(!irqs_disabled());
 
+       __this_cpu_inc(lockdep_recursion);
        arch_spin_lock(&__lock);
        __owner = current;
-       __this_cpu_inc(lockdep_recursion);
 }
 
 static inline void lockdep_unlock(void)
 {
+       DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+
        if (debug_locks && DEBUG_LOCKS_WARN_ON(__owner != current))
                return;
 
-       __this_cpu_dec(lockdep_recursion);
        __owner = NULL;
        arch_spin_unlock(&__lock);
+       __this_cpu_dec(lockdep_recursion);
 }
 
 static inline bool lockdep_assert_locked(void)
index fe64a49..bc1e3b5 100644 (file)
@@ -528,8 +528,8 @@ static int log_store(u32 caller_id, int facility, int level,
        if (dev_info)
                memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
 
-       /* insert message */
-       if ((flags & LOG_CONT) || !(flags & LOG_NEWLINE))
+       /* A message without a trailing newline can be continued. */
+       if (!(flags & LOG_NEWLINE))
                prb_commit(&e);
        else
                prb_final_commit(&e);
index 6b15256..74e25a1 100644 (file)
@@ -882,8 +882,6 @@ static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
        head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */
 
        do {
-               desc = to_desc(desc_ring, head_id);
-
                id = DESC_ID(head_id + 1);
                id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id);
 
index 43d6179..79de129 100644 (file)
@@ -264,17 +264,11 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
        return ret;
 }
 
-static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns,
-                          unsigned int mode)
+static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
 {
-       int ret;
-
        if (mode & PTRACE_MODE_NOAUDIT)
-               ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NOAUDIT);
-       else
-               ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NONE);
-
-       return ret == 0;
+               return ns_capable_noaudit(ns, CAP_SYS_PTRACE);
+       return ns_capable(ns, CAP_SYS_PTRACE);
 }
 
 /* Returns 0 on success, -errno on denial. */
@@ -326,7 +320,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
            gid_eq(caller_gid, tcred->sgid) &&
            gid_eq(caller_gid, tcred->gid))
                goto ok;
-       if (ptrace_has_cap(cred, tcred->user_ns, mode))
+       if (ptrace_has_cap(tcred->user_ns, mode))
                goto ok;
        rcu_read_unlock();
        return -EPERM;
@@ -345,7 +339,7 @@ ok:
        mm = task->mm;
        if (mm &&
            ((get_dumpable(mm) != SUID_DUMP_USER) &&
-            !ptrace_has_cap(cred, mm->user_ns, mode)))
+            !ptrace_has_cap(mm->user_ns, mode)))
            return -EPERM;
 
        return security_ptrace_access_check(task, mode);
index 0fde39b..ca21d28 100644 (file)
@@ -249,13 +249,16 @@ static bool check_slow_task(struct task_struct *t, void *arg)
 
 /*
  * Scan the current list of tasks blocked within RCU read-side critical
- * sections, printing out the tid of each.
+ * sections, printing out the tid of each of the first few of them.
  */
-static int rcu_print_task_stall(struct rcu_node *rnp)
+static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
+       __releases(rnp->lock)
 {
+       int i = 0;
        int ndetected = 0;
        struct rcu_stall_chk_rdr rscr;
        struct task_struct *t;
+       struct task_struct *ts[8];
 
        if (!rcu_preempt_blocked_readers_cgp(rnp))
                return 0;
@@ -264,6 +267,14 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
        t = list_entry(rnp->gp_tasks->prev,
                       struct task_struct, rcu_node_entry);
        list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+               get_task_struct(t);
+               ts[i++] = t;
+               if (i >= ARRAY_SIZE(ts))
+                       break;
+       }
+       raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+       while (i) {
+               t = ts[--i];
                if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
                        pr_cont(" P%d", t->pid);
                else
@@ -273,6 +284,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
                                ".q"[rscr.rs.b.need_qs],
                                ".e"[rscr.rs.b.exp_hint],
                                ".l"[rscr.on_blkd_list]);
+               put_task_struct(t);
                ndetected++;
        }
        pr_cont("\n");
@@ -293,8 +305,9 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
  * Because preemptible RCU does not exist, we never have to check for
  * tasks blocked within RCU read-side critical sections.
  */
-static int rcu_print_task_stall(struct rcu_node *rnp)
+static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
 {
+       raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        return 0;
 }
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
@@ -472,7 +485,6 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
        pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name);
        rcu_for_each_leaf_node(rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
-               ndetected += rcu_print_task_stall(rnp);
                if (rnp->qsmask != 0) {
                        for_each_leaf_node_possible_cpu(rnp, cpu)
                                if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
@@ -480,7 +492,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)
                                        ndetected++;
                                }
                }
-               raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+               ndetected += rcu_print_task_stall(rnp, flags); // Releases rnp->lock.
        }
 
        for_each_possible_cpu(cpu)
index d2003a7..e7e4534 100644 (file)
@@ -2501,7 +2501,12 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
 #ifdef CONFIG_SMP
        if (wake_flags & WF_MIGRATED)
                en_flags |= ENQUEUE_MIGRATED;
+       else
 #endif
+       if (p->in_iowait) {
+               delayacct_blkio_end(p);
+               atomic_dec(&task_rq(p)->nr_iowait);
+       }
 
        activate_task(rq, p, en_flags);
        ttwu_do_wakeup(rq, p, wake_flags, rf);
@@ -2888,11 +2893,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
        if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
                goto unlock;
 
-       if (p->in_iowait) {
-               delayacct_blkio_end(p);
-               atomic_dec(&task_rq(p)->nr_iowait);
-       }
-
 #ifdef CONFIG_SMP
        /*
         * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
@@ -2963,6 +2963,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 
        cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
        if (task_cpu(p) != cpu) {
+               if (p->in_iowait) {
+                       delayacct_blkio_end(p);
+                       atomic_dec(&task_rq(p)->nr_iowait);
+               }
+
                wake_flags |= WF_MIGRATED;
                psi_ttwu_dequeue(p);
                set_task_cpu(p, cpu);
@@ -4907,20 +4912,21 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
                if (!dl_prio(p->normal_prio) ||
                    (pi_task && dl_prio(pi_task->prio) &&
                     dl_entity_preempt(&pi_task->dl, &p->dl))) {
-                       p->dl.dl_boosted = 1;
+                       p->dl.pi_se = pi_task->dl.pi_se;
                        queue_flag |= ENQUEUE_REPLENISH;
-               } else
-                       p->dl.dl_boosted = 0;
+               } else {
+                       p->dl.pi_se = &p->dl;
+               }
                p->sched_class = &dl_sched_class;
        } else if (rt_prio(prio)) {
                if (dl_prio(oldprio))
-                       p->dl.dl_boosted = 0;
+                       p->dl.pi_se = &p->dl;
                if (oldprio < prio)
                        queue_flag |= ENQUEUE_HEAD;
                p->sched_class = &rt_sched_class;
        } else {
                if (dl_prio(oldprio))
-                       p->dl.dl_boosted = 0;
+                       p->dl.pi_se = &p->dl;
                if (rt_prio(oldprio))
                        p->rt.timeout = 0;
                p->sched_class = &fair_sched_class;
index f232305..1d3c972 100644 (file)
@@ -43,6 +43,28 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)
        return !RB_EMPTY_NODE(&dl_se->rb_node);
 }
 
+#ifdef CONFIG_RT_MUTEXES
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+       return dl_se->pi_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+       return pi_of(dl_se) != dl_se;
+}
+#else
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+       return dl_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+       return false;
+}
+#endif
+
 #ifdef CONFIG_SMP
 static inline struct dl_bw *dl_bw_of(int i)
 {
@@ -698,7 +720,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
        struct rq *rq = rq_of_dl_rq(dl_rq);
 
-       WARN_ON(dl_se->dl_boosted);
+       WARN_ON(is_dl_boosted(dl_se));
        WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
 
        /*
@@ -736,21 +758,20 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
  * could happen are, typically, an entity voluntarily trying to overcome its
  * runtime, or it just underestimated it during sched_setattr().
  */
-static void replenish_dl_entity(struct sched_dl_entity *dl_se,
-                               struct sched_dl_entity *pi_se)
+static void replenish_dl_entity(struct sched_dl_entity *dl_se)
 {
        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
        struct rq *rq = rq_of_dl_rq(dl_rq);
 
-       BUG_ON(pi_se->dl_runtime <= 0);
+       BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
 
        /*
         * This could be the case for a !-dl task that is boosted.
         * Just go with full inherited parameters.
         */
        if (dl_se->dl_deadline == 0) {
-               dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-               dl_se->runtime = pi_se->dl_runtime;
+               dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+               dl_se->runtime = pi_of(dl_se)->dl_runtime;
        }
 
        if (dl_se->dl_yielded && dl_se->runtime > 0)
@@ -763,8 +784,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
         * arbitrary large.
         */
        while (dl_se->runtime <= 0) {
-               dl_se->deadline += pi_se->dl_period;
-               dl_se->runtime += pi_se->dl_runtime;
+               dl_se->deadline += pi_of(dl_se)->dl_period;
+               dl_se->runtime += pi_of(dl_se)->dl_runtime;
        }
 
        /*
@@ -778,8 +799,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
         */
        if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
                printk_deferred_once("sched: DL replenish lagged too much\n");
-               dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-               dl_se->runtime = pi_se->dl_runtime;
+               dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+               dl_se->runtime = pi_of(dl_se)->dl_runtime;
        }
 
        if (dl_se->dl_yielded)
@@ -812,8 +833,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
  * task with deadline equal to period this is the same of using
  * dl_period instead of dl_deadline in the equation above.
  */
-static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
-                              struct sched_dl_entity *pi_se, u64 t)
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
 {
        u64 left, right;
 
@@ -835,9 +855,9 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
         * of anything below microseconds resolution is actually fiction
         * (but still we want to give the user that illusion >;).
         */
-       left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+       left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
        right = ((dl_se->deadline - t) >> DL_SCALE) *
-               (pi_se->dl_runtime >> DL_SCALE);
+               (pi_of(dl_se)->dl_runtime >> DL_SCALE);
 
        return dl_time_before(right, left);
 }
@@ -922,24 +942,23 @@ static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
  * Please refer to the comments update_dl_revised_wakeup() function to find
  * more about the Revised CBS rule.
  */
-static void update_dl_entity(struct sched_dl_entity *dl_se,
-                            struct sched_dl_entity *pi_se)
+static void update_dl_entity(struct sched_dl_entity *dl_se)
 {
        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
        struct rq *rq = rq_of_dl_rq(dl_rq);
 
        if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
-           dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+           dl_entity_overflow(dl_se, rq_clock(rq))) {
 
                if (unlikely(!dl_is_implicit(dl_se) &&
                             !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
-                            !dl_se->dl_boosted)){
+                            !is_dl_boosted(dl_se))) {
                        update_dl_revised_wakeup(dl_se, rq);
                        return;
                }
 
-               dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-               dl_se->runtime = pi_se->dl_runtime;
+               dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+               dl_se->runtime = pi_of(dl_se)->dl_runtime;
        }
 }
 
@@ -1038,7 +1057,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
         * The task might have been boosted by someone else and might be in the
         * boosting/deboosting path, its not throttled.
         */
-       if (dl_se->dl_boosted)
+       if (is_dl_boosted(dl_se))
                goto unlock;
 
        /*
@@ -1066,7 +1085,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
         * but do not enqueue -- wait for our wakeup to do that.
         */
        if (!task_on_rq_queued(p)) {
-               replenish_dl_entity(dl_se, dl_se);
+               replenish_dl_entity(dl_se);
                goto unlock;
        }
 
@@ -1156,7 +1175,7 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
 
        if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
            dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
-               if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+               if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
                        return;
                dl_se->dl_throttled = 1;
                if (dl_se->runtime > 0)
@@ -1287,7 +1306,7 @@ throttle:
                        dl_se->dl_overrun = 1;
 
                __dequeue_task_dl(rq, curr, 0);
-               if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
+               if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
                        enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 
                if (!is_leftmost(curr, &rq->dl))
@@ -1481,8 +1500,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
 }
 
 static void
-enqueue_dl_entity(struct sched_dl_entity *dl_se,
-                 struct sched_dl_entity *pi_se, int flags)
+enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
 {
        BUG_ON(on_dl_rq(dl_se));
 
@@ -1493,9 +1511,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
         */
        if (flags & ENQUEUE_WAKEUP) {
                task_contending(dl_se, flags);
-               update_dl_entity(dl_se, pi_se);
+               update_dl_entity(dl_se);
        } else if (flags & ENQUEUE_REPLENISH) {
-               replenish_dl_entity(dl_se, pi_se);
+               replenish_dl_entity(dl_se);
        } else if ((flags & ENQUEUE_RESTORE) &&
                  dl_time_before(dl_se->deadline,
                                 rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
@@ -1512,19 +1530,7 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
 
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
-       struct task_struct *pi_task = rt_mutex_get_top_task(p);
-       struct sched_dl_entity *pi_se = &p->dl;
-
-       /*
-        * Use the scheduling parameters of the top pi-waiter task if:
-        * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
-        * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
-        *   smaller than our deadline OR we are a !SCHED_DEADLINE task getting
-        *   boosted due to a SCHED_DEADLINE pi-waiter).
-        * Otherwise we keep our runtime and deadline.
-        */
-       if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
-               pi_se = &pi_task->dl;
+       if (is_dl_boosted(&p->dl)) {
                /*
                 * Because of delays in the detection of the overrun of a
                 * thread's runtime, it might be the case that a thread
@@ -1557,7 +1563,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
                 * the throttle.
                 */
                p->dl.dl_throttled = 0;
-               BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
+               BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
                return;
        }
 
@@ -1594,7 +1600,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
                return;
        }
 
-       enqueue_dl_entity(&p->dl, pi_se, flags);
+       enqueue_dl_entity(&p->dl, flags);
 
        if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
                enqueue_pushable_dl_task(rq, p);
@@ -2787,11 +2793,14 @@ void __dl_clear_params(struct task_struct *p)
        dl_se->dl_bw                    = 0;
        dl_se->dl_density               = 0;
 
-       dl_se->dl_boosted               = 0;
        dl_se->dl_throttled             = 0;
        dl_se->dl_yielded               = 0;
        dl_se->dl_non_contending        = 0;
        dl_se->dl_overrun               = 0;
+
+#ifdef CONFIG_RT_MUTEXES
+       dl_se->pi_se                    = dl_se;
+#endif
 }
 
 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
index 8917d2d..ae7ceba 100644 (file)
@@ -5477,6 +5477,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se;
        int idle_h_nr_running = task_has_idle_policy(p);
+       int task_new = !(flags & ENQUEUE_WAKEUP);
 
        /*
         * The code below (indirectly) updates schedutil which looks at
@@ -5549,7 +5550,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
         * into account, but that is not straightforward to implement,
         * and the following generally works well enough in practice.
         */
-       if (flags & ENQUEUE_WAKEUP)
+       if (!task_new)
                update_overutilized_status(rq);
 
 enqueue_throttle:
index 24d0ee2..c6932b8 100644 (file)
@@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { }
 void __weak arch_cpu_idle(void)
 {
        cpu_idle_force_poll = 1;
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /**
@@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void)
 
                trace_cpu_idle(1, smp_processor_id());
                stop_critical_timings();
+
+               /*
+                * arch_cpu_idle() is supposed to enable IRQs, however
+                * we can't do that because of RCU and tracing.
+                *
+                * Trace IRQs enable here, then switch off RCU, and have
+                * arch_cpu_idle() use raw_local_irq_enable(). Note that
+                * rcu_idle_enter() relies on lockdep IRQ state, so switch that
+                * last -- this is very similar to the entry code.
+                */
+               trace_hardirqs_on_prepare();
+               lockdep_hardirqs_on_prepare(_THIS_IP_);
                rcu_idle_enter();
+               lockdep_hardirqs_on(_THIS_IP_);
+
                arch_cpu_idle();
+
+               /*
+                * OK, so IRQs are enabled here, but RCU needs them disabled to
+                * turn itself back on.. funny thing is that disabling IRQs
+                * will cause tracing, which needs RCU. Jump through hoops to
+                * make it 'work'.
+                */
+               raw_local_irq_disable();
+               lockdep_hardirqs_off(_THIS_IP_);
                rcu_idle_exit();
+               lockdep_hardirqs_on(_THIS_IP_);
+               raw_local_irq_enable();
+
                start_critical_timings();
                trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
        }
index e23e74d..9d8df34 100644 (file)
@@ -38,8 +38,33 @@ static void ipi_mb(void *info)
        smp_mb();       /* IPIs should be serializing but paranoid. */
 }
 
+static void ipi_sync_core(void *info)
+{
+       /*
+        * The smp_mb() in membarrier after all the IPIs is supposed to
+        * ensure that memory on remote CPUs that occur before the IPI
+        * become visible to membarrier()'s caller -- see scenario B in
+        * the big comment at the top of this file.
+        *
+        * A sync_core() would provide this guarantee, but
+        * sync_core_before_usermode() might end up being deferred until
+        * after membarrier()'s smp_mb().
+        */
+       smp_mb();       /* IPIs should be serializing but paranoid. */
+
+       sync_core_before_usermode();
+}
+
 static void ipi_rseq(void *info)
 {
+       /*
+        * Ensure that all stores done by the calling thread are visible
+        * to the current task before the current task resumes.  We could
+        * probably optimize this away on most architectures, but by the
+        * time we've already sent an IPI, the cost of the extra smp_mb()
+        * is negligible.
+        */
+       smp_mb();
        rseq_preempt(current);
 }
 
@@ -154,6 +179,7 @@ static int membarrier_private_expedited(int flags, int cpu_id)
                if (!(atomic_read(&mm->membarrier_state) &
                      MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
                        return -EPERM;
+               ipi_func = ipi_sync_core;
        } else if (flags == MEMBARRIER_FLAG_RSEQ) {
                if (!IS_ENABLED(CONFIG_RSEQ))
                        return -EINVAL;
@@ -168,7 +194,8 @@ static int membarrier_private_expedited(int flags, int cpu_id)
                        return -EPERM;
        }
 
-       if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1)
+       if (flags != MEMBARRIER_FLAG_SYNC_CORE &&
+           (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1))
                return 0;
 
        /*
@@ -187,8 +214,6 @@ static int membarrier_private_expedited(int flags, int cpu_id)
 
                if (cpu_id >= nr_cpu_ids || !cpu_online(cpu_id))
                        goto out;
-               if (cpu_id == raw_smp_processor_id())
-                       goto out;
                rcu_read_lock();
                p = rcu_dereference(cpu_rq(cpu_id)->curr);
                if (!p || p->mm != mm) {
@@ -203,16 +228,6 @@ static int membarrier_private_expedited(int flags, int cpu_id)
                for_each_online_cpu(cpu) {
                        struct task_struct *p;
 
-                       /*
-                        * Skipping the current CPU is OK even through we can be
-                        * migrated at any point. The current CPU, at the point
-                        * where we read raw_smp_processor_id(), is ensured to
-                        * be in program order with respect to the caller
-                        * thread. Therefore, we can skip this CPU from the
-                        * iteration.
-                        */
-                       if (cpu == raw_smp_processor_id())
-                               continue;
                        p = rcu_dereference(cpu_rq(cpu)->curr);
                        if (p && p->mm == mm)
                                __cpumask_set_cpu(cpu, tmpmask);
@@ -220,12 +235,38 @@ static int membarrier_private_expedited(int flags, int cpu_id)
                rcu_read_unlock();
        }
 
-       preempt_disable();
-       if (cpu_id >= 0)
+       if (cpu_id >= 0) {
+               /*
+                * smp_call_function_single() will call ipi_func() if cpu_id
+                * is the calling CPU.
+                */
                smp_call_function_single(cpu_id, ipi_func, NULL, 1);
-       else
-               smp_call_function_many(tmpmask, ipi_func, NULL, 1);
-       preempt_enable();
+       } else {
+               /*
+                * For regular membarrier, we can save a few cycles by
+                * skipping the current cpu -- we're about to do smp_mb()
+                * below, and if we migrate to a different cpu, this cpu
+                * and the new cpu will execute a full barrier in the
+                * scheduler.
+                *
+                * For SYNC_CORE, we do need a barrier on the current cpu --
+                * otherwise, if we are migrated and replaced by a different
+                * task in the same mm just before, during, or after
+                * membarrier, we will end up with some thread in the mm
+                * running without a core sync.
+                *
+                * For RSEQ, don't rseq_preempt() the caller.  User code
+                * is not supposed to issue syscalls at all from inside an
+                * rseq critical section.
+                */
+               if (flags != MEMBARRIER_FLAG_SYNC_CORE) {
+                       preempt_disable();
+                       smp_call_function_many(tmpmask, ipi_func, NULL, true);
+                       preempt_enable();
+               } else {
+                       on_each_cpu_mask(tmpmask, ipi_func, NULL, true);
+               }
+       }
 
 out:
        if (cpu_id < 0)
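
From the user side, SYNC_CORE now guarantees a core-serializing event on every thread of the mm, including on the caller's own CPU. A JIT-style usage sketch; glibc provides no membarrier wrapper, so the raw syscall is used:

	#include <linux/membarrier.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int membarrier(int cmd, unsigned int flags, int cpu_id)
	{
		return syscall(__NR_membarrier, cmd, flags, cpu_id);
	}

	static void publish_jitted_code(void)
	{
		/* Once per process, typically at startup: */
		membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE,
			   0, 0);

		/* ... write the new code and make it reachable, then: */
		membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0);
	}
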
index 01f5d30..183cc6a 100644 (file)
@@ -37,6 +37,17 @@ void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue
 }
 EXPORT_SYMBOL(add_wait_queue_exclusive);
 
+void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+{
+       unsigned long flags;
+
+       wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY;
+       spin_lock_irqsave(&wq_head->lock, flags);
+       __add_wait_queue(wq_head, wq_entry);
+       spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL_GPL(add_wait_queue_priority);
+
 void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
        unsigned long flags;
@@ -57,7 +68,11 @@ EXPORT_SYMBOL(remove_wait_queue);
 /*
  * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
  * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
- * number) then we wake all the non-exclusive tasks and one exclusive task.
+ * number) then we wake that number of exclusive tasks, and potentially all
+ * the non-exclusive tasks. Normally, exclusive tasks will be at the end of
+ * the list and any non-exclusive tasks will be woken first. A priority task
+ * may be at the head of the list, and can consume the event without any other
+ * tasks being woken.
  *
  * There are circumstances in which we can try to wake a task which has already
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
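
A hedged usage sketch for the new helper: __add_wait_queue() above inserts at the head of the list (plain exclusive waiters go to the tail), so a priority entry's callback runs first and, being exclusive, can consume a single-waiter wakeup before anyone else sees it. The callback and handle_event() below are hypothetical:

    static int my_prio_wake(struct wait_queue_entry *wqe, unsigned mode,
                            int sync, void *key)
    {
            /* Returning nonzero from an exclusive entry counts against
             * nr_exclusive, so for a wake_up() (nr_exclusive == 1) the
             * walk stops here and no other task is woken. */
            return handle_event(key) ? 1 : 0;
    }

    struct wait_queue_entry wqe;

    init_waitqueue_func_entry(&wqe, my_prio_wake);
    add_wait_queue_priority(&wqh, &wqe);  /* sets EXCLUSIVE | PRIORITY, queues at head */
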
index 8ad7a29..53a7d15 100644 (file)
@@ -38,7 +38,7 @@
 #include <linux/filter.h>
 #include <linux/pid.h>
 #include <linux/ptrace.h>
-#include <linux/security.h>
+#include <linux/capability.h>
 #include <linux/tracehook.h>
 #include <linux/uaccess.h>
 #include <linux/anon_inodes.h>
@@ -558,8 +558,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
         * behavior of privileged children.
         */
        if (!task_no_new_privs(current) &&
-           security_capable(current_cred(), current_user_ns(),
-                                    CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0)
+                       !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
                return ERR_PTR(-EACCES);
 
        /* Allocate a new seccomp_filter */
index a4020c0..e1bf522 100644 (file)
@@ -202,7 +202,7 @@ config DYNAMIC_FTRACE_WITH_REGS
 
 config DYNAMIC_FTRACE_WITH_DIRECT_CALLS
        def_bool y
-       depends on DYNAMIC_FTRACE
+       depends on DYNAMIC_FTRACE_WITH_REGS
        depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 
 config FUNCTION_PROFILER
index 4517c8b..a125ea5 100644 (file)
@@ -181,6 +181,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size,
 {
        int ret;
 
+       /*
+        * NB: We rely on strncpy_from_user() not copying junk past the NUL
+        * terminator into `dst`.
+        *
+        * strncpy_from_user() does long-sized strides in the fast path. If the
+        * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
+        * then there could be junk after the NUL in `dst`. If the user takes
+        * `dst` and keys a hash map with it, then semantically identical
+        * strings can occupy multiple entries in the map.
+        */
        ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
        if (unlikely(ret < 0))
                memset(dst, 0, size);
@@ -1198,7 +1208,7 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
        *btf = bpf_get_btf_vmlinux();
 
        if (IS_ERR_OR_NULL(*btf))
-               return PTR_ERR(*btf);
+               return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
 
        if (ptr->type_id > 0)
                *btf_id = ptr->type_id;
@@ -1327,9 +1337,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
        case BPF_FUNC_snprintf_btf:
                return &bpf_snprintf_btf_proto;
-       case BPF_FUNC_bpf_per_cpu_ptr:
+       case BPF_FUNC_per_cpu_ptr:
                return &bpf_per_cpu_ptr_proto;
-       case BPF_FUNC_bpf_this_cpu_ptr:
+       case BPF_FUNC_this_cpu_ptr:
                return &bpf_this_cpu_ptr_proto;
        default:
                return NULL;
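
The bpf_btf_printf_prepare() fix above guards against a classic pitfall: PTR_ERR() on a NULL pointer evaluates to 0, i.e. success. A condensed illustration with a hypothetical getter:

    struct btf *btf = get_btf();    /* may return NULL or ERR_PTR(-E...) */

    if (IS_ERR_OR_NULL(btf))
            return PTR_ERR(btf);    /* buggy: PTR_ERR(NULL) == 0, so the
                                     * caller mistakes failure for success */

    if (IS_ERR_OR_NULL(btf))        /* fixed: map NULL to a real errno */
            return IS_ERR(btf) ? PTR_ERR(btf) : -EINVAL;
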
index 8185f72..9c1bba8 100644 (file)
@@ -1629,6 +1629,8 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec)
 static struct ftrace_ops *
 ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
 static struct ftrace_ops *
+ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude);
+static struct ftrace_ops *
 ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
 
 static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
@@ -1778,7 +1780,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
                         * to it.
                         */
                        if (ftrace_rec_count(rec) == 1 &&
-                           ftrace_find_tramp_ops_any(rec))
+                           ftrace_find_tramp_ops_any_other(rec, ops))
                                rec->flags |= FTRACE_FL_TRAMP;
                        else
                                rec->flags &= ~FTRACE_FL_TRAMP;
@@ -2245,6 +2247,24 @@ ftrace_find_tramp_ops_any(struct dyn_ftrace *rec)
 }
 
 static struct ftrace_ops *
+ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude)
+{
+       struct ftrace_ops *op;
+       unsigned long ip = rec->ip;
+
+       do_for_each_ftrace_op(op, ftrace_ops_list) {
+
+               if (op == op_exclude || !op->trampoline)
+                       continue;
+
+               if (hash_contains_ip(ip, op->func_hash))
+                       return op;
+       } while_for_each_ftrace_op(op);
+
+       return NULL;
+}
+
+static struct ftrace_ops *
 ftrace_find_tramp_ops_next(struct dyn_ftrace *rec,
                           struct ftrace_ops *op)
 {
index dc83b3f..a6268e0 100644 (file)
@@ -3234,14 +3234,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
        /* See if we shot pass the end of this buffer page */
        if (unlikely(write > BUF_PAGE_SIZE)) {
-               if (tail != w) {
-                       /* before and after may now different, fix it up*/
-                       b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
-                       a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
-                       if (a_ok && b_ok && info->before != info->after)
-                               (void)rb_time_cmpxchg(&cpu_buffer->before_stamp,
-                                                     info->before, info->after);
-               }
+               /* before and after may now be different, fix it up */
+               b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
+               a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
+               if (a_ok && b_ok && info->before != info->after)
+                       (void)rb_time_cmpxchg(&cpu_buffer->before_stamp,
+                                             info->before, info->after);
                return rb_move_tail(cpu_buffer, tail, info);
        }
 
@@ -3287,11 +3285,11 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                ts = rb_time_stamp(cpu_buffer->buffer);
                barrier();
  /*E*/         if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
-                   info->after < ts) {
+                   info->after < ts &&
+                   rb_time_cmpxchg(&cpu_buffer->write_stamp,
+                                   info->after, ts)) {
                        /* Nothing came after this event between C and E */
                        info->delta = ts - info->after;
-                       (void)rb_time_cmpxchg(&cpu_buffer->write_stamp,
-                                             info->after, info->ts);
                        info->ts = ts;
                } else {
                        /*
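
The reworked /*E*/ branch above only trusts the recomputed delta when rb_time_cmpxchg() reports that this CPU actually performed the write_stamp update. The idiom, reduced to a plain atomic field (read_clock() is hypothetical):

    u64 old = atomic64_read(&stamp);
    u64 now = read_clock();

    if (now > old && atomic64_cmpxchg(&stamp, old, now) == old) {
            /* we won the race: nothing slipped in between the read and
             * the update, so a delta derived from `old` is consistent */
            delta = now - old;
    } else {
            /* another writer got in first; fall back to a safe value */
            delta = 0;
    }
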
index 410cfeb..0613418 100644 (file)
@@ -163,7 +163,8 @@ static union trace_eval_map_item *trace_eval_maps;
 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
 
 int tracing_set_tracer(struct trace_array *tr, const char *buf);
-static void ftrace_trace_userstack(struct trace_buffer *buffer,
+static void ftrace_trace_userstack(struct trace_array *tr,
+                                  struct trace_buffer *buffer,
                                   unsigned long flags, int pc);
 
 #define MAX_TRACER_SIZE                100
@@ -2870,7 +2871,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
         * two. They are not that meaningful.
         */
        ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
-       ftrace_trace_userstack(buffer, flags, pc);
+       ftrace_trace_userstack(tr, buffer, flags, pc);
 }
 
 /*
@@ -3056,13 +3057,14 @@ EXPORT_SYMBOL_GPL(trace_dump_stack);
 static DEFINE_PER_CPU(int, user_stack_count);
 
 static void
-ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
+ftrace_trace_userstack(struct trace_array *tr,
+                      struct trace_buffer *buffer, unsigned long flags, int pc)
 {
        struct trace_event_call *call = &event_user_stack;
        struct ring_buffer_event *event;
        struct userstack_entry *entry;
 
-       if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
+       if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
                return;
 
        /*
@@ -3101,7 +3103,8 @@ ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
        preempt_enable();
 }
 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
-static void ftrace_trace_userstack(struct trace_buffer *buffer,
+static void ftrace_trace_userstack(struct trace_array *tr,
+                                  struct trace_buffer *buffer,
                                   unsigned long flags, int pc)
 {
 }
@@ -3534,7 +3537,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
 }
 
 #define STATIC_TEMP_BUF_SIZE   128
-static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
+static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
 
 /* Find the next real entry, without updating the iterator itself */
 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
index c9ad5c6..d071fc2 100644 (file)
@@ -368,7 +368,7 @@ static int start_kthread(struct trace_array *tr)
        struct task_struct *kthread;
        int next_cpu;
 
-       if (WARN_ON(hwlat_kthread))
+       if (hwlat_kthread)
                return 0;
 
        /* Just pick the first CPU on first iteration */
index ce45af5..d415fc7 100644 (file)
@@ -107,7 +107,7 @@ obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o
 # off the generation of FPU/SSE* instructions for kernel proper but FPU_FLAGS
 # get appended last to CFLAGS and thus override those previous compiler options.
 #
-FPU_CFLAGS := -mhard-float -msse -msse2
+FPU_CFLAGS := -msse -msse2
 ifdef CONFIG_CC_IS_GCC
 # Stack alignment mismatch, proceed with caution.
 # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
@@ -120,6 +120,7 @@ ifdef CONFIG_CC_IS_GCC
 #  -mpreferred-stack-boundary=3 is not between 4 and 12
 #
 # can be triggered. Otherwise gcc doesn't complain.
+FPU_CFLAGS += -mhard-float
 FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4)
 endif
 
index e6d5fcc..122d8d0 100644 (file)
@@ -35,17 +35,32 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src,
                goto byte_at_a_time;
 
        while (max >= sizeof(unsigned long)) {
-               unsigned long c, data;
+               unsigned long c, data, mask;
 
                /* Fall back to byte-at-a-time if we get a page fault */
                unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
 
-               *(unsigned long *)(dst+res) = c;
+               /*
+                * Note that we mask out the bytes following the NUL. This is
+                * important to do because string-oblivious code may read past
+                * the NUL. For those routines, we don't want to give them
+                * potentially random bytes after the NUL in `src`.
+                *
+                * One example of such code is BPF map keys. BPF treats map keys
+                * as an opaque set of bytes. Without the post-NUL mask, any BPF
+                * maps keyed by strings returned from strncpy_from_user() may
+                * have multiple entries for semantically identical strings.
+                */
                if (has_zero(c, &data, &constants)) {
                        data = prep_zero_mask(c, data, &constants);
                        data = create_zero_mask(data);
+                       mask = zero_bytemask(data);
+                       *(unsigned long *)(dst+res) = c & mask;
                        return res + find_zero(data);
                }
+
+               *(unsigned long *)(dst+res) = c;
+
                res += sizeof(unsigned long);
                max -= sizeof(unsigned long);
        }
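
A self-contained userspace sketch of the masking added above, assuming a little-endian 64-bit machine (the kernel's has_zero()/zero_bytemask() helpers use the same SWAR tricks, abstracted per architecture):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define ONES  0x0101010101010101ULL
    #define HIGHS 0x8080808080808080ULL

    int main(void)
    {
            uint64_t word;

            /* "hi" + NUL, then junk that string-oblivious code must not see */
            memcpy(&word, "hi\0JUNK!", 8);

            /* high bit set in each byte that is zero (no false positives
             * before the first NUL, which is all we rely on here) */
            uint64_t zeros = (word - ONES) & ~word & HIGHS;
            if (zeros) {
                    /* keep only the bytes before the first NUL */
                    uint64_t mask = (1ULL << (__builtin_ctzll(zeros) - 7)) - 1;
                    word &= mask;
            }
            printf("%016llx\n", (unsigned long long)word);  /* 0000000000006968 */
            return 0;
    }
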
index 8533d2f..ba13e92 100644 (file)
@@ -7,6 +7,7 @@
 
 static int collect_syscall(struct task_struct *target, struct syscall_info *info)
 {
+       unsigned long args[6] = { };
        struct pt_regs *regs;
 
        if (!try_get_task_stack(target)) {
@@ -27,8 +28,14 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info
 
        info->data.nr = syscall_get_nr(target, regs);
        if (info->data.nr != -1L)
-               syscall_get_arguments(target, regs,
-                                     (unsigned long *)&info->data.args[0]);
+               syscall_get_arguments(target, regs, args);
+
+       info->data.args[0] = args[0];
+       info->data.args[1] = args[1];
+       info->data.args[2] = args[2];
+       info->data.args[3] = args[3];
+       info->data.args[4] = args[4];
+       info->data.args[5] = args[5];
 
        put_task_stack(target);
        return 0;
index aa9ef23..db10701 100644 (file)
@@ -4,6 +4,7 @@
 #include "dfltcc_util.h"
 #include "dfltcc.h"
 #include <asm/setup.h>
+#include <linux/export.h>
 #include <linux/zutil.h>
 
 /*
@@ -29,6 +30,7 @@ int dfltcc_can_inflate(
     return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) &&
                is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0);
 }
+EXPORT_SYMBOL(dfltcc_can_inflate);
 
 static int dfltcc_was_inflate_used(
     z_streamp strm
@@ -147,3 +149,4 @@ dfltcc_inflate_action dfltcc_inflate(
     return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ?
         DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE;
 }
+EXPORT_SYMBOL(dfltcc_inflate);
index d42423f..390165f 100644 (file)
@@ -707,19 +707,6 @@ config ZSMALLOC
          returned by an alloc().  This handle must be mapped in order to
          access the allocated space.
 
-config ZSMALLOC_PGTABLE_MAPPING
-       bool "Use page table mapping to access object in zsmalloc"
-       depends on ZSMALLOC=y
-       help
-         By default, zsmalloc uses a copy-based object mapping method to
-         access allocations that span two pages. However, if a particular
-         architecture (ex, ARM) performs VM mapping faster than copying,
-         then you should select this. This causes zsmalloc to use page table
-         mapping rather than copying for object mapping.
-
-         You can check speed with zsmalloc benchmark:
-         https://github.com/spartacus06/zsmapbench
-
 config ZSMALLOC_STAT
        bool "Export zsmalloc statistics"
        depends on ZSMALLOC
index d5e7c20..0b2067b 100644 (file)
@@ -1484,11 +1484,19 @@ void end_page_writeback(struct page *page)
                rotate_reclaimable_page(page);
        }
 
+       /*
+        * Writeback does not hold a page reference of its own, relying
+        * on truncation to wait for the clearing of PG_writeback.
+        * But here we must make sure that the page is not freed and
+        * reused before the wake_up_page().
+        */
+       get_page(page);
        if (!test_clear_page_writeback(page))
                BUG();
 
        smp_mb__after_atomic();
        wake_up_page(page, PG_writeback);
+       put_page(page);
 }
 EXPORT_SYMBOL(end_page_writeback);
 
@@ -2347,10 +2355,15 @@ page_ok:
 
 page_not_up_to_date:
                /* Get exclusive access to the page ... */
-               if (iocb->ki_flags & IOCB_WAITQ)
+               if (iocb->ki_flags & IOCB_WAITQ) {
+                       if (written) {
+                               put_page(page);
+                               goto out;
+                       }
                        error = lock_page_async(page, iocb->ki_waitq);
-               else
+               } else {
                        error = lock_page_killable(page);
+               }
                if (unlikely(error))
                        goto readpage_error;
 
@@ -2393,10 +2406,15 @@ readpage:
                }
 
                if (!PageUptodate(page)) {
-                       if (iocb->ki_flags & IOCB_WAITQ)
+                       if (iocb->ki_flags & IOCB_WAITQ) {
+                               if (written) {
+                                       put_page(page);
+                                       goto out;
+                               }
                                error = lock_page_async(page, iocb->ki_waitq);
-                       else
+                       } else {
                                error = lock_page_killable(page);
+                       }
 
                        if (unlikely(error))
                                goto readpage_error;
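
The end_page_writeback() change earlier in this file is the standard "pin across wakeup" pattern: the waker takes its own reference so the object cannot be freed between the state clear (after which truncation may release the page) and the wakeup that still touches per-page wait machinery. Condensed, with commentary:

    get_page(page);                         /* our own pin on the page */
    if (!test_clear_page_writeback(page))   /* truncation may now proceed */
            BUG();
    smp_mb__after_atomic();                 /* order flag clear vs. waiter checks */
    wake_up_page(page, PG_writeback);       /* safe: we still hold a reference */
    put_page(page);                         /* the page may be freed from here on */
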
index 9474dbc..ec2bb93 100644 (file)
@@ -710,7 +710,6 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
                        transparent_hugepage_use_zero_page()) {
                pgtable_t pgtable;
                struct page *zero_page;
-               bool set;
                vm_fault_t ret;
                pgtable = pte_alloc_one(vma->vm_mm);
                if (unlikely(!pgtable))
@@ -723,25 +722,25 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
                }
                vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
                ret = 0;
-               set = false;
                if (pmd_none(*vmf->pmd)) {
                        ret = check_stable_address_space(vma->vm_mm);
                        if (ret) {
                                spin_unlock(vmf->ptl);
+                               pte_free(vma->vm_mm, pgtable);
                        } else if (userfaultfd_missing(vma)) {
                                spin_unlock(vmf->ptl);
+                               pte_free(vma->vm_mm, pgtable);
                                ret = handle_userfault(vmf, VM_UFFD_MISSING);
                                VM_BUG_ON(ret & VM_FAULT_FALLBACK);
                        } else {
                                set_huge_zero_page(pgtable, vma->vm_mm, vma,
                                                   haddr, vmf->pmd, zero_page);
                                spin_unlock(vmf->ptl);
-                               set = true;
                        }
-               } else
+               } else {
                        spin_unlock(vmf->ptl);
-               if (!set)
                        pte_free(vma->vm_mm, pgtable);
+               }
                return ret;
        }
        gfp = alloc_hugepage_direct_gfpmask(vma);
index 37f15c3..d029d93 100644 (file)
@@ -1216,6 +1216,7 @@ static void destroy_compound_gigantic_page(struct page *page,
        }
 
        set_compound_order(page, 0);
+       page[1].compound_nr = 0;
        __ClearPageHead(page);
 }
 
index 1f87aec..9182848 100644 (file)
@@ -82,11 +82,8 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
 
        for (idx = 0; idx < hugetlb_max_hstate; idx++) {
                if (page_counter_read(
-                           hugetlb_cgroup_counter_from_cgroup(h_cg, idx)) ||
-                   page_counter_read(hugetlb_cgroup_counter_from_cgroup_rsvd(
-                           h_cg, idx))) {
+                               hugetlb_cgroup_counter_from_cgroup(h_cg, idx)))
                        return true;
-               }
        }
        return false;
 }
@@ -202,9 +199,10 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
        struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
        struct hstate *h;
        struct page *page;
-       int idx = 0;
+       int idx;
 
        do {
+               idx = 0;
                for_each_hstate(h) {
                        spin_lock(&hugetlb_lock);
                        list_for_each_entry(page, &h->hugepage_activelist, lru)
index 4c53758..0e3f849 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/srcu.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/cpuhotplug.h>
 
 #include "../slab.h"
 #include "kasan.h"
@@ -43,6 +44,7 @@ struct qlist_head {
        struct qlist_node *head;
        struct qlist_node *tail;
        size_t bytes;
+       bool offline;
 };
 
 #define QLIST_INIT { NULL, NULL, 0 }
@@ -188,6 +190,10 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
        local_irq_save(flags);
 
        q = this_cpu_ptr(&cpu_quarantine);
+       if (q->offline) {
+               local_irq_restore(flags);
+               return;
+       }
        qlist_put(q, &info->quarantine_link, cache->size);
        if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
                qlist_move_all(q, &temp);
@@ -328,3 +334,36 @@ void quarantine_remove_cache(struct kmem_cache *cache)
 
        synchronize_srcu(&remove_cache_srcu);
 }
+
+static int kasan_cpu_online(unsigned int cpu)
+{
+       this_cpu_ptr(&cpu_quarantine)->offline = false;
+       return 0;
+}
+
+static int kasan_cpu_offline(unsigned int cpu)
+{
+       struct qlist_head *q;
+
+       q = this_cpu_ptr(&cpu_quarantine);
+       /* Ensure the ordering between the write to q->offline and
+        * qlist_free_all(). Otherwise, cpu_quarantine may be corrupted
+        * by an interrupt.
+        */
+       WRITE_ONCE(q->offline, true);
+       barrier();
+       qlist_free_all(q, NULL);
+       return 0;
+}
+
+static int __init kasan_cpu_quarantine_init(void)
+{
+       int ret = 0;
+
+       ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/kasan:online",
+                               kasan_cpu_online, kasan_cpu_offline);
+       if (ret < 0)
+               pr_err("kasan cpu quarantine register failed [%d]\n", ret);
+       return ret;
+}
+late_initcall(kasan_cpu_quarantine_init);
index 5aa6e44..fe23008 100644 (file)
@@ -534,7 +534,6 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
        struct list_lru_node *nlru = &lru->node[nid];
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *src, *dst;
-       bool set;
 
        /*
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
@@ -546,11 +545,12 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
        dst = list_lru_from_memcg_idx(nlru, dst_idx);
 
        list_splice_init(&src->list, &dst->list);
-       set = (!dst->nr_items && src->nr_items);
-       dst->nr_items += src->nr_items;
-       if (set)
+
+       if (src->nr_items) {
+               dst->nr_items += src->nr_items;
                memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
-       src->nr_items = 0;
+               src->nr_items = 0;
+       }
 
        spin_unlock_irq(&nlru->lock);
 }
index 416a56b..13f5677 100644 (file)
@@ -226,7 +226,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
                struct address_space *mapping)
 {
        XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start));
-       pgoff_t end_index = end / PAGE_SIZE;
+       pgoff_t end_index = linear_page_index(vma, end + PAGE_SIZE - 1);
        struct page *page;
 
        rcu_read_lock();
@@ -1204,8 +1204,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
                goto put_pid;
        }
 
-       if (task->mm != current->mm &&
-                       !process_madvise_behavior_valid(behavior)) {
+       if (!process_madvise_behavior_valid(behavior)) {
                ret = -EINVAL;
                goto release_task;
        }
@@ -1231,8 +1230,6 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
                ret = total_len - iov_iter_count(&iter);
 
        mmput(mm);
-       return ret;
-
 release_task:
        put_task_struct(task);
 put_pid:
index 3dcbf24..29459a6 100644 (file)
@@ -867,8 +867,13 @@ void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val)
        rcu_read_lock();
        memcg = mem_cgroup_from_obj(p);
 
-       /* Untracked pages have no memcg, no lruvec. Update only the node */
-       if (!memcg || memcg == root_mem_cgroup) {
+       /*
+        * Untracked pages have no memcg, no lruvec. Update only the
+        * node. If we reparent the slab objects to the root memcg,
+        * when we free the slab object, we need to update the per-memcg
+        * vmstats to keep it correct for the root memcg.
+        */
+       if (!memcg) {
                __mod_node_page_state(pgdat, idx, val);
        } else {
                lruvec = mem_cgroup_lruvec(memcg, pgdat);
index b44d4c7..63b2e46 100644 (file)
@@ -350,24 +350,6 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
        return err;
 }
 
-#ifdef CONFIG_NUMA
-int __weak memory_add_physaddr_to_nid(u64 start)
-{
-       pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
-                       start);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-
-int __weak phys_to_target_node(u64 start)
-{
-       pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
-                       start);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(phys_to_target_node);
-#endif
-
 /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
 static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
                                     unsigned long start_pfn,
index d91ecb0..5c8b448 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1808,6 +1808,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
                if (error)
                        goto unmap_and_free_vma;
 
+               /* Can addr have changed??
+                *
+                * Answer: Yes, several device drivers can do it in their
+                *         f_op->mmap method. -DaveM
+                * Bug: If addr is changed, prev, rb_link, rb_parent should
+                *      be updated for vma_link()
+                */
+               WARN_ON_ONCE(addr != vma->vm_start);
+
+               addr = vma->vm_start;
+
                /* If vm_flags changed after call_mmap(), we should try merge vma again
                 * as we may succeed this time.
                 */
@@ -1822,25 +1833,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
                                fput(vma->vm_file);
                                vm_area_free(vma);
                                vma = merge;
-                               /* Update vm_flags and possible addr to pick up the change. We don't
-                                * warn here if addr changed as the vma is not linked by vma_link().
-                                */
-                               addr = vma->vm_start;
+                               /* Update vm_flags to pick up the change. */
                                vm_flags = vma->vm_flags;
                                goto unmap_writable;
                        }
                }
 
-               /* Can addr have changed??
-                *
-                * Answer: Yes, several device drivers can do it in their
-                *         f_op->mmap method. -DaveM
-                * Bug: If addr is changed, prev, rb_link, rb_parent should
-                *      be updated for vma_link()
-                */
-               WARN_ON_ONCE(addr != vma->vm_start);
-
-               addr = vma->vm_start;
                vm_flags = vma->vm_flags;
        } else if (vm_flags & VM_SHARED) {
                error = shmem_zero_setup(vma);
index 7709f0e..5860424 100644 (file)
@@ -2754,12 +2754,6 @@ int test_clear_page_writeback(struct page *page)
        } else {
                ret = TestClearPageWriteback(page);
        }
-       /*
-        * NOTE: Page might be free now! Writeback doesn't hold a page
-        * reference on its own, it relies on truncation to wait for
-        * the clearing of PG_writeback. The below can only access
-        * page state that is static across allocation cycles.
-        */
        if (ret) {
                dec_lruvec_state(lruvec, NR_WRITEBACK);
                dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
index 23f5066..eaa227a 100644 (file)
@@ -5103,6 +5103,11 @@ refill:
                if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
                        goto refill;
 
+               if (unlikely(nc->pfmemalloc)) {
+                       free_the_page(page, compound_order(page));
+                       goto refill;
+               }
+
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
                /* if size can vary use size else just use PAGE_SIZE */
                size = nc->size;
index 6d7c6a5..f9977d6 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -274,22 +274,32 @@ static inline size_t obj_full_size(struct kmem_cache *s)
        return s->size + sizeof(struct obj_cgroup *);
 }
 
-static inline struct obj_cgroup *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
-                                                          size_t objects,
-                                                          gfp_t flags)
+/*
+ * Returns false if the allocation should fail.
+ */
+static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+                                            struct obj_cgroup **objcgp,
+                                            size_t objects, gfp_t flags)
 {
        struct obj_cgroup *objcg;
 
+       if (!memcg_kmem_enabled())
+               return true;
+
+       if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))
+               return true;
+
        objcg = get_obj_cgroup_from_current();
        if (!objcg)
-               return NULL;
+               return true;
 
        if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) {
                obj_cgroup_put(objcg);
-               return NULL;
+               return false;
        }
 
-       return objcg;
+       *objcgp = objcg;
+       return true;
 }
 
 static inline void mod_objcg_state(struct obj_cgroup *objcg,
@@ -315,7 +325,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
        unsigned long off;
        size_t i;
 
-       if (!objcg)
+       if (!memcg_kmem_enabled() || !objcg)
                return;
 
        flags &= ~__GFP_ACCOUNT;
@@ -400,11 +410,11 @@ static inline void memcg_free_page_obj_cgroups(struct page *page)
 {
 }
 
-static inline struct obj_cgroup *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
-                                                          size_t objects,
-                                                          gfp_t flags)
+static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+                                            struct obj_cgroup **objcgp,
+                                            size_t objects, gfp_t flags)
 {
-       return NULL;
+       return true;
 }
 
 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
@@ -508,9 +518,8 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
        if (should_failslab(s, flags))
                return NULL;
 
-       if (memcg_kmem_enabled() &&
-           ((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)))
-               *objcgp = memcg_slab_pre_alloc_hook(s, size, flags);
+       if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
+               return NULL;
 
        return s;
 }
@@ -529,8 +538,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
                                         s->flags, flags);
        }
 
-       if (memcg_kmem_enabled())
-               memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
+       memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
 }
 
 #ifndef CONFIG_SLOB
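
Under the new contract, the boolean answers "may the allocation proceed?" while the out-parameter carries the charge to undo later; a true return with *objcgp left NULL simply means no accounting applied. A hedged caller-side sketch (s, size, flags, p as in the hooks above):

    struct obj_cgroup *objcg = NULL;

    if (!memcg_slab_pre_alloc_hook(s, &objcg, size, flags))
            return NULL;    /* charging failed: fail the allocation */

    /* ... allocate the objects into p ... */

    /* objcg may still be NULL here (kmem accounting disabled, or neither
     * __GFP_ACCOUNT nor SLAB_ACCOUNT set); the post hook copes with that */
    memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
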
index c4a6136..d583611 100644 (file)
@@ -2867,6 +2867,7 @@ late_initcall(max_swapfiles_check);
 static struct swap_info_struct *alloc_swap_info(void)
 {
        struct swap_info_struct *p;
+       struct swap_info_struct *defer = NULL;
        unsigned int type;
        int i;
 
@@ -2895,7 +2896,7 @@ static struct swap_info_struct *alloc_swap_info(void)
                smp_wmb();
                WRITE_ONCE(nr_swapfiles, nr_swapfiles + 1);
        } else {
-               kvfree(p);
+               defer = p;
                p = swap_info[type];
                /*
                 * Do not memset this entry: a racing procfs swap_next()
@@ -2908,6 +2909,7 @@ static struct swap_info_struct *alloc_swap_info(void)
                plist_node_init(&p->avail_lists[i], 0);
        p->flags = SWP_USED;
        spin_unlock(&swap_lock);
+       kvfree(defer);
        spin_lock_init(&p->lock);
        spin_lock_init(&p->cont_lock);
 
index 918c7b0..cdfaaad 100644 (file)
@@ -293,11 +293,7 @@ struct zspage {
 };
 
 struct mapping_area {
-#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
-       struct vm_struct *vm; /* vm area for mapping object that span pages */
-#else
        char *vm_buf; /* copy buffer for objects that span pages */
-#endif
        char *vm_addr; /* address of kmap_atomic()'ed pages */
        enum zs_mapmode vm_mm; /* mapping mode */
 };
@@ -1113,54 +1109,6 @@ static struct zspage *find_get_zspage(struct size_class *class)
        return zspage;
 }
 
-#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
-static inline int __zs_cpu_up(struct mapping_area *area)
-{
-       /*
-        * Make sure we don't leak memory if a cpu UP notification
-        * and zs_init() race and both call zs_cpu_up() on the same cpu
-        */
-       if (area->vm)
-               return 0;
-       area->vm = get_vm_area(PAGE_SIZE * 2, 0);
-       if (!area->vm)
-               return -ENOMEM;
-
-       /*
-        * Populate ptes in advance to avoid pte allocation with GFP_KERNEL
-        * in non-preemtible context of zs_map_object.
-        */
-       return apply_to_page_range(&init_mm, (unsigned long)area->vm->addr,
-                       PAGE_SIZE * 2, NULL, NULL);
-}
-
-static inline void __zs_cpu_down(struct mapping_area *area)
-{
-       if (area->vm)
-               free_vm_area(area->vm);
-       area->vm = NULL;
-}
-
-static inline void *__zs_map_object(struct mapping_area *area,
-                               struct page *pages[2], int off, int size)
-{
-       unsigned long addr = (unsigned long)area->vm->addr;
-
-       BUG_ON(map_kernel_range(addr, PAGE_SIZE * 2, PAGE_KERNEL, pages) < 0);
-       area->vm_addr = area->vm->addr;
-       return area->vm_addr + off;
-}
-
-static inline void __zs_unmap_object(struct mapping_area *area,
-                               struct page *pages[2], int off, int size)
-{
-       unsigned long addr = (unsigned long)area->vm_addr;
-
-       unmap_kernel_range(addr, PAGE_SIZE * 2);
-}
-
-#else /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
-
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
        /*
@@ -1241,8 +1189,6 @@ out:
        pagefault_enable();
 }
 
-#endif /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
-
 static int zs_cpu_prepare(unsigned int cpu)
 {
        struct mapping_area *area;
index 9a47ef8..1f1f5b0 100644 (file)
@@ -391,6 +391,7 @@ out:
 
 /**
  * batadv_frag_create() - create a fragment from skb
+ * @net_dev: outgoing device for fragment
  * @skb: skb to create fragment from
  * @frag_head: header to use in new fragment
  * @fragment_size: size of new fragment
@@ -401,22 +402,25 @@ out:
  *
  * Return: the new fragment, NULL on error.
  */
-static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
+static struct sk_buff *batadv_frag_create(struct net_device *net_dev,
+                                         struct sk_buff *skb,
                                          struct batadv_frag_packet *frag_head,
                                          unsigned int fragment_size)
 {
+       unsigned int ll_reserved = LL_RESERVED_SPACE(net_dev);
+       unsigned int tailroom = net_dev->needed_tailroom;
        struct sk_buff *skb_fragment;
        unsigned int header_size = sizeof(*frag_head);
        unsigned int mtu = fragment_size + header_size;
 
-       skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
+       skb_fragment = dev_alloc_skb(ll_reserved + mtu + tailroom);
        if (!skb_fragment)
                goto err;
 
        skb_fragment->priority = skb->priority;
 
        /* Eat the last mtu-bytes of the skb */
-       skb_reserve(skb_fragment, header_size + ETH_HLEN);
+       skb_reserve(skb_fragment, ll_reserved + header_size);
        skb_split(skb, skb_fragment, skb->len - fragment_size);
 
        /* Add the header */
@@ -439,11 +443,12 @@ int batadv_frag_send_packet(struct sk_buff *skb,
                            struct batadv_orig_node *orig_node,
                            struct batadv_neigh_node *neigh_node)
 {
+       struct net_device *net_dev = neigh_node->if_incoming->net_dev;
        struct batadv_priv *bat_priv;
        struct batadv_hard_iface *primary_if = NULL;
        struct batadv_frag_packet frag_header;
        struct sk_buff *skb_fragment;
-       unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
+       unsigned int mtu = net_dev->mtu;
        unsigned int header_size = sizeof(frag_header);
        unsigned int max_fragment_size, num_fragments;
        int ret;
@@ -503,7 +508,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
                        goto put_primary_if;
                }
 
-               skb_fragment = batadv_frag_create(skb, &frag_header,
+               skb_fragment = batadv_frag_create(net_dev, skb, &frag_header,
                                                  max_fragment_size);
                if (!skb_fragment) {
                        ret = -ENOMEM;
@@ -522,13 +527,14 @@ int batadv_frag_send_packet(struct sk_buff *skb,
                frag_header.no++;
        }
 
-       /* Make room for the fragment header. */
-       if (batadv_skb_head_push(skb, header_size) < 0 ||
-           pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) {
-               ret = -ENOMEM;
+       /* make sure that there is at least enough headroom for the
+        * fragmentation and ethernet headers
+        */
+       ret = skb_cow_head(skb, ETH_HLEN + header_size);
+       if (ret < 0)
                goto put_primary_if;
-       }
 
+       skb_push(skb, header_size);
        memcpy(skb->data, &frag_header, header_size);
 
        /* Send the last fragment */
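
The allocation the fragment code switches to is the generic idiom for honoring per-device headroom and tailroom; a minimal sketch:

    unsigned int hr = LL_RESERVED_SPACE(dev);  /* hard_header_len + extra, aligned */
    struct sk_buff *skb;

    skb = dev_alloc_skb(hr + payload_len + dev->needed_tailroom);
    if (!skb)
            return NULL;
    skb_reserve(skb, hr);   /* leave room so a later skb_push() of the
                             * link-layer header never has to reallocate */
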
index dad9964..3390459 100644 (file)
@@ -554,6 +554,9 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface)
        needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN);
        needed_headroom += batadv_max_header_len();
 
+       /* fragmentation headers don't strip the unicast/... header */
+       needed_headroom += sizeof(struct batadv_frag_packet);
+
        soft_iface->needed_headroom = needed_headroom;
        soft_iface->needed_tailroom = lower_tailroom;
 }
index a67b2b0..c0ca5fb 100644 (file)
@@ -180,6 +180,7 @@ static const struct file_operations batadv_log_fops = {
        .read           = batadv_log_read,
        .poll           = batadv_log_poll,
        .llseek         = no_llseek,
+       .owner          = THIS_MODULE,
 };
 
 /**
index 6f742fe..d3ea9d0 100644 (file)
@@ -177,6 +177,9 @@ static int br_dev_open(struct net_device *dev)
        br_stp_enable_bridge(br);
        br_multicast_open(br);
 
+       if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
+               br_multicast_join_snoopers(br);
+
        return 0;
 }
 
@@ -197,6 +200,9 @@ static int br_dev_stop(struct net_device *dev)
        br_stp_disable_bridge(br);
        br_multicast_stop(br);
 
+       if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
+               br_multicast_leave_snoopers(br);
+
        netif_stop_queue(dev);
 
        return 0;
@@ -207,6 +213,7 @@ static void br_get_stats64(struct net_device *dev,
 {
        struct net_bridge *br = netdev_priv(dev);
 
+       netdev_stats_to_stats64(stats, &dev->stats);
        dev_fetch_sw_netstats(stats, br->stats);
 }
 
index eae898c..54cb82a 100644 (file)
@@ -3286,7 +3286,7 @@ static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br)
 }
 #endif
 
-static void br_multicast_join_snoopers(struct net_bridge *br)
+void br_multicast_join_snoopers(struct net_bridge *br)
 {
        br_ip4_multicast_join_snoopers(br);
        br_ip6_multicast_join_snoopers(br);
@@ -3317,7 +3317,7 @@ static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br)
 }
 #endif
 
-static void br_multicast_leave_snoopers(struct net_bridge *br)
+void br_multicast_leave_snoopers(struct net_bridge *br)
 {
        br_ip4_multicast_leave_snoopers(br);
        br_ip6_multicast_leave_snoopers(br);
@@ -3336,9 +3336,6 @@ static void __br_multicast_open(struct net_bridge *br,
 
 void br_multicast_open(struct net_bridge *br)
 {
-       if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
-               br_multicast_join_snoopers(br);
-
        __br_multicast_open(br, &br->ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
        __br_multicast_open(br, &br->ip6_own_query);
@@ -3354,9 +3351,6 @@ void br_multicast_stop(struct net_bridge *br)
        del_timer_sync(&br->ip6_other_query.timer);
        del_timer_sync(&br->ip6_own_query.timer);
 #endif
-
-       if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
-               br_multicast_leave_snoopers(br);
 }
 
 void br_multicast_dev_del(struct net_bridge *br)
@@ -3487,6 +3481,7 @@ static void br_multicast_start_querier(struct net_bridge *br,
 int br_multicast_toggle(struct net_bridge *br, unsigned long val)
 {
        struct net_bridge_port *port;
+       bool change_snoopers = false;
 
        spin_lock_bh(&br->multicast_lock);
        if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val)
@@ -3495,7 +3490,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
        br_mc_disabled_update(br->dev, val);
        br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val);
        if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
-               br_multicast_leave_snoopers(br);
+               change_snoopers = true;
                goto unlock;
        }
 
@@ -3506,9 +3501,30 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
        list_for_each_entry(port, &br->port_list, list)
                __br_multicast_enable_port(port);
 
+       change_snoopers = true;
+
 unlock:
        spin_unlock_bh(&br->multicast_lock);
 
+       /* br_multicast_join_snoopers has the potential to cause
+        * an MLD Report/Leave to be delivered to br_multicast_rcv,
+        * which would in turn call br_multicast_add_group, which would
+        * attempt to acquire multicast_lock. This function should be
+        * called after the lock has been released to avoid deadlocks on
+        * multicast_lock.
+        *
+        * br_multicast_leave_snoopers does not have the problem since
+        * br_multicast_rcv first checks BROPT_MULTICAST_ENABLED, and
+        * returns without calling br_multicast_ipv4/6_rcv if it's not
+        * enabled. Moved both functions out just for symmetry.
+        */
+       if (change_snoopers) {
+               if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
+                       br_multicast_join_snoopers(br);
+               else
+                       br_multicast_leave_snoopers(br);
+       }
+
        return 0;
 }
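
A rough sketch of the recursion the comment above guards against, had the join stayed under the lock:

    br_multicast_toggle()
        spin_lock_bh(&br->multicast_lock)
        br_multicast_join_snoopers()             /* joins IGMP/MLD groups */
            ... a looped-back MLD/IGMP report reaches the bridge ...
            br_multicast_rcv()
                br_multicast_add_group()
                    spin_lock(&br->multicast_lock)   /* already held: deadlock */
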
 
index 04c3f9a..8edfb98 100644 (file)
@@ -735,6 +735,11 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
        mtu_reserved = nf_bridge_mtu_reduction(skb);
        mtu = skb->dev->mtu;
 
+       if (nf_bridge->pkt_otherhost) {
+               skb->pkt_type = PACKET_OTHERHOST;
+               nf_bridge->pkt_otherhost = false;
+       }
+
        if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
                mtu = nf_bridge->frag_max_size;
 
@@ -835,8 +840,6 @@ static unsigned int br_nf_post_routing(void *priv,
        else
                return NF_ACCEPT;
 
-       /* We assume any code from br_dev_queue_push_xmit onwards doesn't care
-        * about the value of skb->pkt_type. */
        if (skb->pkt_type == PACKET_OTHERHOST) {
                skb->pkt_type = PACKET_HOST;
                nf_bridge->pkt_otherhost = true;
index 345118e..8424464 100644 (file)
@@ -792,6 +792,8 @@ void br_multicast_del_port(struct net_bridge_port *port);
 void br_multicast_enable_port(struct net_bridge_port *port);
 void br_multicast_disable_port(struct net_bridge_port *port);
 void br_multicast_init(struct net_bridge *br);
+void br_multicast_join_snoopers(struct net_bridge *br);
+void br_multicast_leave_snoopers(struct net_bridge *br);
 void br_multicast_open(struct net_bridge *br);
 void br_multicast_stop(struct net_bridge *br);
 void br_multicast_dev_del(struct net_bridge *br);
@@ -969,6 +971,14 @@ static inline void br_multicast_init(struct net_bridge *br)
 {
 }
 
+static inline void br_multicast_join_snoopers(struct net_bridge *br)
+{
+}
+
+static inline void br_multicast_leave_snoopers(struct net_bridge *br)
+{
+}
+
 static inline void br_multicast_open(struct net_bridge *br)
 {
 }
index 3e493eb..08c7741 100644 (file)
@@ -266,8 +266,10 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
                }
 
                masterv = br_vlan_get_master(br, v->vid, extack);
-               if (!masterv)
+               if (!masterv) {
+                       err = -ENOMEM;
                        goto out_filt;
+               }
                v->brvlan = masterv;
                if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) {
                        v->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats);
index 6373ab9..4c343b4 100644 (file)
@@ -541,10 +541,13 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
 
        /* Check for bugs in CAN protocol implementations using af_can.c:
         * 'rcv' will be NULL if no matching list item was found for removal.
+        * As this case may happen when closing a socket while the notifier
+        * for removing the CAN netdev is running, we just print a warning
+        * here.
         */
        if (!rcv) {
-               WARN(1, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n",
-                    DNAME(dev), can_id, mask);
+               pr_warn("can: receive list entry not found for dev %s, id %03X, mask %03X\n",
+                       DNAME(dev), can_id, mask);
                goto out;
        }
 
@@ -677,16 +680,25 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
 {
        struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 
-       if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
-                    cfd->len > CAN_MAX_DLEN)) {
-               pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
+       if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) {
+               pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n",
+                            dev->type, skb->len);
+               goto free_skb;
+       }
+
+       /* This check is made separately since cfd->len would be uninitialized if skb->len == 0. */
+       if (unlikely(cfd->len > CAN_MAX_DLEN)) {
+               pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d, datalen %d\n",
                             dev->type, skb->len, cfd->len);
-               kfree_skb(skb);
-               return NET_RX_DROP;
+               goto free_skb;
        }
 
        can_receive(skb, dev);
        return NET_RX_SUCCESS;
+
+free_skb:
+       kfree_skb(skb);
+       return NET_RX_DROP;
 }
 
 static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -694,16 +706,25 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
 {
        struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 
-       if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
-                    cfd->len > CANFD_MAX_DLEN)) {
-               pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
+       if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) {
+               pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n",
+                            dev->type, skb->len);
+               goto free_skb;
+       }
+
+       /* This check is made separately since cfd->len would be uninitialized if skb->len == 0. */
+       if (unlikely(cfd->len > CANFD_MAX_DLEN)) {
+               pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d, datalen %d\n",
                             dev->type, skb->len, cfd->len);
-               kfree_skb(skb);
-               return NET_RX_DROP;
+               goto free_skb;
        }
 
        can_receive(skb, dev);
        return NET_RX_SUCCESS;
+
+free_skb:
+       kfree_skb(skb);
+       return NET_RX_DROP;
 }
 
 /* af_can protocol functions */
index d78ab13..26bdc3c 100644 (file)
@@ -1157,6 +1157,9 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname,
        if (level != SOL_CAN_ISOTP)
                return -EINVAL;
 
+       if (so->bound)
+               return -EISCONN;
+
        switch (optname) {
        case CAN_ISOTP_OPTS:
                if (optlen != sizeof(struct can_isotp_options))
index 82dc6b4..38412e7 100644 (file)
@@ -4180,7 +4180,7 @@ int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
 }
 EXPORT_SYMBOL(dev_queue_xmit_accel);
 
-int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
+int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
 {
        struct net_device *dev = skb->dev;
        struct sk_buff *orig_skb = skb;
@@ -4210,17 +4210,13 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
        dev_xmit_recursion_dec();
 
        local_bh_enable();
-
-       if (!dev_xmit_complete(ret))
-               kfree_skb(skb);
-
        return ret;
 drop:
        atomic_long_inc(&dev->tx_dropped);
        kfree_skb_list(skb);
        return NET_XMIT_DROP;
 }
-EXPORT_SYMBOL(dev_direct_xmit);
+EXPORT_SYMBOL(__dev_direct_xmit);
 
 /*************************************************************************
  *                     Receiver routines
@@ -8921,6 +8917,17 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
        return dev->xdp_state[mode].prog;
 }
 
+static u8 dev_xdp_prog_count(struct net_device *dev)
+{
+       u8 count = 0;
+       int i;
+
+       for (i = 0; i < __MAX_XDP_MODE; i++)
+               if (dev->xdp_state[i].prog || dev->xdp_state[i].link)
+                       count++;
+       return count;
+}
+
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
 {
        struct bpf_prog *prog = dev_xdp_prog(dev, mode);
@@ -9011,6 +9018,7 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
                          struct bpf_xdp_link *link, struct bpf_prog *new_prog,
                          struct bpf_prog *old_prog, u32 flags)
 {
+       unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
        struct bpf_prog *cur_prog;
        enum bpf_xdp_mode mode;
        bpf_op_t bpf_op;
@@ -9026,11 +9034,17 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
                NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment");
                return -EINVAL;
        }
-       /* just one XDP mode bit should be set, zero defaults to SKB mode */
-       if (hweight32(flags & XDP_FLAGS_MODES) > 1) {
+       /* just one XDP mode bit should be set, zero defaults to drv/skb mode */
+       if (num_modes > 1) {
                NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set");
                return -EINVAL;
        }
+       /* avoid ambiguity if offload + drv/skb mode progs are both loaded */
+       if (!num_modes && dev_xdp_prog_count(dev) > 1) {
+               NL_SET_ERR_MSG(extack,
+                              "More than one program loaded, unset mode is ambiguous");
+               return -EINVAL;
+       }
        /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */
        if (old_prog && !(flags & XDP_FLAGS_REPLACE)) {
                NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified");
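
The rename at the top of this file splits the transmit helper so AF_XDP can keep its own skb reference across a failed send. Presumably the old name survives as a thin inline wrapper (in netdevice.h, not shown in this hunk) that preserves the old consume-on-failure behavior for existing callers; a sketch of what such a wrapper looks like:

    static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
    {
            int ret = __dev_direct_xmit(skb, queue_id);

            /* old behavior: the skb is consumed even when the send fails */
            if (!dev_xmit_complete(ret))
                    kfree_skb(skb);
            return ret;
    }
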
index ab4b136..8c5ddff 100644 (file)
@@ -517,7 +517,7 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l
        return test_bit(limit, &devlink->ops->reload_limits);
 }
 
-static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
+static int devlink_reload_stat_put(struct sk_buff *msg,
                                   enum devlink_reload_limit limit, u32 value)
 {
        struct nlattr *reload_stats_entry;
@@ -526,8 +526,7 @@ static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_acti
        if (!reload_stats_entry)
                return -EMSGSIZE;
 
-       if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action) ||
-           nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+       if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
            nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
                goto nla_put_failure;
        nla_nest_end(msg, reload_stats_entry);
@@ -540,7 +539,7 @@ nla_put_failure:
 
 static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
 {
-       struct nlattr *reload_stats_attr;
+       struct nlattr *reload_stats_attr, *act_info, *act_stats;
        int i, j, stat_idx;
        u32 value;
 
@@ -552,17 +551,29 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink
        if (!reload_stats_attr)
                return -EMSGSIZE;
 
-       for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
-               /* Remote stats are shown even if not locally supported. Stats
-                * of actions with unspecified limit are shown though drivers
-                * don't need to register unspecified limit.
-                */
-               if (!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
-                   !devlink_reload_limit_is_supported(devlink, j))
+       for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+               if ((!is_remote &&
+                    !devlink_reload_action_is_supported(devlink, i)) ||
+                   i == DEVLINK_RELOAD_ACTION_UNSPEC)
                        continue;
-               for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
-                       if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) ||
-                           i == DEVLINK_RELOAD_ACTION_UNSPEC ||
+               act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
+               if (!act_info)
+                       goto nla_put_failure;
+
+               if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
+                       goto action_info_nest_cancel;
+               act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
+               if (!act_stats)
+                       goto action_info_nest_cancel;
+
+               for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+                       /* Remote stats are shown even if not locally supported.
+                        * Stats of actions with unspecified limit are shown
+                        * though drivers don't need to register unspecified
+                        * limit.
+                        */
+                       if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+                            !devlink_reload_limit_is_supported(devlink, j)) ||
                            devlink_reload_combination_is_invalid(i, j))
                                continue;
 
@@ -571,13 +582,19 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink
                                value = devlink->stats.reload_stats[stat_idx];
                        else
                                value = devlink->stats.remote_reload_stats[stat_idx];
-                       if (devlink_reload_stat_put(msg, i, j, value))
-                               goto nla_put_failure;
+                       if (devlink_reload_stat_put(msg, j, value))
+                               goto action_stats_nest_cancel;
                }
+               nla_nest_end(msg, act_stats);
+               nla_nest_end(msg, act_info);
        }
        nla_nest_end(msg, reload_stats_attr);
        return 0;
 
+action_stats_nest_cancel:
+       nla_nest_cancel(msg, act_stats);
+action_info_nest_cancel:
+       nla_nest_cancel(msg, act_info);
 nla_put_failure:
        nla_nest_cancel(msg, reload_stats_attr);
        return -EMSGSIZE;
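
The restructuring above follows the standard netlink nesting discipline: every nla_nest_start() is paired with nla_nest_end() on success or nla_nest_cancel() on any failure inside it, unwinding innermost-first. Reduced to one level, with hypothetical attribute names:

    struct nlattr *nest;

    nest = nla_nest_start(msg, MY_ATTR_INFO);
    if (!nest)
            return -EMSGSIZE;
    if (nla_put_u8(msg, MY_ATTR_KIND, kind))
            goto cancel;
    nla_nest_end(msg, nest);        /* commit: patches the nest's length field */
    return 0;

    cancel:
    nla_nest_cancel(msg, nest);     /* trim the message back to before the nest */
    return -EMSGSIZE;
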
@@ -755,6 +772,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
        if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
                goto nla_put_failure;
 
+       /* Hold rtnl lock while accessing port's netdev attributes. */
+       rtnl_lock();
        spin_lock_bh(&devlink_port->type_lock);
        if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
                goto nla_put_failure_type_locked;
@@ -763,9 +782,10 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
                        devlink_port->desired_type))
                goto nla_put_failure_type_locked;
        if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
+               struct net *net = devlink_net(devlink_port->devlink);
                struct net_device *netdev = devlink_port->type_dev;
 
-               if (netdev &&
+               if (netdev && net_eq(net, dev_net(netdev)) &&
                    (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
                                 netdev->ifindex) ||
                     nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
@@ -781,6 +801,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
                        goto nla_put_failure_type_locked;
        }
        spin_unlock_bh(&devlink_port->type_lock);
+       rtnl_unlock();
        if (devlink_nl_port_attrs_put(msg, devlink_port))
                goto nla_put_failure;
        if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
@@ -791,6 +812,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
 
 nla_put_failure_type_locked:
        spin_unlock_bh(&devlink_port->type_lock);
+       rtnl_unlock();
 nla_put_failure:
        genlmsg_cancel(msg, hdr);
        return -EMSGSIZE;
@@ -1448,7 +1470,7 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
                err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
                                                pool_index, &cur, &max);
                if (err && err != -EOPNOTSUPP)
-                       return err;
+                       goto sb_occ_get_failure;
                if (!err) {
                        if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
                                goto nla_put_failure;
@@ -1461,8 +1483,10 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
        return 0;
 
 nla_put_failure:
+       err = -EMSGSIZE;
+sb_occ_get_failure:
        genlmsg_cancel(msg, hdr);
-       return -EMSGSIZE;
+       return err;
 }
 
 static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
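
The error-path change just above keeps the driver's own return value from ->sb_occ_port_pool_get() instead of clobbering it with -EMSGSIZE; only genuine netlink put failures report -EMSGSIZE now. The shape of that fix in isolation (a sketch; the helper and its error value are invented):

    #include <stdio.h>

    #define EMSGSIZE 90

    static int occ_get(int *cur)
    {
            *cur = 0;
            return -5;      /* pretend the driver failed with a real error (-EIO) */
    }

    static int fill(void)
    {
            int cur, err;

            err = occ_get(&cur);
            if (err)
                    goto occ_get_failure;   /* keep the driver's error code */
            if (0)                          /* ~ an nla_put_u32() failure */
                    goto nla_put_failure;
            return 0;

    nla_put_failure:
            err = -EMSGSIZE;                /* only put failures mean -EMSGSIZE */
    occ_get_failure:
            /* ~ genlmsg_cancel(msg, hdr) */
            return err;
    }

    int main(void)
    {
            printf("%d\n", fill());         /* prints -5, not -90 */
            return 0;
    }
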
index d4474c8..715b67f 100644
@@ -381,10 +381,8 @@ static void __flow_block_indr_cleanup(void (*release)(void *cb_priv),
 
        list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) {
                if (this->release == release &&
-                   this->indr.cb_priv == cb_priv) {
+                   this->indr.cb_priv == cb_priv)
                        list_move(&this->indr.list, cleanup_list);
-                       return;
-               }
        }
 }
 
index e095fb8..6eb2e5e 100644
@@ -99,9 +99,14 @@ void gro_cells_destroy(struct gro_cells *gcells)
                struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
 
                napi_disable(&cell->napi);
-               netif_napi_del(&cell->napi);
+               __netif_napi_del(&cell->napi);
                __skb_queue_purge(&cell->napi_skbs);
        }
+       /* This barrier is needed because netpoll could access dev->napi_list
+        * under rcu protection.
+        */
+       synchronize_net();
+
        free_percpu(gcells->cells);
        gcells->cells = NULL;
 }
index 7d34382..2f7940b 100644
@@ -39,12 +39,11 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
 {
        int ret;
 
-       /* Preempt disable is needed to protect per-cpu redirect_info between
-        * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and
-        * access to maps strictly require a rcu_read_lock() for protection,
-        * mixing with BH RCU lock doesn't work.
+       /* Migration disable and BH disable are needed to protect per-cpu
+        * redirect_info between BPF prog and skb_do_redirect().
         */
-       preempt_disable();
+       migrate_disable();
+       local_bh_disable();
        bpf_compute_data_pointers(skb);
        ret = bpf_prog_run_save_cb(lwt->prog, skb);
 
@@ -78,7 +77,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
                break;
        }
 
-       preempt_enable();
+       local_bh_enable();
+       migrate_enable();
 
        return ret;
 }
index 8e39e28..9500d28 100644
@@ -235,6 +235,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 
                        write_lock(&n->lock);
                        if ((n->nud_state == NUD_FAILED) ||
+                           (tbl->is_multicast &&
+                            tbl->is_multicast(n->primary_key)) ||
                            time_after(tref, n->updated))
                                remove = true;
                        write_unlock(&n->lock);
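
The hunk above lets forced GC also evict multicast neighbour entries when the table provides an is_multicast() hook; the hook is optional, hence the NULL check before the call. A small userspace model of such an optional per-table predicate (types and values are illustrative, and the address test is done in host byte order here):

    #include <stdbool.h>
    #include <stdio.h>

    struct entry { unsigned int key; int failed; };

    struct table {
            /* optional: tables that cannot classify keys leave this NULL */
            bool (*is_multicast)(unsigned int key);
    };

    static bool ipv4_mcast(unsigned int key)
    {
            return (key >> 28) == 0xE;      /* 224.0.0.0/4, host byte order */
    }

    static bool should_remove(const struct table *t, const struct entry *e)
    {
            return e->failed ||
                   (t->is_multicast && t->is_multicast(e->key));
    }

    int main(void)
    {
            struct table arp = { .is_multicast = ipv4_mcast };
            struct entry e = { .key = 0xE0000001u, .failed = 0 }; /* 224.0.0.1 */

            printf("remove=%d\n", should_remove(&arp, &e));
            return 0;
    }
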
index c310c7c..9609482 100644
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <linux/if_vlan.h>
+#include <net/dsa.h>
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <net/addrconf.h>
@@ -657,15 +658,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
 
 int netpoll_setup(struct netpoll *np)
 {
-       struct net_device *ndev = NULL;
+       struct net_device *ndev = NULL, *dev = NULL;
+       struct net *net = current->nsproxy->net_ns;
        struct in_device *in_dev;
        int err;
 
        rtnl_lock();
-       if (np->dev_name[0]) {
-               struct net *net = current->nsproxy->net_ns;
+       if (np->dev_name[0])
                ndev = __dev_get_by_name(net, np->dev_name);
-       }
+
        if (!ndev) {
                np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
                err = -ENODEV;
@@ -673,6 +674,19 @@ int netpoll_setup(struct netpoll *np)
        }
        dev_hold(ndev);
 
+       /* bring up DSA management network devices first */
+       for_each_netdev(net, dev) {
+               if (!netdev_uses_dsa(dev))
+                       continue;
+
+               err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
+               if (err < 0) {
+                       np_err(np, "%s failed to open %s\n",
+                              np->dev_name, dev->name);
+                       goto put;
+               }
+       }
+
        if (netdev_master_upper_dev_get(ndev)) {
                np_err(np, "%s is a slave device, aborting\n", np->dev_name);
                err = -EBUSY;
index 1ba8f01..e578544 100644
@@ -4549,7 +4549,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
        if (skb && (skb_next = skb_peek(q))) {
                icmp_next = is_icmp_err_skb(skb_next);
                if (icmp_next)
-                       sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
+                       sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
        }
        spin_unlock_irqrestore(&q->lock, flags);
 
@@ -5786,6 +5786,9 @@ int skb_mpls_dec_ttl(struct sk_buff *skb)
        if (unlikely(!eth_p_mpls(skb->protocol)))
                return -EINVAL;
 
+       if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
+               return -ENOMEM;
+
        lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
        ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
        if (!--ttl)
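
The added pskb_may_pull() ensures the 4-byte MPLS label stack entry is actually present in the linear data before mpls_hdr() reads it. The same invariant on a plain buffer (the helper name and offsets are illustrative, not the skb API):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define MPLS_HLEN 4

    /* Reject packets whose MPLS LSE is not fully within the buffer. */
    static int mpls_dec_ttl_model(uint8_t *pkt, size_t len, size_t net_off)
    {
            uint32_t lse;

            if (len < net_off + MPLS_HLEN)
                    return -1;                      /* ~ !pskb_may_pull() -> -ENOMEM */

            memcpy(&lse, pkt + net_off, MPLS_HLEN); /* safe: bounds checked */
            /* ... decrement the TTL field in lse and write it back ... */
            return 0;
    }

    int main(void)
    {
            uint8_t pkt[18] = { 0 };

            printf("%d\n", mpls_dec_ttl_model(pkt, sizeof(pkt), 14)); /* ok */
            printf("%d\n", mpls_dec_ttl_model(pkt, sizeof(pkt), 16)); /* truncated */
            return 0;
    }
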
index 654182e..25cdbb2 100644
@@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
        struct scatterlist *sge = sk_msg_elem(msg, i);
        u32 len = sge->length;
 
-       if (charge)
-               sk_mem_uncharge(sk, len);
-       if (!msg->skb)
+       /* When the skb owns the memory we free it from the consume_skb() path. */
+       if (!msg->skb) {
+               if (charge)
+                       sk_mem_uncharge(sk, len);
                put_page(sg_page(sge));
+       }
        memset(sge, 0, sizeof(*sge));
        return len;
 }
@@ -397,28 +399,45 @@ out:
 }
 EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
 
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+                                                 struct sk_buff *skb)
 {
-       struct sock *sk = psock->sk;
-       int copied = 0, num_sge;
        struct sk_msg *msg;
 
+       if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+               return NULL;
+
+       if (!sk_rmem_schedule(sk, skb, skb->truesize))
+               return NULL;
+
        msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
        if (unlikely(!msg))
-               return -EAGAIN;
-       if (!sk_rmem_schedule(sk, skb, skb->len)) {
-               kfree(msg);
-               return -EAGAIN;
-       }
+               return NULL;
 
        sk_msg_init(msg);
+       return msg;
+}
+
+static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+                                       struct sk_psock *psock,
+                                       struct sock *sk,
+                                       struct sk_msg *msg)
+{
+       int num_sge, copied;
+
+       /* skb_linearize() may fail with ENOMEM, but let's simply try
+        * again later if this happens. Under memory pressure we don't
+        * want to drop the skb. We need to linearize the skb so that
+        * the mapping in skb_to_sgvec() cannot error.
+        */
+       if (skb_linearize(skb))
+               return -EAGAIN;
        num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
        if (unlikely(num_sge < 0)) {
                kfree(msg);
                return num_sge;
        }
 
-       sk_mem_charge(sk, skb->len);
        copied = skb->len;
        msg->sg.start = 0;
        msg->sg.size = copied;
@@ -430,6 +449,48 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
        return copied;
 }
 
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+{
+       struct sock *sk = psock->sk;
+       struct sk_msg *msg;
+
+       /* If we are receiving on the same sock skb->sk is already
+        * assigned, skip memory accounting and the owner transition
+        * since they are already set up correctly.
+        */
+       if (unlikely(skb->sk == sk))
+               return sk_psock_skb_ingress_self(psock, skb);
+       msg = sk_psock_create_ingress_msg(sk, skb);
+       if (!msg)
+               return -EAGAIN;
+
+       /* This will transition ownership of the data from the socket where
+        * the BPF program was run initiating the redirect to the socket
+        * we will eventually receive this data on. The data will be
+        * released from consume_skb() found in __tcp_bpf_recvmsg() after
+        * it has been copied into user buffers.
+        */
+       skb_set_owner_r(skb, sk);
+       return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+}
+
+/* Puts an skb on the ingress queue of the socket already assigned to the
+ * skb. In this case we do not need to check memory limits or call
+ * skb_set_owner_r() because the skb is already accounted for here.
+ */
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+{
+       struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+       struct sock *sk = psock->sk;
+
+       if (unlikely(!msg))
+               return -EAGAIN;
+       sk_msg_init(msg);
+       return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+}
+
 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
                               u32 off, u32 len, bool ingress)
 {
@@ -789,7 +850,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
                 * retrying later from workqueue.
                 */
                if (skb_queue_empty(&psock->ingress_skb)) {
-                       err = sk_psock_skb_ingress(psock, skb);
+                       err = sk_psock_skb_ingress_self(psock, skb);
                }
                if (err < 0) {
                        skb_queue_tail(&psock->ingress_skb, skb);
index 48aba93..d900ceb 100644
@@ -335,11 +335,10 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
  * scenarios (e.g. queue full), it is possible to return the xdp_frame
  * while still leveraging this protection.  The @napi_direct boolean
  * is used for those call sites, thus allowing for faster recycling
- * of xdp_frames/pages in those cases. This path is never used by the
- * MEM_TYPE_XSK_BUFF_POOL memory type, so it's explicitly not part of
- * the switch-statement.
+ * of xdp_frames/pages in those cases.
  */
-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
+static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
+                        struct xdp_buff *xdp)
 {
        struct xdp_mem_allocator *xa;
        struct page *page;
@@ -361,6 +360,10 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
                page = virt_to_page(data); /* Assumes order0 page*/
                put_page(page);
                break;
+       case MEM_TYPE_XSK_BUFF_POOL:
+               /* NB! Only valid from an xdp_buff! */
+               xsk_buff_free(xdp);
+               break;
        default:
                /* Not possible, checked in xdp_rxq_info_reg_mem_model() */
                WARN(1, "Incorrect XDP memory type (%d) usage", mem->type);
@@ -370,19 +373,19 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
 
 void xdp_return_frame(struct xdp_frame *xdpf)
 {
-       __xdp_return(xdpf->data, &xdpf->mem, false);
+       __xdp_return(xdpf->data, &xdpf->mem, false, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 {
-       __xdp_return(xdpf->data, &xdpf->mem, true);
+       __xdp_return(xdpf->data, &xdpf->mem, true, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
 
 void xdp_return_buff(struct xdp_buff *xdp)
 {
-       __xdp_return(xdp->data, &xdp->rxq->mem, true);
+       __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
 }
 
 /* Only called for MEM_TYPE_PAGE_POOL; see xdp.h */
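
Passing the full xdp_buff down lets __xdp_return() handle MEM_TYPE_XSK_BUFF_POOL, whose buffers can only be returned through the buff rather than a bare data pointer; the xdp_frame paths pass NULL and never carry that type. A rough userspace model of the dispatch (the struct and helper are invented for the sketch):

    #include <stdio.h>
    #include <stdlib.h>

    enum mem_type { MEM_TYPE_PAGE_ORDER0, MEM_TYPE_XSK_BUFF_POOL };

    struct xdp_buff_model { void *data; int pool_id; };

    /* Model of __xdp_return(): an XSK buffer needs the whole buff, so the
     * frame paths pass xdp == NULL and must never use MEM_TYPE_XSK_BUFF_POOL.
     */
    static void xdp_return_model(void *data, enum mem_type type,
                                 struct xdp_buff_model *xdp)
    {
            switch (type) {
            case MEM_TYPE_PAGE_ORDER0:
                    free(data);
                    break;
            case MEM_TYPE_XSK_BUFF_POOL:
                    printf("buff %p back to pool %d\n", xdp->data, xdp->pool_id);
                    break;
            }
    }

    int main(void)
    {
            struct xdp_buff_model b = { .data = malloc(64), .pool_id = 7 };

            xdp_return_model(b.data, MEM_TYPE_XSK_BUFF_POOL, &b); /* ~ xdp_return_buff() */
            free(b.data);   /* the model's "pool" did not really free it */
            return 0;
    }
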
@@ -400,18 +403,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
 }
 EXPORT_SYMBOL_GPL(__xdp_release_frame);
 
-bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
-                            struct netdev_bpf *bpf)
-{
-       if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
-               NL_SET_ERR_MSG(bpf->extack,
-                              "program loaded with different flags");
-               return false;
-       }
-       return true;
-}
-EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);
-
 void xdp_attachment_setup(struct xdp_attachment_info *info,
                          struct netdev_bpf *bpf)
 {
index bb3d706..b0b6e6a 100644
@@ -427,7 +427,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
 
        if (__inet_inherit_port(sk, newsk) < 0)
                goto put_and_exit;
-       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
        if (*own_req)
                ireq->ireq_opt = NULL;
        else
index ef4ab28..78ee1b5 100644
@@ -533,7 +533,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
                dccp_done(newsk);
                goto out;
        }
-       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
        /* Clone pktoptions received with SYN, if we own the req */
        if (*own_req && ireq->pktopts) {
                newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
index 1fb3603..0515d66 100644
@@ -628,6 +628,8 @@ int ethnl_parse_bitset(unsigned long *val, unsigned long *mask,
                        return ret;
 
                change_bits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]);
+               if (change_bits > nbits)
+                       change_bits = nbits;
                bitmap_from_arr32(val, nla_data(tb[ETHTOOL_A_BITSET_VALUE]),
                                  change_bits);
                if (change_bits < nbits)
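
Clamping change_bits before bitmap_from_arr32() keeps a user-supplied ETHTOOL_A_BITSET_SIZE from expanding past the kernel-side bitmap. The pattern reduced to a plain word copy (a sketch, not the bitmap API; it assumes 32-bit unsigned int):

    #include <stdio.h>

    /* Never trust a user-supplied element count: clamp to the destination. */
    static void copy_bits(unsigned int *dst, unsigned int dst_bits,
                          const unsigned int *src, unsigned int change_bits)
    {
            unsigned int i, words;

            if (change_bits > dst_bits)
                    change_bits = dst_bits;         /* the ethtool fix */

            words = (change_bits + 31) / 32;
            for (i = 0; i < words; i++)
                    dst[i] = src[i];
            /* a real bitmap copy would also mask the tail word */
    }

    int main(void)
    {
            unsigned int dst[2] = { 0, 0 };
            unsigned int src[4] = { ~0u, ~0u, ~0u, ~0u };

            copy_bits(dst, 64, src, 1024);          /* clamped to 64 bits */
            printf("%x %x\n", dst[0], dst[1]);
            return 0;
    }
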
index 687971d..922dd73 100644
@@ -125,6 +125,7 @@ static int arp_constructor(struct neighbour *neigh);
 static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
 static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
 static void parp_redo(struct sk_buff *skb);
+static int arp_is_multicast(const void *pkey);
 
 static const struct neigh_ops arp_generic_ops = {
        .family =               AF_INET,
@@ -156,6 +157,7 @@ struct neigh_table arp_tbl = {
        .key_eq         = arp_key_eq,
        .constructor    = arp_constructor,
        .proxy_redo     = parp_redo,
+       .is_multicast   = arp_is_multicast,
        .id             = "arp_cache",
        .parms          = {
                .tbl                    = &arp_tbl,
@@ -928,6 +930,10 @@ static void parp_redo(struct sk_buff *skb)
        arp_process(dev_net(skb->dev), NULL, skb);
 }
 
+static int arp_is_multicast(const void *pkey)
+{
+       return ipv4_is_multicast(*((__be32 *)pkey));
+}
 
 /*
  *     Receive an arp request from the device layer.
index 86a23e4..cdf6ec5 100644
@@ -696,7 +696,7 @@ int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
                cfg->fc_gw4 = *((__be32 *)via->rtvia_addr);
                break;
        case AF_INET6:
-#ifdef CONFIG_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
                if (alen != sizeof(struct in6_addr)) {
                        NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA");
                        return -EINVAL;
@@ -825,7 +825,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
        if (has_gw && has_via) {
                NL_SET_ERR_MSG(extack,
                               "Nexthop configuration can not contain both GATEWAY and VIA");
-               goto errout;
+               return -EINVAL;
        }
 
        return 0;
index 4148f5f..f60869a 100644
@@ -787,7 +787,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
        timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
        mod_timer(&req->rsk_timer, jiffies + timeout);
 
-       inet_ehash_insert(req_to_sk(req), NULL);
+       inet_ehash_insert(req_to_sk(req), NULL, NULL);
        /* before letting lookups find us, make sure all req fields
         * are committed to memory and refcnt initialized.
         */
index 366a450..93474b1 100644
@@ -479,8 +479,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
        r->idiag_inode  = 0;
 
        if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
-                                    inet_rsk(reqsk)->ir_mark))
+                                    inet_rsk(reqsk)->ir_mark)) {
+               nlmsg_cancel(skb, nlh);
                return -EMSGSIZE;
+       }
 
        nlmsg_end(skb, nlh);
        return 0;
index 8cbe743..45fb450 100644
@@ -20,6 +20,9 @@
 #include <net/addrconf.h>
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/inet6_hashtables.h>
+#endif
 #include <net/secure_seq.h>
 #include <net/ip.h>
 #include <net/tcp.h>
@@ -508,10 +511,52 @@ static u32 inet_sk_port_offset(const struct sock *sk)
                                          inet->inet_dport);
 }
 
-/* insert a socket into ehash, and eventually remove another one
- * (The another one can be a SYN_RECV or TIMEWAIT
+/* Searches for an existing socket in the ehash bucket list.
+ * Returns true if found, false otherwise.
  */
-bool inet_ehash_insert(struct sock *sk, struct sock *osk)
+static bool inet_ehash_lookup_by_sk(struct sock *sk,
+                                   struct hlist_nulls_head *list)
+{
+       const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
+       const int sdif = sk->sk_bound_dev_if;
+       const int dif = sk->sk_bound_dev_if;
+       const struct hlist_nulls_node *node;
+       struct net *net = sock_net(sk);
+       struct sock *esk;
+
+       INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
+
+       sk_nulls_for_each_rcu(esk, node, list) {
+               if (esk->sk_hash != sk->sk_hash)
+                       continue;
+               if (sk->sk_family == AF_INET) {
+                       if (unlikely(INET_MATCH(esk, net, acookie,
+                                               sk->sk_daddr,
+                                               sk->sk_rcv_saddr,
+                                               ports, dif, sdif))) {
+                               return true;
+                       }
+               }
+#if IS_ENABLED(CONFIG_IPV6)
+               else if (sk->sk_family == AF_INET6) {
+                       if (unlikely(INET6_MATCH(esk, net,
+                                                &sk->sk_v6_daddr,
+                                                &sk->sk_v6_rcv_saddr,
+                                                ports, dif, sdif))) {
+                               return true;
+                       }
+               }
+#endif
+       }
+       return false;
+}
+
+/* Insert a socket into ehash, and eventually remove another one
+ * (the other one can be a SYN_RECV or TIMEWAIT).
+ * If a matching socket already exists, sk is not inserted
+ * and *found_dup_sk is set to true.
+ */
+bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
 {
        struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
        struct hlist_nulls_head *list;
@@ -530,16 +575,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
        if (osk) {
                WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
                ret = sk_nulls_del_node_init_rcu(osk);
+       } else if (found_dup_sk) {
+               *found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
+               if (*found_dup_sk)
+                       ret = false;
        }
+
        if (ret)
                __sk_nulls_add_node_rcu(sk, list);
+
        spin_unlock(lock);
+
        return ret;
 }
 
-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
 {
-       bool ok = inet_ehash_insert(sk, osk);
+       bool ok = inet_ehash_insert(sk, osk, found_dup_sk);
 
        if (ok) {
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -583,7 +635,7 @@ int __inet_hash(struct sock *sk, struct sock *osk)
        int err = 0;
 
        if (sk->sk_state != TCP_LISTEN) {
-               inet_ehash_nolisten(sk, osk);
+               inet_ehash_nolisten(sk, osk, NULL);
                return 0;
        }
        WARN_ON(!sk_unhashed(sk));
@@ -679,7 +731,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
                tb = inet_csk(sk)->icsk_bind_hash;
                spin_lock_bh(&head->lock);
                if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-                       inet_ehash_nolisten(sk, NULL);
+                       inet_ehash_nolisten(sk, NULL, NULL);
                        spin_unlock_bh(&head->lock);
                        return 0;
                }
@@ -758,7 +810,7 @@ ok:
        inet_bind_hash(sk, tb, port);
        if (sk_unhashed(sk)) {
                inet_sk(sk)->inet_sport = htons(port);
-               inet_ehash_nolisten(sk, (struct sock *)tw);
+               inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
        }
        if (tw)
                inet_twsk_bind_unhash(tw, hinfo);
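
The duplicate lookup added above closes a race in the syncookie path: with no request socket to displace, two ACKs for the same connection could each create a child socket and both land in the ehash chain. A pthread model of the check-then-insert-under-one-lock idea, collapsed to a single bucket and matching on the hash alone (real code compares the full 4-tuple under the bucket spinlock, on RCU nulls lists):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct sock_model { unsigned int hash; struct sock_model *next; };

    static struct sock_model *bucket;
    static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Insert sk unless an equal socket already sits in the chain. */
    static bool ehash_insert(struct sock_model *sk, bool *found_dup_sk)
    {
            struct sock_model *esk;
            bool ret = true;

            pthread_mutex_lock(&bucket_lock);
            for (esk = bucket; esk; esk = esk->next) {
                    if (esk->hash == sk->hash) {
                            *found_dup_sk = true;
                            ret = false;
                            break;
                    }
            }
            if (ret) {
                    sk->next = bucket;
                    bucket = sk;
            }
            pthread_mutex_unlock(&bucket_lock);
            return ret;
    }

    int main(void)
    {
            struct sock_model a = { .hash = 42 }, b = { .hash = 42 };
            bool dup = false;

            printf("first:  %d\n", ehash_insert(&a, &dup));
            printf("second: %d (dup=%d)\n", ehash_insert(&b, &dup), dup);
            return 0;
    }
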
index d1e04d2..563b62b 100644
@@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 
        local_bh_disable();
        addend = xt_write_recseq_begin();
-       private = READ_ONCE(table->private); /* Address dependency. */
+       private = rcu_access_pointer(table->private);
        cpu     = smp_processor_id();
        table_base = private->entries;
        jumpstack  = (struct arpt_entry **)private->jumpstack[cpu];
@@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 {
        unsigned int countersize;
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       const struct xt_table_info *private = xt_table_get_private_protected(table);
 
        /* We need atomic snapshot of counters: rest doesn't change
         * (other than comefrom, which userspace doesn't care
@@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size,
        unsigned int off, num;
        const struct arpt_entry *e;
        struct xt_counters *counters;
-       struct xt_table_info *private = table->private;
+       struct xt_table_info *private = xt_table_get_private_protected(table);
        int ret = 0;
        void *loc_cpu_entry;
 
@@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
        t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
        if (!IS_ERR(t)) {
                struct arpt_getinfo info;
-               const struct xt_table_info *private = t->private;
+               const struct xt_table_info *private = xt_table_get_private_protected(t);
 #ifdef CONFIG_COMPAT
                struct xt_table_info tmp;
 
@@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
 
        t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
        if (!IS_ERR(t)) {
-               const struct xt_table_info *private = t->private;
+               const struct xt_table_info *private = xt_table_get_private_protected(t);
 
                if (get.size == private->size)
                        ret = copy_entries_to_user(private->size,
@@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
        }
 
        local_bh_disable();
-       private = t->private;
+       private = xt_table_get_private_protected(t);
        if (private->number != tmp.num_counters) {
                ret = -EINVAL;
                goto unlock_up_free;
@@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
                                       void __user *userptr)
 {
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       const struct xt_table_info *private = xt_table_get_private_protected(table);
        void __user *pos;
        unsigned int size;
        int ret = 0;
index f15bc21..6e2851f 100644
@@ -258,7 +258,7 @@ ipt_do_table(struct sk_buff *skb,
        WARN_ON(!(table->valid_hooks & (1 << hook)));
        local_bh_disable();
        addend = xt_write_recseq_begin();
-       private = READ_ONCE(table->private); /* Address dependency. */
+       private = rcu_access_pointer(table->private);
        cpu        = smp_processor_id();
        table_base = private->entries;
        jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
@@ -791,7 +791,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 {
        unsigned int countersize;
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       const struct xt_table_info *private = xt_table_get_private_protected(table);
 
        /* We need atomic snapshot of counters: rest doesn't change
           (other than comefrom, which userspace doesn't care
@@ -815,7 +815,7 @@ copy_entries_to_user(unsigned int total_size,
        unsigned int off, num;
        const struct ipt_entry *e;
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       const struct xt_table_info *private = xt_table_get_private_protected(table);
        int ret = 0;
        const void *loc_cpu_entry;
 
@@ -964,7 +964,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
        t = xt_request_find_table_lock(net, AF_INET, name);
        if (!IS_ERR(t)) {
                struct ipt_getinfo info;
-               const struct xt_table_info *private = t->private;
+               const struct xt_table_info *private = xt_table_get_private_protected(t);
 #ifdef CONFIG_COMPAT
                struct xt_table_info tmp;
 
@@ -1018,7 +1018,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
 
        t = xt_find_table_lock(net, AF_INET, get.name);
        if (!IS_ERR(t)) {
-               const struct xt_table_info *private = t->private;
+               const struct xt_table_info *private = xt_table_get_private_protected(t);
                if (get.size == private->size)
                        ret = copy_entries_to_user(private->size,
                                                   t, uptr->entrytable);
@@ -1173,7 +1173,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
        }
 
        local_bh_disable();
-       private = t->private;
+       private = xt_table_get_private_protected(t);
        if (private->number != tmp.num_counters) {
                ret = -EINVAL;
                goto unlock_up_free;
@@ -1543,7 +1543,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
                            void __user *userptr)
 {
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       const struct xt_table_info *private = xt_table_get_private_protected(table);
        void __user *pos;
        unsigned int size;
        int ret = 0;
index dc2a399..9f43abe 100644
@@ -3222,7 +3222,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
        fl4.daddr = dst;
        fl4.saddr = src;
-       fl4.flowi4_tos = rtm->rtm_tos;
+       fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK;
        fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
        fl4.flowi4_mark = mark;
        fl4.flowi4_uid = uid;
@@ -3246,8 +3246,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                fl4.flowi4_iif = iif; /* for rt_fill_info */
                skb->dev        = dev;
                skb->mark       = mark;
-               err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
-                                        dev, &res);
+               err = ip_route_input_rcu(skb, dst, src,
+                                        rtm->rtm_tos & IPTOS_RT_MASK, dev,
+                                        &res);
 
                rt = skb_rtable(skb);
                if (err == 0 && rt->dst.error)
index 6c4d79b..6ea3dc2 100644
@@ -945,7 +945,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
        filter_expired = after(tcp_jiffies32,
                               bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
        if (rs->rtt_us >= 0 &&
-           (rs->rtt_us <= bbr->min_rtt_us ||
+           (rs->rtt_us < bbr->min_rtt_us ||
             (filter_expired && !rs->is_ack_delayed))) {
                bbr->min_rtt_us = rs->rtt_us;
                bbr->min_rtt_stamp = tcp_jiffies32;
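
The comparison above tightens from <= to <: refreshing min_rtt_stamp on merely equal samples keeps extending the filter window, so on a steady path the window could effectively never expire. A sketch of the difference (with '<=' the stamp below would end at 9 and the 250 us sample would be rejected; the real code also gates expiry on !is_ack_delayed, omitted here):

    #include <stdbool.h>
    #include <stdio.h>

    /* Windowed min filter: only a strictly lower sample (or an expired
     * window) refreshes the timestamp.
     */
    struct min_filter { unsigned int min_us; unsigned long stamp; };

    static void update(struct min_filter *f, unsigned int rtt_us,
                       unsigned long now, unsigned long win)
    {
            bool expired = now > f->stamp + win;

            if (rtt_us < f->min_us || expired) {    /* was: <= */
                    f->min_us = rtt_us;
                    f->stamp = now;
            }
    }

    int main(void)
    {
            struct min_filter f = { .min_us = 100, .stamp = 0 };
            unsigned long t;

            for (t = 1; t <= 9; t++)
                    update(&f, 100, t, 10);  /* equal samples: no refresh */
            update(&f, 250, 11, 10);         /* window expired: accepted */
            printf("min=%u stamp=%lu\n", f.min_us, f.stamp);
            return 0;
    }
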
index 37f4cb2..bc7d2a5 100644
@@ -15,8 +15,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 {
        struct iov_iter *iter = &msg->msg_iter;
        int peek = flags & MSG_PEEK;
-       int i, ret, copied = 0;
        struct sk_msg *msg_rx;
+       int i, copied = 0;
 
        msg_rx = list_first_entry_or_null(&psock->ingress_msg,
                                          struct sk_msg, list);
@@ -37,17 +37,16 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
                        page = sg_page(sge);
                        if (copied + copy > len)
                                copy = len - copied;
-                       ret = copy_page_to_iter(page, sge->offset, copy, iter);
-                       if (ret != copy) {
-                               msg_rx->sg.start = i;
-                               return -EFAULT;
-                       }
+                       copy = copy_page_to_iter(page, sge->offset, copy, iter);
+                       if (!copy)
+                               return copied ? copied : -EFAULT;
 
                        copied += copy;
                        if (likely(!peek)) {
                                sge->offset += copy;
                                sge->length -= copy;
-                               sk_mem_uncharge(sk, copy);
+                               if (!msg_rx->skb)
+                                       sk_mem_uncharge(sk, copy);
                                msg_rx->sg.size -= copy;
 
                                if (!sge->length) {
@@ -56,6 +55,11 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
                                                put_page(page);
                                }
                        } else {
+                               /* Let's not optimize the peek case: if
+                                * copy_page_to_iter() didn't copy the entire
+                                * length, just break.
+                                */
+                               if (copy != sge->length)
+                                       return copied;
                                sk_msg_iter_var_next(i);
                        }
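
The receive loop now treats a short copy_page_to_iter() as partial progress rather than a hard -EFAULT: it returns what was consumed so far and errors only when nothing at all was copied. The same convention on a plain buffer (a sketch, not the iov_iter API):

    #include <stdio.h>
    #include <string.h>

    #define EFAULT 14

    /* Returns bytes copied; -EFAULT only if nothing could be copied at all. */
    static int recv_model(char *dst, size_t dst_room,
                          const char *src, size_t src_len)
    {
            size_t copied = 0;

            while (copied < src_len) {
                    size_t chunk = src_len - copied;

                    if (chunk > dst_room - copied)
                            chunk = dst_room - copied;      /* short copy */
                    if (!chunk)
                            return copied ? (int)copied : -EFAULT;
                    memcpy(dst + copied, src + copied, chunk);
                    copied += chunk;
            }
            return (int)copied;
    }

    int main(void)
    {
            char dst[4];

            printf("%d\n", recv_model(dst, sizeof(dst), "abcdefgh", 8)); /* 4 */
            printf("%d\n", recv_model(dst, 0, "abcdefgh", 8));           /* -14 */
            return 0;
    }
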
 
index db47ac2..563d016 100644
@@ -198,6 +198,11 @@ static void tcp_reinit_congestion_control(struct sock *sk,
        icsk->icsk_ca_setsockopt = 1;
        memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
 
+       if (ca->flags & TCP_CONG_NEEDS_ECN)
+               INET_ECN_xmit(sk);
+       else
+               INET_ECN_dontxmit(sk);
+
        if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
                tcp_init_congestion_control(sk);
 }
index 389d1b3..ef4bdb0 100644
@@ -510,7 +510,6 @@ static void tcp_init_buffer_space(struct sock *sk)
        if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
                tcp_sndbuf_expand(sk);
 
-       tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss);
        tcp_mstamp_refresh(tp);
        tp->rcvq_space.time = tp->tcp_mstamp;
        tp->rcvq_space.seq = tp->copied_seq;
@@ -534,6 +533,8 @@ static void tcp_init_buffer_space(struct sock *sk)
 
        tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
        tp->snd_cwnd_stamp = tcp_jiffies32;
+       tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd,
+                                   (u32)TCP_INIT_CWND * tp->advmss);
 }
 
 /* 4. Recalculate window clamp after socket hit its memory bounds. */
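
The initialization moves below the rcv_ssthresh clamp and gains rcv_ssthresh as a third bound, so the initial receive-space estimate cannot start above what the window will actually allow. The clamp itself is just a three-way min (values below are arbitrary examples):

    #include <stdio.h>

    #define TCP_INIT_CWND 10

    static unsigned int min3u(unsigned int a, unsigned int b, unsigned int c)
    {
            unsigned int m = a < b ? a : b;

            return m < c ? m : c;
    }

    int main(void)
    {
            unsigned int rcv_ssthresh = 64 * 1024;
            unsigned int rcv_wnd = 128 * 1024;
            unsigned int advmss = 1460;

            /* ~ tp->rcvq_space.space = min3(rcv_ssthresh, rcv_wnd,
             *                               TCP_INIT_CWND * advmss);
             */
            printf("%u\n", min3u(rcv_ssthresh, rcv_wnd,
                                 TCP_INIT_CWND * advmss));      /* 14600 */
            return 0;
    }
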
index 7352c09..595dcc3 100644
@@ -980,17 +980,23 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
 
        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
 
-       tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
-                       tcp_rsk(req)->syn_tos : inet_sk(sk)->tos;
-
        if (skb) {
                __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 
+               tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+                               (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+                               (inet_sk(sk)->tos & INET_ECN_MASK) :
+                               inet_sk(sk)->tos;
+
+               if (!INET_ECN_is_capable(tos) &&
+                   tcp_bpf_ca_needs_ecn((struct sock *)req))
+                       tos |= INET_ECN_ECT_0;
+
                rcu_read_lock();
                err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
                                            ireq->ir_rmt_addr,
                                            rcu_dereference(ireq->ireq_opt),
-                                           tos & ~INET_ECN_MASK);
+                                           tos);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }
@@ -1498,6 +1504,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                  bool *own_req)
 {
        struct inet_request_sock *ireq;
+       bool found_dup_sk = false;
        struct inet_sock *newinet;
        struct tcp_sock *newtp;
        struct sock *newsk;
@@ -1535,7 +1542,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
        newinet->inet_id = prandom_u32();
 
-       /* Set ToS of the new socket based upon the value of incoming SYN. */
+       /* Set ToS of the new socket based upon the value of the incoming SYN.
+        * ECT bits are set later in tcp_init_transfer().
+        */
        if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
                newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
 
@@ -1575,12 +1584,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 
        if (__inet_inherit_port(sk, newsk) < 0)
                goto put_and_exit;
-       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
+                                      &found_dup_sk);
        if (likely(*own_req)) {
                tcp_move_syn(newtp, req);
                ireq->ireq_opt = NULL;
        } else {
-               newinet->inet_opt = NULL;
+               if (!req_unhash && found_dup_sk) {
+                       /* This code path should be executed only in
+                        * the syncookie case.
+                        */
+                       bh_unlock_sock(newsk);
+                       sock_put(newsk);
+                       newsk = NULL;
+               } else {
+                       newinet->inet_opt = NULL;
+               }
        }
        return newsk;
 
index bf48cd7..9901176 100644
@@ -1880,7 +1880,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
         * window, and remember whether we were cwnd-limited then.
         */
        if (!before(tp->snd_una, tp->max_packets_seq) ||
-           tp->packets_out > tp->max_packets_out) {
+           tp->packets_out > tp->max_packets_out ||
+           is_cwnd_limited) {
                tp->max_packets_out = tp->packets_out;
                tp->max_packets_seq = tp->snd_nxt;
                tp->is_cwnd_limited = is_cwnd_limited;
@@ -2702,6 +2703,10 @@ repair:
        else
                tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
 
+       is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+       if (likely(sent_pkts || is_cwnd_limited))
+               tcp_cwnd_validate(sk, is_cwnd_limited);
+
        if (likely(sent_pkts)) {
                if (tcp_in_cwnd_reduction(sk))
                        tp->prr_out += sent_pkts;
@@ -2709,8 +2714,6 @@ repair:
                /* Send one loss probe per tail loss episode. */
                if (push_one != 2)
                        tcp_schedule_loss_probe(sk, false);
-               is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
-               tcp_cwnd_validate(sk, is_cwnd_limited);
                return false;
        }
        return !tp->packets_out && !tcp_write_queue_empty(sk);
index 09f0a23..9eeebd4 100644
@@ -2173,7 +2173,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
                __skb_pull(skb, skb_transport_offset(skb));
                ret = udp_queue_rcv_one_skb(sk, skb);
                if (ret > 0)
-                       ip_protocol_deliver_rcu(dev_net(skb->dev), skb, -ret);
+                       ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret);
        }
        return 0;
 }
index 01146b6..8b6eb38 100644
@@ -5022,8 +5022,10 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
                return -EMSGSIZE;
 
        if (args->netnsid >= 0 &&
-           nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+           nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) {
+               nlmsg_cancel(skb, nlh);
                return -EMSGSIZE;
+       }
 
        put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
        if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 ||
@@ -5054,8 +5056,10 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
                return -EMSGSIZE;
 
        if (args->netnsid >= 0 &&
-           nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+           nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) {
+               nlmsg_cancel(skb, nlh);
                return -EMSGSIZE;
+       }
 
        put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
        if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 ||
index 642fc6a..8a22486 100644
@@ -306,7 +306,9 @@ static int ip6addrlbl_del(struct net *net,
 /* add default label */
 static int __net_init ip6addrlbl_net_init(struct net *net)
 {
-       int err = 0;
+       struct ip6addrlbl_entry *p = NULL;
+       struct hlist_node *n;
+       int err;
        int i;
 
        ADDRLABEL(KERN_DEBUG "%s\n", __func__);
@@ -315,14 +317,20 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
        INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
 
        for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
-               int ret = ip6addrlbl_add(net,
-                                        ip6addrlbl_init_table[i].prefix,
-                                        ip6addrlbl_init_table[i].prefixlen,
-                                        0,
-                                        ip6addrlbl_init_table[i].label, 0);
-               /* XXX: should we free all rules when we catch an error? */
-               if (ret && (!err || err != -ENOMEM))
-                       err = ret;
+               err = ip6addrlbl_add(net,
+                                    ip6addrlbl_init_table[i].prefix,
+                                    ip6addrlbl_init_table[i].prefixlen,
+                                    0,
+                                    ip6addrlbl_init_table[i].label, 0);
+               if (err)
+                       goto err_ip6addrlbl_add;
+       }
+       return 0;
+
+err_ip6addrlbl_add:
+       hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
+               hlist_del_rcu(&p->list);
+               kfree_rcu(p, rcu);
        }
        return err;
 }
index d88d976..440080d 100644
@@ -588,7 +588,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
        memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
        memset(ah->auth_data, 0, ahp->icv_trunc_len);
 
-       if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
+       err = ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN);
+       if (err)
                goto out_free;
 
        ip6h->priority    = 0;
index 931b186..cf6e138 100644
@@ -1133,8 +1133,13 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu,
                        return;
 
                if (rt->dst.dev) {
-                       dev->needed_headroom = rt->dst.dev->hard_header_len +
-                                              t_hlen;
+                       unsigned short dst_len = rt->dst.dev->hard_header_len +
+                                                t_hlen;
+
+                       if (t->dev->header_ops)
+                               dev->hard_header_len = dst_len;
+                       else
+                               dev->needed_headroom = dst_len;
 
                        if (set_mtu) {
                                dev->mtu = rt->dst.dev->mtu - t_hlen;
@@ -1159,7 +1164,12 @@ static int ip6gre_calc_hlen(struct ip6_tnl *tunnel)
        tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
 
        t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
-       tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen;
+
+       if (tunnel->dev->header_ops)
+               tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+       else
+               tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen;
+
        return t_hlen;
 }
 
index 27f29b9..7671747 100644
@@ -81,6 +81,7 @@ static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
 static int pndisc_constructor(struct pneigh_entry *n);
 static void pndisc_destructor(struct pneigh_entry *n);
 static void pndisc_redo(struct sk_buff *skb);
+static int ndisc_is_multicast(const void *pkey);
 
 static const struct neigh_ops ndisc_generic_ops = {
        .family =               AF_INET6,
@@ -115,6 +116,7 @@ struct neigh_table nd_tbl = {
        .pconstructor = pndisc_constructor,
        .pdestructor =  pndisc_destructor,
        .proxy_redo =   pndisc_redo,
+       .is_multicast = ndisc_is_multicast,
        .allow_add  =   ndisc_allow_add,
        .id =           "ndisc_cache",
        .parms = {
@@ -1706,6 +1708,11 @@ static void pndisc_redo(struct sk_buff *skb)
        kfree_skb(skb);
 }
 
+static int ndisc_is_multicast(const void *pkey)
+{
+       return ipv6_addr_is_multicast((struct in6_addr *)pkey);
+}
+
 static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
 {
        struct inet6_dev *idev = __in6_dev_get(skb->dev);
index 2e2119b..c4f532f 100644
@@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb,
 
        local_bh_disable();
        addend = xt_write_recseq_begin();
-       private = READ_ONCE(table->private); /* Address dependency. */
+       private = rcu_access_pointer(table->private);
        cpu        = smp_processor_id();
        table_base = private->entries;
        jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
@@ -807,7 +807,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 {
        unsigned int countersize;
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       const struct xt_table_info *private = xt_table_get_private_protected(table);
 
        /* We need atomic snapshot of counters: rest doesn't change
           (other than comefrom, which userspace doesn't care
@@ -831,7 +831,7 @@ copy_entries_to_user(unsigned int total_size,
        unsigned int off, num;
        const struct ip6t_entry *e;
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       const struct xt_table_info *private = xt_table_get_private_protected(table);
        int ret = 0;
        const void *loc_cpu_entry;
 
@@ -980,7 +980,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
        t = xt_request_find_table_lock(net, AF_INET6, name);
        if (!IS_ERR(t)) {
                struct ip6t_getinfo info;
-               const struct xt_table_info *private = t->private;
+               const struct xt_table_info *private = xt_table_get_private_protected(t);
 #ifdef CONFIG_COMPAT
                struct xt_table_info tmp;
 
@@ -1035,7 +1035,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
 
        t = xt_find_table_lock(net, AF_INET6, get.name);
        if (!IS_ERR(t)) {
-               struct xt_table_info *private = t->private;
+               struct xt_table_info *private = xt_table_get_private_protected(t);
                if (get.size == private->size)
                        ret = copy_entries_to_user(private->size,
                                                   t, uptr->entrytable);
@@ -1189,7 +1189,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
        }
 
        local_bh_disable();
-       private = t->private;
+       private = xt_table_get_private_protected(t);
        if (private->number != tmp.num_counters) {
                ret = -EINVAL;
                goto unlock_up_free;
@@ -1552,7 +1552,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
                            void __user *userptr)
 {
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       const struct xt_table_info *private = xt_table_get_private_protected(table);
        void __user *pos;
        unsigned int size;
        int ret = 0;
index 054d287..c129ad3 100644
@@ -440,6 +440,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
 int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 {
        u16 savethdr = skb->transport_header;
+       u8 nexthdr = NEXTHDR_FRAGMENT;
        int fhoff, nhoff, ret;
        struct frag_hdr *fhdr;
        struct frag_queue *fq;
@@ -455,6 +456,14 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
        if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
                return 0;
 
+       /* Discard the first fragment if it does not include all headers;
+        * see RFC 8200, Section 4.5.
+        */
+       if (ipv6frag_thdr_truncated(skb, fhoff, &nexthdr)) {
+               pr_debug("Drop incomplete fragment\n");
+               return 0;
+       }
+
        if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr)))
                return -ENOMEM;
 
index c8cf1bb..47a0dc4 100644
@@ -324,9 +324,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
        struct frag_queue *fq;
        const struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct net *net = dev_net(skb_dst(skb)->dev);
-       __be16 frag_off;
-       int iif, offset;
        u8 nexthdr;
+       int iif;
 
        if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
                goto fail_hdr;
@@ -362,24 +361,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
         * the source of the fragment, with the Pointer field set to zero.
         */
        nexthdr = hdr->nexthdr;
-       offset = ipv6_skip_exthdr(skb, skb_transport_offset(skb), &nexthdr, &frag_off);
-       if (offset >= 0) {
-               /* Check some common protocols' header */
-               if (nexthdr == IPPROTO_TCP)
-                       offset += sizeof(struct tcphdr);
-               else if (nexthdr == IPPROTO_UDP)
-                       offset += sizeof(struct udphdr);
-               else if (nexthdr == IPPROTO_ICMPV6)
-                       offset += sizeof(struct icmp6hdr);
-               else
-                       offset += 1;
-
-               if (!(frag_off & htons(IP6_OFFSET)) && offset > skb->len) {
-                       __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
-                                       IPSTATS_MIB_INHDRERRORS);
-                       icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
-                       return -1;
-               }
+       if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) {
+               __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
+                               IPSTATS_MIB_INHDRERRORS);
+               icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
+               return -1;
        }
 
        iif = skb->dev ? skb->dev->ifindex : 0;
index 8db59f4..991dc36 100644
@@ -527,15 +527,21 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
 
+               tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+                               (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+                               (np->tclass & INET_ECN_MASK) :
+                               np->tclass;
+
+               if (!INET_ECN_is_capable(tclass) &&
+                   tcp_bpf_ca_needs_ecn((struct sock *)req))
+                       tclass |= INET_ECN_ECT_0;
+
                rcu_read_lock();
                opt = ireq->ipv6_opt;
-               tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
-                               tcp_rsk(req)->syn_tos : np->tclass;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,
-                              tclass & ~INET_ECN_MASK,
-                              sk->sk_priority);
+                              tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }
@@ -1193,6 +1199,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
+       bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
 #ifdef CONFIG_TCP_MD5SIG
@@ -1314,7 +1321,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
 
-       /* Set ToS of the new socket based upon the value of incoming SYN. */
+       /* Set ToS of the new socket based upon the value of the incoming SYN.
+        * ECT bits are set later in tcp_init_transfer().
+        */
        if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
 
@@ -1368,7 +1377,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
                tcp_done(newsk);
                goto out;
        }
-       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
+                                      &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);
 
@@ -1383,6 +1393,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
                                skb_set_owner_r(newnp->pktoptions, newsk);
                        }
                }
+       } else {
+               if (!req_unhash && found_dup_sk) {
+                       /* This code path should be executed only in
+                        * the syncookie case.
+                        */
+                       bh_unlock_sock(newsk);
+                       sock_put(newsk);
+                       newsk = NULL;
+               }
        }
 
        return newsk;
index 047238f..db7d888 100644
@@ -1645,7 +1645,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
        }
 
        /* Create the new socket */
-       nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
+       nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0);
        if (!nsk) {
                err = pr_iucv->path_sever(path, user_data);
                iucv_path_free(path);
@@ -1851,7 +1851,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
                goto out;
        }
 
-       nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
+       nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0);
        bh_lock_sock(sk);
        if ((sk->sk_state != IUCV_LISTEN) ||
            sk_acceptq_is_full(sk) ||
index 1be7759..44154cc 100644
@@ -948,6 +948,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
                return ret;
        }
 
+       set_bit(SDATA_STATE_RUNNING, &sdata->state);
+
        ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR);
        if (ret) {
                kfree(sdata);
index 48f31ac..620ecf9 100644
@@ -60,6 +60,7 @@ static struct mesh_table *mesh_table_alloc(void)
        atomic_set(&newtbl->entries,  0);
        spin_lock_init(&newtbl->gates_lock);
        spin_lock_init(&newtbl->walk_lock);
+       rhashtable_init(&newtbl->rhead, &mesh_rht_params);
 
        return newtbl;
 }
@@ -773,9 +774,6 @@ int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata)
                goto free_path;
        }
 
-       rhashtable_init(&tbl_path->rhead, &mesh_rht_params);
-       rhashtable_init(&tbl_mpp->rhead, &mesh_rht_params);
-
        sdata->u.mesh.mesh_paths = tbl_path;
        sdata->u.mesh.mpp_paths = tbl_mpp;
 
index 86bc469..b13b1da 100644
@@ -274,7 +274,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
        success = !!(info->flags & IEEE80211_TX_STAT_ACK);
 
        for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
-               if (ar[i].idx < 0)
+               if (ar[i].idx < 0 || !ar[i].count)
                        break;
 
                ndx = rix_to_ndx(mi, ar[i].idx);
@@ -287,12 +287,6 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
                        mi->r[ndx].stats.success += success;
        }
 
-       if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0))
-               mi->sample_packets++;
-
-       if (mi->sample_deferred > 0)
-               mi->sample_deferred--;
-
        if (time_after(jiffies, mi->last_stats_update +
                                mp->update_interval / (mp->new_avg ? 2 : 1)))
                minstrel_update_stats(mp, mi);
@@ -367,7 +361,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
                return;
 
        delta = (mi->total_packets * sampling_ratio / 100) -
-                       (mi->sample_packets + mi->sample_deferred / 2);
+                       mi->sample_packets;
 
        /* delta < 0: no sampling required */
        prev_sample = mi->prev_sample;
@@ -376,7 +370,6 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
                return;
 
        if (mi->total_packets >= 10000) {
-               mi->sample_deferred = 0;
                mi->sample_packets = 0;
                mi->total_packets = 0;
        } else if (delta > mi->n_rates * 2) {
@@ -401,19 +394,8 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
         * rate sampling method should be used.
         * Respect such rates that are not sampled for 20 iterations.
         */
-       if (mrr_capable &&
-           msr->perfect_tx_time > mr->perfect_tx_time &&
-           msr->stats.sample_skipped < 20) {
-               /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
-                * packets that have the sampling rate deferred to the
-                * second MRR stage. Increase the sample counter only
-                * if the deferred sample rate was actually used.
-                * Use the sample_deferred counter to make sure that
-                * the sampling is not done in large bursts */
-               info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
-               rate++;
-               mi->sample_deferred++;
-       } else {
+       if (msr->perfect_tx_time < mr->perfect_tx_time ||
+           msr->stats.sample_skipped >= 20) {
                if (!msr->sample_limit)
                        return;
 
@@ -433,6 +415,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
 
        rate->idx = mi->r[ndx].rix;
        rate->count = minstrel_get_retry_count(&mi->r[ndx], info);
+       info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
 }
 
 
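With the deferred-sampling bookkeeping gone, the minstrel sampling budget above reduces to a plain difference between the configured share of packets and the samples already taken. A small userspace model of that arithmetic (hypothetical names; a sketch, not the driver code):

#include <stdio.h>

/* Stand-ins for mi->total_packets, mi->sample_packets and the
 * percent sampling_ratio used in the hunk above.
 */
static int sampling_delta(unsigned int total_packets,
                          unsigned int sample_packets,
                          unsigned int sampling_ratio)
{
        /* delta <= 0 means no sampling is required this round */
        return (int)(total_packets * sampling_ratio / 100) - (int)sample_packets;
}

int main(void)
{
        /* 10000 packets at a 10% ratio with 900 samples leaves a budget of 100 */
        printf("%d\n", sampling_delta(10000, 900, 10));
        return 0;
}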
index dbb43bc..86cd80b 100644 (file)
@@ -126,7 +126,6 @@ struct minstrel_sta_info {
        u8 max_prob_rate;
        unsigned int total_packets;
        unsigned int sample_packets;
-       int sample_deferred;
 
        unsigned int sample_row;
        unsigned int sample_column;
index 4fe284f..ec6973e 100644 (file)
@@ -705,7 +705,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
  out_drop_sta:
        local->num_sta--;
        synchronize_net();
-       __cleanup_single_sta(sta);
+       cleanup_single_sta(sta);
  out_err:
        mutex_unlock(&local->sta_mtx);
        kfree(sinfo);
@@ -724,19 +724,13 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
 
        err = sta_info_insert_check(sta);
        if (err) {
+               sta_info_free(local, sta);
                mutex_unlock(&local->sta_mtx);
                rcu_read_lock();
-               goto out_free;
+               return err;
        }
 
-       err = sta_info_insert_finish(sta);
-       if (err)
-               goto out_free;
-
-       return 0;
- out_free:
-       sta_info_free(local, sta);
-       return err;
+       return sta_info_insert_finish(sta);
 }
 
 int sta_info_insert(struct sta_info *sta)
index 6feb451..3485610 100644 (file)
@@ -49,7 +49,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
        int ac;
 
        if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER |
-                          IEEE80211_TX_CTL_AMPDU)) {
+                          IEEE80211_TX_CTL_AMPDU |
+                          IEEE80211_TX_CTL_HW_80211_ENCAP)) {
                ieee80211_free_txskb(&local->hw, skb);
                return;
        }
@@ -915,15 +916,6 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
                        ieee80211_mpsp_trigger_process(
                                ieee80211_get_qos_ctl(hdr), sta, true, acked);
 
-               if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) {
-                       /*
-                        * The STA is in power save mode, so assume
-                        * that this TX packet failed because of that.
-                        */
-                       ieee80211_handle_filtered_frame(local, sta, skb);
-                       return;
-               }
-
                if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
                    (ieee80211_is_data(hdr->frame_control)) &&
                    (rates_idx != -1))
@@ -1150,6 +1142,12 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
                                                            -info->status.ack_signal);
                                }
                        } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
+                               /*
+                                * The STA is in power save mode, so assume
+                                * that this TX packet failed because of that.
+                                */
+                               if (skb)
+                                       ieee80211_handle_filtered_frame(local, sta, skb);
                                return;
                        } else if (noack_success) {
                                /* nothing to do here, do not account as lost */
index 4934206..94e624e 100644 (file)
@@ -3455,7 +3455,7 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
 
        *chandef = he_chandef;
 
-       return false;
+       return true;
 }
 
 bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper,
index 84d1194..b921cbd 100644 (file)
@@ -67,6 +67,7 @@ void mptcp_seq_show(struct seq_file *seq)
                for (i = 0; mptcp_snmp_list[i].name; i++)
                        seq_puts(seq, " 0");
 
+               seq_putc(seq, '\n');
                return;
        }
 
index ac4a1fe..953906e 100644 (file)
@@ -543,9 +543,8 @@ create_msk:
                        fallback = true;
        } else if (subflow_req->mp_join) {
                mptcp_get_options(skb, &mp_opt);
-               if (!mp_opt.mp_join ||
-                   !mptcp_can_accept_new_subflow(subflow_req->msk) ||
-                   !subflow_hmac_valid(req, &mp_opt)) {
+               if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
+                   !mptcp_can_accept_new_subflow(subflow_req->msk)) {
                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
                        fallback = true;
                }
index f1be3e3..a9cb355 100644 (file)
@@ -1726,9 +1726,6 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
        ndp->ptype.dev = dev;
        dev_add_pack(&ndp->ptype);
 
-       /* Set up generic netlink interface */
-       ncsi_init_netlink(dev);
-
        pdev = to_platform_device(dev->dev.parent);
        if (pdev) {
                np = pdev->dev.of_node;
@@ -1892,8 +1889,6 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
        list_del_rcu(&ndp->node);
        spin_unlock_irqrestore(&ncsi_dev_lock, flags);
 
-       ncsi_unregister_netlink(nd->dev);
-
        kfree(ndp);
 }
 EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
index adddc77..bb5f165 100644 (file)
@@ -766,24 +766,8 @@ static struct genl_family ncsi_genl_family __ro_after_init = {
        .n_small_ops = ARRAY_SIZE(ncsi_ops),
 };
 
-int ncsi_init_netlink(struct net_device *dev)
+static int __init ncsi_init_netlink(void)
 {
-       int rc;
-
-       rc = genl_register_family(&ncsi_genl_family);
-       if (rc)
-               netdev_err(dev, "ncsi: failed to register netlink family\n");
-
-       return rc;
-}
-
-int ncsi_unregister_netlink(struct net_device *dev)
-{
-       int rc;
-
-       rc = genl_unregister_family(&ncsi_genl_family);
-       if (rc)
-               netdev_err(dev, "ncsi: failed to unregister netlink family\n");
-
-       return rc;
+       return genl_register_family(&ncsi_genl_family);
 }
+subsys_initcall(ncsi_init_netlink);
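The generic netlink family is now registered once at boot instead of once per device, so the netdev parameter and the per-device unregister path disappear. A minimal sketch of the same one-shot registration pattern, using a hypothetical family:

#include <linux/init.h>
#include <net/genetlink.h>

/* Hypothetical family for illustration; mirrors the ncsi move to a
 * single boot-time registration via subsys_initcall().
 */
static struct genl_family demo_genl_family __ro_after_init = {
        .name    = "demo",
        .version = 1,
};

static int __init demo_init_netlink(void)
{
        return genl_register_family(&demo_genl_family);
}
subsys_initcall(demo_init_netlink);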
index 7502723..39a1a9d 100644 (file)
@@ -22,7 +22,4 @@ int ncsi_send_netlink_err(struct net_device *dev,
                          struct nlmsghdr *nlhdr,
                          int err);
 
-int ncsi_init_netlink(struct net_device *dev);
-int ncsi_unregister_netlink(struct net_device *dev);
-
 #endif /* __NCSI_NETLINK_H__ */
index 7cff6e5..2b19189 100644 (file)
@@ -271,8 +271,7 @@ flag_nested(const struct nlattr *nla)
 
 static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
        [IPSET_ATTR_IPADDR_IPV4]        = { .type = NLA_U32 },
-       [IPSET_ATTR_IPADDR_IPV6]        = { .type = NLA_BINARY,
-                                           .len = sizeof(struct in6_addr) },
+       [IPSET_ATTR_IPADDR_IPV6]        = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
 };
 
 int
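NLA_POLICY_EXACT_LEN() rejects any attribute whose payload is not exactly the given size, whereas the old NLA_BINARY entry treated .len as an upper bound and accepted shorter, truncated addresses. A sketch of such a policy entry against a hypothetical attribute set:

#include <linux/in6.h>
#include <net/netlink.h>

/* Hypothetical attribute enum, for illustration only. */
enum { DEMO_ATTR_UNSPEC, DEMO_ATTR_ADDR6, __DEMO_ATTR_MAX };
#define DEMO_ATTR_MAX (__DEMO_ATTR_MAX - 1)

static const struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
        /* payload must be exactly 16 bytes; shorter or longer is rejected */
        [DEMO_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
};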
index e279ded..d45dbcb 100644 (file)
@@ -4167,12 +4167,18 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
 
        spin_lock_init(&ipvs->tot_stats.lock);
 
-       proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops,
-                       sizeof(struct ip_vs_iter));
-       proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
-                       ip_vs_stats_show, NULL);
-       proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
-                       ip_vs_stats_percpu_show, NULL);
+#ifdef CONFIG_PROC_FS
+       if (!proc_create_net("ip_vs", 0, ipvs->net->proc_net,
+                            &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter)))
+               goto err_vs;
+       if (!proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
+                                   ip_vs_stats_show, NULL))
+               goto err_stats;
+       if (!proc_create_net_single("ip_vs_stats_percpu", 0,
+                                   ipvs->net->proc_net,
+                                   ip_vs_stats_percpu_show, NULL))
+               goto err_percpu;
+#endif
 
        if (ip_vs_control_net_init_sysctl(ipvs))
                goto err;
@@ -4180,6 +4186,17 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
        return 0;
 
 err:
+#ifdef CONFIG_PROC_FS
+       remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
+
+err_percpu:
+       remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
+
+err_stats:
+       remove_proc_entry("ip_vs", ipvs->net->proc_net);
+
+err_vs:
+#endif
        free_percpu(ipvs->tot_stats.cpustats);
        return -ENOMEM;
 }
@@ -4188,9 +4205,11 @@ void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
 {
        ip_vs_trash_cleanup(ipvs);
        ip_vs_control_net_cleanup_sysctl(ipvs);
+#ifdef CONFIG_PROC_FS
        remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
        remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
        remove_proc_entry("ip_vs", ipvs->net->proc_net);
+#endif
        free_percpu(ipvs->tot_stats.cpustats);
 }
 
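The proc registrations are now checked and unwound with the usual goto ladder, where each failure label releases only what was set up before it. A self-contained model of the shape (hypothetical resources standing in for the proc entries):

#include <stdlib.h>

/* Three-step setup with the same unwind shape as ip_vs_control_net_init()
 * above: on failure, jump to the label that frees everything acquired so far.
 */
static int setup_all(char **a, char **b, char **c)
{
        *a = malloc(16);
        if (!*a)
                goto err_a;
        *b = malloc(16);
        if (!*b)
                goto err_b;
        *c = malloc(16);
        if (!*c)
                goto err_c;
        return 0;

err_c:
        free(*b);
err_b:
        free(*a);
err_a:
        return -1;
}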
index 0f58e98..9a08076 100644 (file)
@@ -619,7 +619,8 @@ static int nft_request_module(struct net *net, const char *fmt, ...)
 static void lockdep_nfnl_nft_mutex_not_held(void)
 {
 #ifdef CONFIG_PROVE_LOCKING
-       WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+       if (debug_locks)
+               WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
 #endif
 }
 
@@ -1722,6 +1723,10 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
        }
 
        nla_strlcpy(ifname, attr, IFNAMSIZ);
+       /* nf_tables_netdev_event() is called under the rtnl_mutex; this
+        * indirectly serializes all the other holders of the commit_mutex
+        * with the rtnl_mutex.
+        */
        dev = __dev_get_by_name(net, ifname);
        if (!dev) {
                err = -ENOENT;
@@ -3718,7 +3723,7 @@ cont:
        return 0;
 }
 
-static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
+int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
 {
        u64 ms = be64_to_cpu(nla_get_be64(nla));
        u64 max = (u64)(~((u64)0));
@@ -3732,7 +3737,7 @@ static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
        return 0;
 }
 
-static __be64 nf_jiffies64_to_msecs(u64 input)
+__be64 nf_jiffies64_to_msecs(u64 input)
 {
        return cpu_to_be64(jiffies64_to_msecs(input));
 }
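Making nf_msecs_to_jiffies64() and nf_jiffies64_to_msecs() non-static lets nft_dynset reuse the same range-checked conversion instead of an unchecked msecs_to_jiffies(). A userspace model of the range check (HZ and the linear conversion are assumptions of the sketch, not the kernel's exact arithmetic):

#include <stdint.h>

#define HZ 250  /* assumed tick rate for this sketch */

/* Reject millisecond values whose conversion to ticks would overflow,
 * then convert; the kernel helper rejects out-of-range input similarly.
 */
static int ms_to_jiffies64(uint64_t ms, uint64_t *result)
{
        if (ms > UINT64_MAX / HZ)
                return -1;
        *result = ms * HZ / 1000;
        return 0;
}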
index 9f62572..9ae1427 100644 (file)
@@ -28,6 +28,23 @@ static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
        return flow;
 }
 
+void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
+                                enum flow_dissector_key_id addr_type)
+{
+       struct nft_flow_match *match = &flow->match;
+       struct nft_flow_key *mask = &match->mask;
+       struct nft_flow_key *key = &match->key;
+
+       if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL))
+               return;
+
+       key->control.addr_type = addr_type;
+       mask->control.addr_type = 0xffff;
+       match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL);
+       match->dissector.offset[FLOW_DISSECTOR_KEY_CONTROL] =
+               offsetof(struct nft_flow_key, control);
+}
+
 struct nft_flow_rule *nft_flow_rule_create(struct net *net,
                                           const struct nft_rule *rule)
 {
index bc079d6..00e563a 100644 (file)
@@ -123,11 +123,11 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
        u8 *mask = (u8 *)&flow->match.mask;
        u8 *key = (u8 *)&flow->match.key;
 
-       if (priv->op != NFT_CMP_EQ || reg->len != priv->len)
+       if (priv->op != NFT_CMP_EQ || priv->len > reg->len)
                return -EOPNOTSUPP;
 
-       memcpy(key + reg->offset, &priv->data, priv->len);
-       memcpy(mask + reg->offset, &reg->mask, priv->len);
+       memcpy(key + reg->offset, &priv->data, reg->len);
+       memcpy(mask + reg->offset, &reg->mask, reg->len);
 
        flow->match.dissector.used_keys |= BIT(reg->key);
        flow->match.dissector.offset[reg->key] = reg->base_offset;
@@ -137,7 +137,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
            nft_reg_load16(priv->data.data) != ARPHRD_ETHER)
                return -EOPNOTSUPP;
 
-       nft_offload_update_dependency(ctx, &priv->data, priv->len);
+       nft_offload_update_dependency(ctx, &priv->data, reg->len);
 
        return 0;
 }
index 322bd67..a1b0aac 100644 (file)
@@ -177,8 +177,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
        }
 #endif
        case NFT_CT_ID:
-               if (!nf_ct_is_confirmed(ct))
-                       goto err;
                *dest = nf_ct_get_id(ct);
                return;
        default:
index 64ca13a..9af4f93 100644 (file)
@@ -157,8 +157,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
        if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
                if (!(set->flags & NFT_SET_TIMEOUT))
                        return -EINVAL;
-               timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64(
-                                               tb[NFTA_DYNSET_TIMEOUT])));
+
+               err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout);
+               if (err)
+                       return err;
        }
 
        priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
@@ -267,7 +269,7 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
        if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name))
                goto nla_put_failure;
        if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT,
-                        cpu_to_be64(jiffies_to_msecs(priv->timeout)),
+                        nf_jiffies64_to_msecs(priv->timeout),
                         NFTA_DYNSET_PAD))
                goto nla_put_failure;
        if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
index b37bd02..bf4b3ad 100644 (file)
@@ -724,22 +724,22 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
 
        switch (priv->key) {
        case NFT_META_PROTOCOL:
-               NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto,
-                                 sizeof(__u16), reg);
+               NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, n_proto,
+                                       sizeof(__u16), reg);
                nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
                break;
        case NFT_META_L4PROTO:
-               NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
-                                 sizeof(__u8), reg);
+               NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
+                                       sizeof(__u8), reg);
                nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
                break;
        case NFT_META_IIF:
-               NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta,
-                                 ingress_ifindex, sizeof(__u32), reg);
+               NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta,
+                                       ingress_ifindex, sizeof(__u32), reg);
                break;
        case NFT_META_IIFTYPE:
-               NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta,
-                                 ingress_iftype, sizeof(__u16), reg);
+               NFT_OFFLOAD_MATCH_EXACT(FLOW_DISSECTOR_KEY_META, meta,
+                                       ingress_iftype, sizeof(__u16), reg);
                break;
        default:
                return -EOPNOTSUPP;
index dcd3c7b..47d4e0e 100644 (file)
@@ -165,6 +165,34 @@ nla_put_failure:
        return -1;
 }
 
+static bool nft_payload_offload_mask(struct nft_offload_reg *reg,
+                                    u32 priv_len, u32 field_len)
+{
+       unsigned int remainder, delta, k;
+       struct nft_data mask = {};
+       __be32 remainder_mask;
+
+       if (priv_len == field_len) {
+               memset(&reg->mask, 0xff, priv_len);
+               return true;
+       } else if (priv_len > field_len) {
+               return false;
+       }
+
+       memset(&mask, 0xff, field_len);
+       remainder = priv_len % sizeof(u32);
+       if (remainder) {
+               k = priv_len / sizeof(u32);
+               delta = field_len - priv_len;
+               remainder_mask = htonl(~((1 << (delta * BITS_PER_BYTE)) - 1));
+               mask.data[k] = (__force u32)remainder_mask;
+       }
+
+       memcpy(&reg->mask, &mask, field_len);
+
+       return true;
+}
+
 static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
                                  struct nft_flow_rule *flow,
                                  const struct nft_payload *priv)
@@ -173,21 +201,21 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
 
        switch (priv->offset) {
        case offsetof(struct ethhdr, h_source):
-               if (priv->len != ETH_ALEN)
+               if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
                                  src, ETH_ALEN, reg);
                break;
        case offsetof(struct ethhdr, h_dest):
-               if (priv->len != ETH_ALEN)
+               if (!nft_payload_offload_mask(reg, priv->len, ETH_ALEN))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
                                  dst, ETH_ALEN, reg);
                break;
        case offsetof(struct ethhdr, h_proto):
-               if (priv->len != sizeof(__be16))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic,
@@ -195,14 +223,14 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
                nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
                break;
        case offsetof(struct vlan_ethhdr, h_vlan_TCI):
-               if (priv->len != sizeof(__be16))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan,
                                  vlan_tci, sizeof(__be16), reg);
                break;
        case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto):
-               if (priv->len != sizeof(__be16))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan,
@@ -210,7 +238,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
                nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
                break;
        case offsetof(struct vlan_ethhdr, h_vlan_TCI) + sizeof(struct vlan_hdr):
-               if (priv->len != sizeof(__be16))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
@@ -218,7 +246,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
                break;
        case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) +
                                                        sizeof(struct vlan_hdr):
-               if (priv->len != sizeof(__be16))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
@@ -239,21 +267,25 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx,
 
        switch (priv->offset) {
        case offsetof(struct iphdr, saddr):
-               if (priv->len != sizeof(struct in_addr))
+               if (!nft_payload_offload_mask(reg, priv->len,
+                                             sizeof(struct in_addr)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src,
                                  sizeof(struct in_addr), reg);
+               nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
                break;
        case offsetof(struct iphdr, daddr):
-               if (priv->len != sizeof(struct in_addr))
+               if (!nft_payload_offload_mask(reg, priv->len,
+                                             sizeof(struct in_addr)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst,
                                  sizeof(struct in_addr), reg);
+               nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
                break;
        case offsetof(struct iphdr, protocol):
-               if (priv->len != sizeof(__u8))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
@@ -275,21 +307,25 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx,
 
        switch (priv->offset) {
        case offsetof(struct ipv6hdr, saddr):
-               if (priv->len != sizeof(struct in6_addr))
+               if (!nft_payload_offload_mask(reg, priv->len,
+                                             sizeof(struct in6_addr)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src,
                                  sizeof(struct in6_addr), reg);
+               nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
                break;
        case offsetof(struct ipv6hdr, daddr):
-               if (priv->len != sizeof(struct in6_addr))
+               if (!nft_payload_offload_mask(reg, priv->len,
+                                             sizeof(struct in6_addr)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst,
                                  sizeof(struct in6_addr), reg);
+               nft_flow_rule_set_addr_type(flow, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
                break;
        case offsetof(struct ipv6hdr, nexthdr):
-               if (priv->len != sizeof(__u8))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__u8)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
@@ -331,14 +367,14 @@ static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx,
 
        switch (priv->offset) {
        case offsetof(struct tcphdr, source):
-               if (priv->len != sizeof(__be16))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
                                  sizeof(__be16), reg);
                break;
        case offsetof(struct tcphdr, dest):
-               if (priv->len != sizeof(__be16))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
@@ -359,14 +395,14 @@ static int nft_payload_offload_udp(struct nft_offload_ctx *ctx,
 
        switch (priv->offset) {
        case offsetof(struct udphdr, source):
-               if (priv->len != sizeof(__be16))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
                                  sizeof(__be16), reg);
                break;
        case offsetof(struct udphdr, dest):
-               if (priv->len != sizeof(__be16))
+               if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
                        return -EOPNOTSUPP;
 
                NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
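nft_payload_offload_mask() lets a payload match cover only a prefix of the underlying field: the mask starts as all-ones and, when the match length is not word-aligned, the low bytes of the final 32-bit word are cleared. A runnable userspace model of that arithmetic (assuming, as in the address cases above, a field length that is a multiple of four bytes):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Build a byte mask matching the first priv_len bytes of a
 * field_len-byte big-endian field, as in the helper above.
 */
static void build_mask(uint8_t *mask, unsigned int priv_len,
                       unsigned int field_len)
{
        unsigned int remainder = priv_len % sizeof(uint32_t);

        memset(mask, 0xff, field_len);
        if (remainder) {
                unsigned int k = priv_len / sizeof(uint32_t);
                unsigned int delta = field_len - priv_len;
                /* clear the low (unmatched) bytes of the last partial word */
                uint32_t word = htonl(~((1u << (delta * 8)) - 1));

                memcpy(mask + k * sizeof(uint32_t), &word, sizeof(word));
        }
}

int main(void)
{
        uint8_t mask[4];

        build_mask(mask, 3, 4); /* match 3 of 4 bytes, e.g. a /24 address */
        for (unsigned int i = 0; i < sizeof(mask); i++)
                printf("%02x", mask[i]);
        printf("\n");           /* prints ffffff00 */
        return 0;
}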
index af22dbe..acce622 100644 (file)
@@ -1349,6 +1349,14 @@ struct xt_counters *xt_counters_alloc(unsigned int counters)
 }
 EXPORT_SYMBOL(xt_counters_alloc);
 
+struct xt_table_info
+*xt_table_get_private_protected(const struct xt_table *table)
+{
+       return rcu_dereference_protected(table->private,
+                                        mutex_is_locked(&xt[table->af].mutex));
+}
+EXPORT_SYMBOL(xt_table_get_private_protected);
+
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
              unsigned int num_counters,
@@ -1356,7 +1364,6 @@ xt_replace_table(struct xt_table *table,
              int *error)
 {
        struct xt_table_info *private;
-       unsigned int cpu;
        int ret;
 
        ret = xt_jumpstack_alloc(newinfo);
@@ -1366,47 +1373,20 @@ xt_replace_table(struct xt_table *table,
        }
 
        /* Do the substitution. */
-       local_bh_disable();
-       private = table->private;
+       private = xt_table_get_private_protected(table);
 
        /* Check inside lock: is the old number correct? */
        if (num_counters != private->number) {
                pr_debug("num_counters != table->private->number (%u/%u)\n",
                         num_counters, private->number);
-               local_bh_enable();
                *error = -EAGAIN;
                return NULL;
        }
 
        newinfo->initial_entries = private->initial_entries;
-       /*
-        * Ensure contents of newinfo are visible before assigning to
-        * private.
-        */
-       smp_wmb();
-       table->private = newinfo;
-
-       /* make sure all cpus see new ->private value */
-       smp_wmb();
 
-       /*
-        * Even though table entries have now been swapped, other CPU's
-        * may still be using the old entries...
-        */
-       local_bh_enable();
-
-       /* ... so wait for even xt_recseq on all cpus */
-       for_each_possible_cpu(cpu) {
-               seqcount_t *s = &per_cpu(xt_recseq, cpu);
-               u32 seq = raw_read_seqcount(s);
-
-               if (seq & 1) {
-                       do {
-                               cond_resched();
-                               cpu_relax();
-                       } while (seq == raw_read_seqcount(s));
-               }
-       }
+       rcu_assign_pointer(table->private, newinfo);
+       synchronize_rcu();
 
        audit_log_nfcfg(table->name, table->af, private->number,
                        !private->number ? AUDIT_XT_OP_REGISTER :
@@ -1442,12 +1422,12 @@ struct xt_table *xt_register_table(struct net *net,
        }
 
        /* Simplifies replace_table code. */
-       table->private = bootstrap;
+       rcu_assign_pointer(table->private, bootstrap);
 
        if (!xt_replace_table(table, 0, newinfo, &ret))
                goto unlock;
 
-       private = table->private;
+       private = xt_table_get_private_protected(table);
        pr_debug("table->private->number = %u\n", private->number);
 
        /* save number of initial entries */
@@ -1470,7 +1450,8 @@ void *xt_unregister_table(struct xt_table *table)
        struct xt_table_info *private;
 
        mutex_lock(&xt[table->af].mutex);
-       private = table->private;
+       private = xt_table_get_private_protected(table);
+       RCU_INIT_POINTER(table->private, NULL);
        list_del(&table->list);
        mutex_unlock(&xt[table->af].mutex);
        audit_log_nfcfg(table->name, table->af, private->number,
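The xt_replace_table() rewrite swaps the smp_wmb()/seqcount wait for plain RCU: readers dereference table->private under rcu_read_lock(), the updater publishes the new info with rcu_assign_pointer() and waits out readers with synchronize_rcu(), and mutex holders go through the new xt_table_get_private_protected(). A condensed kernel-style sketch of the pattern, with a hypothetical table and lock:

#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct demo_info;

/* Hypothetical table whose private data is RCU-managed. */
struct demo_table {
        struct demo_info __rcu *private;
};

static DEFINE_MUTEX(demo_mutex);

/* Updater side, running under demo_mutex like xt_replace_table(). */
static struct demo_info *demo_replace(struct demo_table *t,
                                      struct demo_info *newinfo)
{
        struct demo_info *old;

        old = rcu_dereference_protected(t->private,
                                        mutex_is_locked(&demo_mutex));
        rcu_assign_pointer(t->private, newinfo);
        synchronize_rcu();      /* wait out readers still using old */
        return old;             /* now safe to free */
}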
index fc55c91..ccb4916 100644 (file)
@@ -1167,7 +1167,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
        u32 skip_bkt = cb->args[0];
        u32 skip_chain = cb->args[1];
        u32 skip_addr4 = cb->args[2];
-       u32 iter_bkt, iter_chain, iter_addr4 = 0, iter_addr6 = 0;
+       u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
        struct netlbl_unlhsh_iface *iface;
        struct list_head *iter_list;
        struct netlbl_af4list *addr4;
index b87bfc8..c3a6648 100644 (file)
@@ -199,6 +199,9 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
        __be32 lse;
        int err;
 
+       if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
+               return -ENOMEM;
+
        stack = mpls_hdr(skb);
        lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
        err = skb_mpls_update_lse(skb, lse);
@@ -958,14 +961,13 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
 {
        /* The first action is always 'OVS_DEC_TTL_ATTR_ARG'. */
        struct nlattr *dec_ttl_arg = nla_data(attr);
-       int rem = nla_len(attr);
 
        if (nla_len(dec_ttl_arg)) {
-               struct nlattr *actions = nla_next(dec_ttl_arg, &rem);
+               struct nlattr *actions = nla_data(dec_ttl_arg);
 
                if (actions)
-                       return clone_execute(dp, skb, key, 0, actions, rem,
-                                            last, false);
+                       return clone_execute(dp, skb, key, 0, nla_data(actions),
+                                            nla_len(actions), last, false);
        }
        consume_skb(skb);
        return 0;
index 9d3e50c..4c5c233 100644 (file)
@@ -2503,28 +2503,42 @@ static int validate_and_copy_dec_ttl(struct net *net,
                                     __be16 eth_type, __be16 vlan_tci,
                                     u32 mpls_label_count, bool log)
 {
-       int start, err;
-       u32 nested = true;
+       const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
+       int start, action_start, err, rem;
+       const struct nlattr *a, *actions;
+
+       memset(attrs, 0, sizeof(attrs));
+       nla_for_each_nested(a, attr, rem) {
+               int type = nla_type(a);
 
-       if (!nla_len(attr))
-               return ovs_nla_add_action(sfa, OVS_ACTION_ATTR_DEC_TTL,
-                                         NULL, 0, log);
+               /* Ignore unknown attributes to be future proof. */
+               if (type > OVS_DEC_TTL_ATTR_MAX)
+                       continue;
+
+               if (!type || attrs[type])
+                       return -EINVAL;
+
+               attrs[type] = a;
+       }
+
+       actions = attrs[OVS_DEC_TTL_ATTR_ACTION];
+       if (rem || !actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+               return -EINVAL;
 
        start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
        if (start < 0)
                return start;
 
-       err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ACTION, &nested,
-                                sizeof(nested), log);
+       action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log);
+       if (action_start < 0)
+               return action_start;
 
-       if (err)
-               return err;
-
-       err = __ovs_nla_copy_actions(net, attr, key, sfa, eth_type,
+       err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
                                     vlan_tci, mpls_label_count, log);
        if (err)
                return err;
 
+       add_nested_action_end(*sfa, action_start);
        add_nested_action_end(*sfa, start);
        return 0;
 }
@@ -3487,20 +3501,42 @@ out:
 static int dec_ttl_action_to_attr(const struct nlattr *attr,
                                  struct sk_buff *skb)
 {
-       int err = 0, rem = nla_len(attr);
-       struct nlattr *start;
+       struct nlattr *start, *action_start;
+       const struct nlattr *a;
+       int err = 0, rem;
 
        start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
-
        if (!start)
                return -EMSGSIZE;
 
-       err = ovs_nla_put_actions(nla_data(attr), rem, skb);
-       if (err)
-               nla_nest_cancel(skb, start);
-       else
-               nla_nest_end(skb, start);
+       nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
+               switch (nla_type(a)) {
+               case OVS_DEC_TTL_ATTR_ACTION:
+
+                       action_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
+                       if (!action_start) {
+                               err = -EMSGSIZE;
+                               goto out;
+                       }
+
+                       err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+                       if (err)
+                               goto out;
+
+                       nla_nest_end(skb, action_start);
+                       break;
 
+               default:
+                       /* Ignore all other options to be future compatible */
+                       break;
+               }
+       }
+
+       nla_nest_end(skb, start);
+       return 0;
+
+out:
+       nla_nest_cancel(skb, start);
        return err;
 }
 
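validate_and_copy_dec_ttl() now collects the nested attributes into a table, rejecting zero or duplicate types and skipping unknown ones for forward compatibility. A kernel-style sketch of that parse loop over a hypothetical attribute space:

#include <net/netlink.h>

#define DEMO_ATTR_MAX 3

/* Collect at most one occurrence of each known nested attribute;
 * callers zero attrs[] first. Hypothetical types, for illustration.
 */
static int demo_parse_nested(const struct nlattr *attr,
                             const struct nlattr **attrs)
{
        const struct nlattr *a;
        int rem;

        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);

                if (type > DEMO_ATTR_MAX)
                        continue;       /* future-proof: ignore unknowns */
                if (!type || attrs[type])
                        return -EINVAL; /* zero type or duplicate */
                attrs[type] = a;
        }
        return rem ? -EINVAL : 0;       /* trailing bytes are an error */
}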
index cefbd50..7a18fff 100644 (file)
@@ -93,8 +93,8 @@
 
 /*
    Assumptions:
-   - If the device has no dev->header_ops, there is no LL header visible
-     above the device. In this case, its hard_header_len should be 0.
+   - If the device has no dev->header_ops->create, there is no LL header
+     visible above the device. In this case, its hard_header_len should be 0.
      The device may prepend its own header internally. In this case, its
      needed_headroom should be set to the space needed for it to add its
      internal header.
 On receive:
 -----------
 
-Incoming, dev->header_ops != NULL
+Incoming, dev_has_header(dev) == true
    mac_header -> ll header
    data       -> data
 
-Outgoing, dev->header_ops != NULL
+Outgoing, dev_has_header(dev) == true
    mac_header -> ll header
    data       -> ll header
 
-Incoming, dev->header_ops == NULL
+Incoming, dev_has_header(dev) == false
    mac_header -> data
      However drivers often make it point to the ll header.
      This is incorrect because the ll header should be invisible to us.
    data       -> data
 
-Outgoing, dev->header_ops == NULL
+Outgoing, dev_has_header(dev) == false
    mac_header -> data. ll header is invisible to us.
    data       -> data
 
 Resume
-  If dev->header_ops == NULL we are unable to restore the ll header,
+  If dev_has_header(dev) == false we are unable to restore the ll header,
     because it is invisible to us.
 
 
@@ -2069,7 +2069,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
        skb->dev = dev;
 
-       if (dev->header_ops) {
+       if (dev_has_header(dev)) {
                /* The device has an explicit notion of ll header,
                 * exported to higher levels.
                 *
@@ -2198,7 +2198,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
        if (!net_eq(dev_net(dev), sock_net(sk)))
                goto drop;
 
-       if (dev->header_ops) {
+       if (dev_has_header(dev)) {
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb_mac_header(skb));
                else if (skb->pkt_type == PACKET_OUTGOING) {
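The af_packet hunks switch from testing dev->header_ops directly to dev_has_header(), since a device can install header_ops for parsing while still being unable to construct a link-layer header. Judging from the updated comment block, the helper keys on the ->create method; a sketch under that assumption:

#include <linux/netdevice.h>

/* Sketch: a device "has a header" only if it can build one, i.e. its
 * header_ops provide ->create (an assumption drawn from the comment
 * changes above, not a verbatim copy of the helper).
 */
static bool demo_dev_has_header(const struct net_device *dev)
{
        return dev->header_ops && dev->header_ops->create;
}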
index 971c73c..97101c5 100644 (file)
@@ -876,6 +876,9 @@ static int rfkill_resume(struct device *dev)
 
        rfkill->suspended = false;
 
+       if (!rfkill->registered)
+               return 0;
+
        if (!rfkill->persistent) {
                cur = !!(rfkill->state & RFKILL_BLOCK_SW);
                rfkill_set_block(rfkill, cur);
index 7b09427..11c45c8 100644 (file)
@@ -96,10 +96,19 @@ static void rose_loopback_timer(struct timer_list *unused)
                }
 
                if (frametype == ROSE_CALL_REQUEST) {
-                       if ((dev = rose_dev_get(dest)) != NULL) {
-                               if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0)
-                                       kfree_skb(skb);
-                       } else {
+                       if (!rose_loopback_neigh->dev) {
+                               kfree_skb(skb);
+                               continue;
+                       }
+
+                       dev = rose_dev_get(dest);
+                       if (!dev) {
+                               kfree_skb(skb);
+                               continue;
+                       }
+
+                       if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) {
+                               dev_put(dev);
                                kfree_skb(skb);
                        }
                } else {
index 5c7456e..d1486ea 100644 (file)
@@ -105,6 +105,9 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
                        goto drop;
                break;
        case TCA_MPLS_ACT_MODIFY:
+               if (!pskb_may_pull(skb,
+                                  skb_network_offset(skb) + MPLS_HLEN))
+                       goto drop;
                new_lse = tcf_mpls_get_lse(mpls_hdr(skb), p, false);
                if (skb_mpls_update_lse(skb, new_lse))
                        goto drop;
index fed18fd..1319986 100644 (file)
@@ -2424,8 +2424,8 @@ static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb,
                        return err;
        }
        if (lse_mask->mpls_label) {
-               err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,
-                                lse_key->mpls_label);
+               err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,
+                                 lse_key->mpls_label);
                if (err)
                        return err;
        }
index 4dda155..949163f 100644 (file)
@@ -401,6 +401,7 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
 
        INIT_LIST_HEAD(&q->new_flows);
        INIT_LIST_HEAD(&q->old_flows);
+       timer_setup(&q->adapt_timer, fq_pie_timer, 0);
 
        if (opt) {
                err = fq_pie_change(sch, opt, extack);
@@ -426,7 +427,6 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
                pie_vars_init(&flow->vars);
        }
 
-       timer_setup(&q->adapt_timer, fq_pie_timer, 0);
        mod_timer(&q->adapt_timer, jiffies + HZ / 2);
 
        return 0;
index 55d4fc6..d508f6f 100644 (file)
@@ -449,7 +449,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
                else {
                        if (!mod_timer(&t->proto_unreach_timer,
                                                jiffies + (HZ/20)))
-                               sctp_association_hold(asoc);
+                               sctp_transport_hold(t);
                }
        } else {
                struct net *net = sock_net(sk);
@@ -458,7 +458,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
                         "encountered!\n", __func__);
 
                if (del_timer(&t->proto_unreach_timer))
-                       sctp_association_put(asoc);
+                       sctp_transport_put(t);
 
                sctp_do_sm(net, SCTP_EVENT_T_OTHER,
                           SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
index 813d307..0948f14 100644 (file)
@@ -419,7 +419,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t)
                /* Try again later.  */
                if (!mod_timer(&transport->proto_unreach_timer,
                                jiffies + (HZ/20)))
-                       sctp_association_hold(asoc);
+                       sctp_transport_hold(transport);
                goto out_unlock;
        }
 
@@ -435,7 +435,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t)
 
 out_unlock:
        bh_unlock_sock(sk);
-       sctp_association_put(asoc);
+       sctp_transport_put(transport);
 }
 
  /* Handle the timeout of the RE-CONFIG timer. */
index 806af58..60fcf31 100644 (file)
@@ -133,7 +133,7 @@ void sctp_transport_free(struct sctp_transport *transport)
 
        /* Delete the ICMP proto unreachable timer if it's active. */
        if (del_timer(&transport->proto_unreach_timer))
-               sctp_association_put(transport->asoc);
+               sctp_transport_put(transport);
 
        sctp_transport_put(transport);
 }
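All three sctp hunks repoint the reference pairing for proto_unreach_timer at the transport that owns the timer: take a reference when mod_timer() arms a previously idle timer, drop it when del_timer() removes a pending one or when the handler completes. A kernel-style sketch of the pairing on a hypothetical object:

#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/timer.h>

struct demo_obj {
        refcount_t refcnt;
        struct timer_list timer;        /* assumed set up via timer_setup() */
};

static void demo_put(struct demo_obj *o)
{
        if (refcount_dec_and_test(&o->refcnt))
                kfree(o);
}

static void demo_arm(struct demo_obj *o, unsigned long expires)
{
        /* mod_timer() returns 0 when the timer was not pending, so the
         * reference is taken exactly once per armed timer.
         */
        if (!mod_timer(&o->timer, expires))
                refcount_inc(&o->refcnt);
}

static void demo_disarm(struct demo_obj *o)
{
        /* del_timer() returns nonzero only if it deactivated a pending
         * timer; that is when the arm-side reference must be dropped.
         */
        if (del_timer(&o->timer))
                demo_put(o);
}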
index e9f487c..5dd4faa 100644 (file)
@@ -979,7 +979,8 @@ static int __smc_connect(struct smc_sock *smc)
 
        /* check if smc modes and versions of CLC proposal and accept match */
        rc = smc_connect_check_aclc(ini, aclc);
-       version = aclc->hdr.version == SMC_V1 ? SMC_V1 : version;
+       version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
+       ini->smcd_version = version;
        if (rc)
                goto vlan_cleanup;
 
index 2b19863..af96f81 100644 (file)
@@ -1309,7 +1309,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
                                    ini->ism_peer_gid[ini->ism_selected]) :
                     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
                    !lgr->sync_err &&
-                   lgr->vlan_id == ini->vlan_id &&
+                   (ini->smcd_version == SMC_V2 ||
+                    lgr->vlan_id == ini->vlan_id) &&
                    (role == SMC_CLNT || ini->is_smcd ||
                     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
                        /* link group found */
index 1c314db..fc766b5 100644 (file)
@@ -198,9 +198,9 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
                rcu_read_lock();
                ndev = rdma_read_gid_attr_ndev_rcu(attr);
                if (!IS_ERR(ndev) &&
-                   ((!vlan_id && !is_vlan_dev(attr->ndev)) ||
-                    (vlan_id && is_vlan_dev(attr->ndev) &&
-                     vlan_dev_vlan_id(attr->ndev) == vlan_id)) &&
+                   ((!vlan_id && !is_vlan_dev(ndev)) ||
+                    (vlan_id && is_vlan_dev(ndev) &&
+                     vlan_dev_vlan_id(ndev) == vlan_id)) &&
                    attr->gid_type == IB_GID_TYPE_ROCE) {
                        rcu_read_unlock();
                        if (gid)
index d269ebe..83978d5 100644 (file)
@@ -2181,7 +2181,11 @@ void tipc_node_apply_property(struct net *net, struct tipc_bearer *b,
                                                        &xmitq);
                        else if (prop == TIPC_NLA_PROP_MTU)
                                tipc_link_set_mtu(e->link, b->mtu);
+
+                       /* Update MTU for node link entry */
+                       e->mtu = tipc_link_mss(e->link);
                }
+
                tipc_node_write_unlock(n);
                tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL);
        }
index cec8622..a3ab2d3 100644 (file)
@@ -694,36 +694,51 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx,
 
 static bool
 tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async,
-                          s64 resync_req, u32 *seq)
+                          s64 resync_req, u32 *seq, u16 *rcd_delta)
 {
        u32 is_async = resync_req & RESYNC_REQ_ASYNC;
        u32 req_seq = resync_req >> 32;
        u32 req_end = req_seq + ((resync_req >> 16) & 0xffff);
+       u16 i;
+
+       *rcd_delta = 0;
 
        if (is_async) {
+               /* shouldn't get to wraparound:
+                * too long in async stage, something bad happened
+                */
+               if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX))
+                       return false;
+
                /* asynchronous stage: log all headers seq such that
                 * req_seq <= seq <= end_seq, and wait for real resync request
                 */
-               if (between(*seq, req_seq, req_end) &&
+               if (before(*seq, req_seq))
+                       return false;
+               if (!after(*seq, req_end) &&
                    resync_async->loglen < TLS_DEVICE_RESYNC_ASYNC_LOGMAX)
                        resync_async->log[resync_async->loglen++] = *seq;
 
+               resync_async->rcd_delta++;
+
                return false;
        }
 
        /* synchronous stage: check against the logged entries and
         * proceed to check the next entries if no match was found
         */
-       while (resync_async->loglen) {
-               if (req_seq == resync_async->log[resync_async->loglen - 1] &&
-                   atomic64_try_cmpxchg(&resync_async->req,
-                                        &resync_req, 0)) {
-                       resync_async->loglen = 0;
+       for (i = 0; i < resync_async->loglen; i++)
+               if (req_seq == resync_async->log[i] &&
+                   atomic64_try_cmpxchg(&resync_async->req, &resync_req, 0)) {
+                       *rcd_delta = resync_async->rcd_delta - i;
                        *seq = req_seq;
+                       resync_async->loglen = 0;
+                       resync_async->rcd_delta = 0;
                        return true;
                }
-               resync_async->loglen--;
-       }
+
+       resync_async->loglen = 0;
+       resync_async->rcd_delta = 0;
 
        if (req_seq == *seq &&
            atomic64_try_cmpxchg(&resync_async->req,
@@ -741,6 +756,7 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
        u32 sock_data, is_req_pending;
        struct tls_prot_info *prot;
        s64 resync_req;
+       u16 rcd_delta;
        u32 req_seq;
 
        if (tls_ctx->rx_conf != TLS_HW)
@@ -786,8 +802,9 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
                        return;
 
                if (!tls_device_rx_resync_async(rx_ctx->resync_async,
-                                               resync_req, &seq))
+                                               resync_req, &seq, &rcd_delta))
                        return;
+               tls_bigint_subtract(rcd_sn, rcd_delta);
                break;
        }
 
@@ -1245,6 +1262,8 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
        if (tls_ctx->tx_conf != TLS_HW) {
                dev_put(netdev);
                tls_ctx->netdev = NULL;
+       } else {
+               set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags);
        }
 out:
        up_read(&device_offload_lock);
@@ -1274,7 +1293,8 @@ static int tls_device_down(struct net_device *netdev)
                if (ctx->tx_conf == TLS_HW)
                        netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
                                                        TLS_OFFLOAD_CTX_DIR_TX);
-               if (ctx->rx_conf == TLS_HW)
+               if (ctx->rx_conf == TLS_HW &&
+                   !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags))
                        netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
                                                        TLS_OFFLOAD_CTX_DIR_RX);
                WRITE_ONCE(ctx->netdev, NULL);
index 95ab554..845c628 100644 (file)
@@ -1295,6 +1295,12 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
                        return NULL;
                }
 
+               if (!skb_queue_empty(&sk->sk_receive_queue)) {
+                       __strp_unpause(&ctx->strp);
+                       if (ctx->recv_pkt)
+                               return ctx->recv_pkt;
+               }
+
                if (sk->sk_shutdown & RCV_SHUTDOWN)
                        return NULL;
 
@@ -1913,7 +1919,7 @@ pick_next_record:
                         * another message type
                         */
                        msg->msg_flags |= MSG_EOR;
-                       if (ctx->control != TLS_RECORD_TYPE_DATA)
+                       if (control != TLS_RECORD_TYPE_DATA)
                                goto recv_end;
                } else {
                        break;
index b4d7b8a..d10916a 100644 (file)
@@ -438,7 +438,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
        case SOCK_STREAM:
                if (vsock_use_local_transport(remote_cid))
                        new_transport = transport_local;
-               else if (remote_cid <= VMADDR_CID_HOST)
+               else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g)
                        new_transport = transport_g2h;
                else
                        new_transport = transport_h2g;
index 0edda1e..5956939 100644 (file)
@@ -841,8 +841,10 @@ void virtio_transport_release(struct vsock_sock *vsk)
                virtio_transport_free_pkt(pkt);
        }
 
-       if (remove_sock)
+       if (remove_sock) {
+               sock_set_flag(sk, SOCK_DONE);
                vsock_remove_sock(vsk);
+       }
 }
 EXPORT_SYMBOL_GPL(virtio_transport_release);
 
@@ -1132,8 +1134,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
 
        lock_sock(sk);
 
-       /* Check if sk has been released before lock_sock */
-       if (sk->sk_shutdown == SHUTDOWN_MASK) {
+       /* Check if sk has been closed before lock_sock */
+       if (sock_flag(sk, SOCK_DONE)) {
                (void)virtio_transport_reset_no_sock(t, pkt);
                release_sock(sk);
                sock_put(sk);
index a77174b..f67ddf2 100644 (file)
@@ -12634,7 +12634,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
        struct net_device *dev = info->user_ptr[1];
        struct wireless_dev *wdev = dev->ieee80211_ptr;
        struct nlattr *tb[NUM_NL80211_REKEY_DATA];
-       struct cfg80211_gtk_rekey_data rekey_data;
+       struct cfg80211_gtk_rekey_data rekey_data = {};
        int err;
 
        if (!info->attrs[NL80211_ATTR_REKEY_DATA])
index 046d3fe..e65a501 100644 (file)
@@ -681,7 +681,8 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        int len, i, rc = 0;
 
        if (addr_len != sizeof(struct sockaddr_x25) ||
-           addr->sx25_family != AF_X25) {
+           addr->sx25_family != AF_X25 ||
+           strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) {
                rc = -EINVAL;
                goto out;
        }
@@ -775,7 +776,8 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
 
        rc = -EINVAL;
        if (addr_len != sizeof(struct sockaddr_x25) ||
-           addr->sx25_family != AF_X25)
+           addr->sx25_family != AF_X25 ||
+           strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN)
                goto out;
 
        rc = -ENETUNREACH;
@@ -1050,6 +1052,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
        makex25->lci           = lci;
        makex25->dest_addr     = dest_addr;
        makex25->source_addr   = source_addr;
+       x25_neigh_hold(nb);
        makex25->neighbour     = nb;
        makex25->facilities    = facilities;
        makex25->dte_facilities= dte_facilities;
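The x25 bind/connect checks reject addresses that are not NUL-terminated: strnlen() returning the full buffer length means no terminator was found, so later string copies would read past the field. A userspace model:

#include <stdio.h>
#include <string.h>

#define DEMO_ADDR_LEN 16        /* stand-in for X25_ADDR_LEN */

/* A fixed-size address field is valid only if a NUL terminator sits
 * inside the buffer; strnlen() hitting the full length means it does not.
 */
static int demo_addr_valid(const char *addr)
{
        return strnlen(addr, DEMO_ADDR_LEN) != DEMO_ADDR_LEN;
}

int main(void)
{
        char good[DEMO_ADDR_LEN] = "12345";
        char bad[DEMO_ADDR_LEN];

        memset(bad, 'A', sizeof(bad));  /* no terminator anywhere */
        printf("%d %d\n", demo_addr_valid(good), demo_addr_valid(bad));
        return 0;
}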
index 56d052b..56a28a6 100644 (file)
@@ -66,18 +66,31 @@ static void xdp_umem_release(struct xdp_umem *umem)
        kfree(umem);
 }
 
+static void xdp_umem_release_deferred(struct work_struct *work)
+{
+       struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
+
+       xdp_umem_release(umem);
+}
+
 void xdp_get_umem(struct xdp_umem *umem)
 {
        refcount_inc(&umem->users);
 }
 
-void xdp_put_umem(struct xdp_umem *umem)
+void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
 {
        if (!umem)
                return;
 
-       if (refcount_dec_and_test(&umem->users))
-               xdp_umem_release(umem);
+       if (refcount_dec_and_test(&umem->users)) {
+               if (defer_cleanup) {
+                       INIT_WORK(&umem->work, xdp_umem_release_deferred);
+                       schedule_work(&umem->work);
+               } else {
+                       xdp_umem_release(umem);
+               }
+       }
 }
 
 static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
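xdp_put_umem() gains a defer_cleanup path: when the final reference drops in a context that must not sleep, the teardown is punted to a work item rather than run inline. A kernel-style sketch of the pattern on a hypothetical object:

#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_umem {
        refcount_t users;
        struct work_struct work;
};

static void demo_release(struct demo_umem *u)
{
        kfree(u);       /* stands in for the real teardown */
}

static void demo_release_deferred(struct work_struct *work)
{
        demo_release(container_of(work, struct demo_umem, work));
}

static void demo_put(struct demo_umem *u, bool defer_cleanup)
{
        if (!refcount_dec_and_test(&u->users))
                return;

        if (defer_cleanup) {
                /* caller cannot sleep: run the teardown from a worker */
                INIT_WORK(&u->work, demo_release_deferred);
                schedule_work(&u->work);
        } else {
                demo_release(u);
        }
}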
index 181fdda..aa9fe27 100644 (file)
@@ -9,7 +9,7 @@
 #include <net/xdp_sock_drv.h>
 
 void xdp_get_umem(struct xdp_umem *umem);
-void xdp_put_umem(struct xdp_umem *umem);
+void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup);
 struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
 
 #endif /* XDP_UMEM_H_ */
index cfbec39..6250447 100644 (file)
@@ -211,6 +211,14 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
        return 0;
 }
 
+static bool xsk_tx_writeable(struct xdp_sock *xs)
+{
+       if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2)
+               return false;
+
+       return true;
+}
+
 static bool xsk_is_bound(struct xdp_sock *xs)
 {
        if (READ_ONCE(xs->state) == XSK_BOUND) {
@@ -296,7 +304,8 @@ void xsk_tx_release(struct xsk_buff_pool *pool)
        rcu_read_lock();
        list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
                __xskq_cons_release(xs->tx);
-               xs->sk.sk_write_space(&xs->sk);
+               if (xsk_tx_writeable(xs))
+                       xs->sk.sk_write_space(&xs->sk);
        }
        rcu_read_unlock();
 }
@@ -411,11 +420,7 @@ static int xsk_generic_xmit(struct sock *sk)
                skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
                skb->destructor = xsk_destruct_skb;
 
-               /* Hinder dev_direct_xmit from freeing the packet and
-                * therefore completing it in the destructor
-                */
-               refcount_inc(&skb->users);
-               err = dev_direct_xmit(skb, xs->queue_id);
+               err = __dev_direct_xmit(skb, xs->queue_id);
                if  (err == NETDEV_TX_BUSY) {
                        /* Tell user-space to retry the send */
                        skb->destructor = sock_wfree;
@@ -429,12 +434,10 @@ static int xsk_generic_xmit(struct sock *sk)
                /* Ignore NET_XMIT_CN as packet might have been sent */
                if (err == NET_XMIT_DROP) {
                        /* SKB completed but not sent */
-                       kfree_skb(skb);
                        err = -EBUSY;
                        goto out;
                }
 
-               consume_skb(skb);
                sent_frame = true;
        }
 
@@ -442,7 +445,8 @@ static int xsk_generic_xmit(struct sock *sk)
 
 out:
        if (sent_frame)
-               sk->sk_write_space(sk);
+               if (xsk_tx_writeable(xs))
+                       sk->sk_write_space(sk);
 
        mutex_unlock(&xs->mutex);
        return err;
@@ -477,11 +481,13 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 static __poll_t xsk_poll(struct file *file, struct socket *sock,
                             struct poll_table_struct *wait)
 {
-       __poll_t mask = datagram_poll(file, sock, wait);
+       __poll_t mask = 0;
        struct sock *sk = sock->sk;
        struct xdp_sock *xs = xdp_sk(sk);
        struct xsk_buff_pool *pool;
 
+       sock_poll_wait(file, sock, wait);
+
        if (unlikely(!xsk_is_bound(xs)))
                return mask;
 
@@ -497,7 +503,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
 
        if (xs->rx && !xskq_prod_is_empty(xs->rx))
                mask |= EPOLLIN | EPOLLRDNORM;
-       if (xs->tx && !xskq_cons_is_full(xs->tx))
+       if (xs->tx && xsk_tx_writeable(xs))
                mask |= EPOLLOUT | EPOLLWRNORM;
 
        return mask;
@@ -1147,7 +1153,7 @@ static void xsk_destruct(struct sock *sk)
                return;
 
        if (!xp_put_pool(xs->pool))
-               xdp_put_umem(xs->umem);
+               xdp_put_umem(xs->umem, !xs->pool);
 
        sk_refcnt_debug_dec(sk);
 }
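
Two behavioural changes meet in xsk_poll(): the socket now registers with sock_poll_wait() directly instead of going through datagram_poll(), and EPOLLOUT is only reported while xsk_tx_writeable() holds, i.e. while the Tx ring is at most half full. For user space this turns poll() into a usable backpressure signal. A hedged sketch of a producer loop built on it (xsk_fd, have_frames() and submit_tx() are hypothetical stand-ins, not part of this diff):

    #include <poll.h>

    extern int have_frames(void);      /* hypothetical */
    extern void submit_tx(int fd);     /* hypothetical */

    void tx_loop(int xsk_fd)
    {
            struct pollfd pfd = { .fd = xsk_fd, .events = POLLOUT };

            while (have_frames()) {
                    /* Blocks until the Tx ring drains below half full. */
                    if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLOUT))
                            submit_tx(xsk_fd);
            }
    }
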
index 8a3bf4e..d5adeee 100644 (file)
@@ -175,6 +175,7 @@ static int __xp_assign_dev(struct xsk_buff_pool *pool,
 
        if (!pool->dma_pages) {
                WARN(1, "Driver did not DMA map zero-copy buffers");
+               err = -EINVAL;
                goto err_unreg_xsk;
        }
        pool->umem->zc = true;
@@ -185,8 +186,10 @@ err_unreg_xsk:
 err_unreg_pool:
        if (!force_zc)
                err = 0; /* fallback to copy mode */
-       if (err)
+       if (err) {
                xsk_clear_pool_at_qid(netdev, queue_id);
+               dev_put(netdev);
+       }
        return err;
 }
 
@@ -242,7 +245,7 @@ static void xp_release_deferred(struct work_struct *work)
                pool->cq = NULL;
        }
 
-       xdp_put_umem(pool->umem);
+       xdp_put_umem(pool->umem, false);
        xp_destroy(pool);
 }
 
index cdb9cf3..9e71b9f 100644 (file)
@@ -264,6 +264,12 @@ static inline bool xskq_cons_is_full(struct xsk_queue *q)
                q->nentries;
 }
 
+static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
+{
+       /* No barriers needed since data is not accessed */
+       return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer);
+}
+
 /* Functions for producers */
 
 static inline bool xskq_prod_is_full(struct xsk_queue *q)
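
xskq_cons_present_entries() works because both ring indices are free-running 32-bit counters: unsigned subtraction yields the number of outstanding entries even across wrap-around, with no masking against nentries required. A standalone demonstration of that property (arbitrary values, not from the diff):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            /* Producer has wrapped past 2^32; consumer has not. */
            uint32_t producer = 5;
            uint32_t consumer = UINT32_MAX - 2;

            /* Wrap-safe: 8 entries are outstanding. */
            assert((uint32_t)(producer - consumer) == 8);
            return 0;
    }
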
index e28f0c9..d8e8a11 100644 (file)
@@ -234,6 +234,7 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
        case XFRMA_PAD:
                /* Ignore */
                return 0;
+       case XFRMA_UNSPEC:
        case XFRMA_ALG_AUTH:
        case XFRMA_ALG_CRYPT:
        case XFRMA_ALG_COMP:
@@ -387,7 +388,7 @@ static int xfrm_attr_cpy32(void *dst, size_t *pos, const struct nlattr *src,
 
        memcpy(nla, src, nla_attr_size(copy_len));
        nla->nla_len = nla_attr_size(payload);
-       *pos += nla_attr_size(payload);
+       *pos += nla_attr_size(copy_len);
        nlmsg->nlmsg_len += nla->nla_len;
 
        memset(dst + *pos, 0, payload - copy_len);
@@ -563,7 +564,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
                return NULL;
 
        len += NLMSG_HDRLEN;
-       h64 = kvmalloc(len, GFP_KERNEL | __GFP_ZERO);
+       h64 = kvmalloc(len, GFP_KERNEL);
        if (!h64)
                return ERR_PTR(-ENOMEM);
 
index a77da7a..2f15178 100644 (file)
@@ -2382,8 +2382,10 @@ int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
        if (in_compat_syscall()) {
                struct xfrm_translator *xtr = xfrm_get_translator();
 
-               if (!xtr)
+               if (!xtr) {
+                       kfree(data);
                        return -EOPNOTSUPP;
+               }
 
                err = xtr->xlate_user_policy_sockptr(&data, optlen);
                xfrm_put_translator(xtr);
index c13a5bc..5b9a099 100644 (file)
@@ -21,6 +21,7 @@ static unsigned long my_ip = (unsigned long)schedule;
 asm (
 "      .pushsection    .text, \"ax\", @progbits\n"
 "      .type           my_tramp1, @function\n"
+"      .globl          my_tramp1\n"
 "   my_tramp1:"
 "      pushq %rbp\n"
 "      movq %rsp, %rbp\n"
@@ -29,6 +30,7 @@ asm (
 "      .size           my_tramp1, .-my_tramp1\n"
 "      ret\n"
 "      .type           my_tramp2, @function\n"
+"      .globl          my_tramp2\n"
 "   my_tramp2:"
 "      pushq %rbp\n"
 "      movq %rsp, %rbp\n"
index d5c5022..3f0079c 100644 (file)
@@ -16,6 +16,7 @@ extern void my_tramp(void *);
 asm (
 "      .pushsection    .text, \"ax\", @progbits\n"
 "      .type           my_tramp, @function\n"
+"      .globl          my_tramp\n"
 "   my_tramp:"
 "      pushq %rbp\n"
 "      movq %rsp, %rbp\n"
index 63ca06d..a2729d1 100644 (file)
@@ -14,6 +14,7 @@ extern void my_tramp(void *);
 asm (
 "      .pushsection    .text, \"ax\", @progbits\n"
 "      .type           my_tramp, @function\n"
+"      .globl          my_tramp\n"
 "   my_tramp:"
 "      pushq %rbp\n"
 "      movq %rsp, %rbp\n"
index ae64737..4c058f1 100644 (file)
@@ -252,6 +252,9 @@ objtool_dep = $(objtool_obj)                                        \
 ifdef CONFIG_TRIM_UNUSED_KSYMS
 cmd_gen_ksymdeps = \
        $(CONFIG_SHELL) $(srctree)/scripts/gen_ksymdeps.sh $@ >> $(dot-target).cmd
+
+# List module undefined symbols
+undefined_syms = $(NM) $< | $(AWK) '$$1 == "U" { printf("%s%s", x++ ? " " : "", $$2) }';
 endif
 
 define rule_cc_o_c
@@ -271,13 +274,6 @@ define rule_as_o_S
        $(call cmd,modversions_S)
 endef
 
-# List module undefined symbols (or empty line if not enabled)
-ifdef CONFIG_TRIM_UNUSED_KSYMS
-cmd_undef_syms = $(NM) $< | sed -n 's/^  *U //p' | xargs echo
-else
-cmd_undef_syms = echo
-endif
-
 # Built-in and composite module parts
 $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
        $(call if_changed_rule,cc_o_c)
@@ -285,7 +281,7 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
 
 cmd_mod = { \
        echo $(if $($*-objs)$($*-y)$($*-m), $(addprefix $(obj)/, $($*-objs) $($*-y) $($*-m)), $(@:.mod=.o)); \
-       $(cmd_undef_syms); \
+       $(undefined_syms) echo; \
        } > $@
 
 $(obj)/%.mod: $(obj)/%.o FORCE
index 95e4cdb..6baee12 100644 (file)
@@ -60,7 +60,6 @@ endif
 #
 ifneq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
 
-KBUILD_CFLAGS += -Wcast-align
 KBUILD_CFLAGS += -Wdisabled-optimization
 KBUILD_CFLAGS += -Wnested-externs
 KBUILD_CFLAGS += -Wshadow
@@ -80,6 +79,7 @@ endif
 ifneq ($(findstring 3, $(KBUILD_EXTRA_WARN)),)
 
 KBUILD_CFLAGS += -Wbad-function-cast
+KBUILD_CFLAGS += -Wcast-align
 KBUILD_CFLAGS += -Wcast-qual
 KBUILD_CFLAGS += -Wconversion
 KBUILD_CFLAGS += -Wpacked
diff --git a/scripts/lld-version.sh b/scripts/lld-version.sh
new file mode 100755 (executable)
index 0000000..d70edb4
--- /dev/null
@@ -0,0 +1,20 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Usage: $ ./scripts/lld-version.sh ld.lld
+#
+# Print the linker version of `ld.lld' in a 5 or 6-digit form
+# such as `100001' for ld.lld 10.0.1 etc.
+
+linker_string="$($* --version)"
+
+if ! ( echo $linker_string | grep -q LLD ); then
+       echo 0
+       exit 1
+fi
+
+VERSION=$(echo $linker_string | cut -d ' ' -f 2)
+MAJOR=$(echo $VERSION | cut -d . -f 1)
+MINOR=$(echo $VERSION | cut -d . -f 2)
+PATCHLEVEL=$(echo $VERSION | cut -d . -f 3)
+printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
index 1b11f89..91a502b 100755 (executable)
@@ -45,6 +45,8 @@ create_package() {
        chmod -R go-w "$pdir"
        # in case we are in a restrictive umask environment like 0077
        chmod -R a+rX "$pdir"
+       # in case we build in a setuid/setgid directory
+       chmod -R ug-s "$pdir"
 
        # Create the package
        dpkg-gencontrol -p$pname -P"$pdir"
index 4373de4..3b44378 100644 (file)
@@ -1539,7 +1539,7 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
 
  unlock:
        up_write(&card->controls_rwsem);
-       return 0;
+       return err;
 }
 
 static int snd_ctl_elem_add_user(struct snd_ctl_file *file,
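
The one-line control.c change fixes a swallowed error: every failure path in snd_ctl_elem_add() jumps to the unlock label with err set, but the label then returned a hard-coded 0. The pattern in miniature (do_add()/do_activate() are hypothetical stand-ins for the real steps):

    static int do_add(void);
    static int do_activate(void);
    static struct rw_semaphore ctl_rwsem;

    static int add_elem(void)
    {
            int err;

            down_write(&ctl_rwsem);
            err = do_add();
            if (err < 0)
                    goto unlock;
            err = do_activate();
     unlock:
            up_write(&ctl_rwsem);
            return err;	/* was: return 0 */
    }
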
index 0f533f5..9f8c53b 100644 (file)
@@ -123,7 +123,7 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode)
        t = (struct snd_efw_transaction *)data;
        length = min_t(size_t, be32_to_cpu(t->length) * sizeof(u32), length);
 
-       spin_lock_irq(&efw->lock);
+       spin_lock(&efw->lock);
 
        if (efw->push_ptr < efw->pull_ptr)
                capacity = (unsigned int)(efw->pull_ptr - efw->push_ptr);
@@ -190,7 +190,7 @@ handle_resp_for_user(struct fw_card *card, int generation, int source,
 
        copy_resp_to_buf(efw, data, length, rcode);
 end:
-       spin_unlock_irq(&instances_lock);
+       spin_unlock(&instances_lock);
 }
 
 static void
index bbb1748..8060cc8 100644 (file)
@@ -1364,16 +1364,20 @@ static int try_assign_dacs(struct hda_codec *codec, int num_outs,
                struct nid_path *path;
                hda_nid_t pin = pins[i];
 
-               path = snd_hda_get_path_from_idx(codec, path_idx[i]);
-               if (path) {
-                       badness += assign_out_path_ctls(codec, path);
-                       continue;
+               if (!spec->obey_preferred_dacs) {
+                       path = snd_hda_get_path_from_idx(codec, path_idx[i]);
+                       if (path) {
+                               badness += assign_out_path_ctls(codec, path);
+                               continue;
+                       }
                }
 
                dacs[i] = get_preferred_dac(codec, pin);
                if (dacs[i]) {
                        if (is_dac_already_used(codec, dacs[i]))
                                badness += bad->shared_primary;
+               } else if (spec->obey_preferred_dacs) {
+                       badness += BAD_NO_PRIMARY_DAC;
                }
 
                if (!dacs[i])
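
The new obey_preferred_dacs flag turns the preferred_dacs table from a hint into a mandate: the early path lookup is skipped, and a pin with no table entry is penalized as BAD_NO_PRIMARY_DAC rather than silently reassigned. The table format is a zero-terminated list of (pin NID, DAC NID) pairs, exactly as the ALC289 fixup later in this diff supplies it:

    /* Consumed pairwise by get_preferred_dac(); trailing 0 ends the walk. */
    static const hda_nid_t preferred_pairs[] = {
            0x14, 0x02,	/* speaker pin    -> DAC 0x02 */
            0x17, 0x02,	/* bass speaker   -> DAC 0x02 (shared) */
            0x21, 0x03,	/* headphone pin  -> DAC 0x03 */
            0
    };
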
index a43f0bb..0886bc8 100644 (file)
@@ -237,6 +237,7 @@ struct hda_gen_spec {
        unsigned int power_down_unused:1; /* power down unused widgets */
        unsigned int dac_min_mute:1; /* minimal = mute for DACs */
        unsigned int suppress_vmaster:1; /* don't create vmaster kctls */
+       unsigned int obey_preferred_dacs:1; /* obey preferred_dacs assignment */
 
        /* other internal flags */
        unsigned int no_analog:1; /* digital I/O only */
index d539f52..6852668 100644 (file)
@@ -2506,6 +2506,9 @@ static const struct pci_device_id azx_ids[] = {
        /* DG1 */
        { PCI_DEVICE(0x8086, 0x490d),
          .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+       /* Alderlake-S */
+       { PCI_DEVICE(0x8086, 0x7ad0),
+         .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
        /* Elkhart Lake */
        { PCI_DEVICE(0x8086, 0x4b55),
          .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
index e0c38f2..d8370a4 100644 (file)
@@ -9183,6 +9183,8 @@ static void ca0132_mmio_init(struct hda_codec *codec)
        case QUIRK_AE5:
                ca0132_mmio_init_ae5(codec);
                break;
+       default:
+               break;
        }
 }
 
index ccd1df0..b0068f8 100644 (file)
@@ -4274,6 +4274,7 @@ HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI",    patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI",    patch_i915_icl_hdmi),
 HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI",  patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI",        patch_i915_tgl_hdmi),
+HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI",  patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x80862816, "Rocketlake HDMI", patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi),
 HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI",        patch_i915_icl_hdmi),
index 6899089..8616c56 100644 (file)
@@ -119,6 +119,7 @@ struct alc_spec {
        unsigned int no_shutup_pins:1;
        unsigned int ultra_low_power:1;
        unsigned int has_hs_key:1;
+       unsigned int no_internal_mic_pin:1;
 
        /* for PLL fix */
        hda_nid_t pll_nid;
@@ -445,6 +446,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
                        alc_update_coef_idx(codec, 0x7, 1<<5, 0);
                break;
        case 0x10ec0892:
+       case 0x10ec0897:
                alc_update_coef_idx(codec, 0x7, 1<<5, 0);
                break;
        case 0x10ec0899:
@@ -2522,13 +2524,23 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
        SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
        SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
        SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1558, 0x9506, "Clevo P955HQ", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1558, 0x950A, "Clevo P955H[PR]", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1558, 0x95e3, "Clevo P955[ER]T", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1558, 0x95e4, "Clevo P955ER", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1558, 0x95e5, "Clevo P955EE6", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1558, 0x95e6, "Clevo P950R[CDF]", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1558, 0x97e2, "Clevo P970RC-M", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
        SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+       SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+       SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
        SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
-       SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+       SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
        SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
        SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
        SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
@@ -4216,6 +4228,12 @@ static void alc286_fixup_hp_gpio_led(struct hda_codec *codec,
        alc_fixup_hp_gpio_led(codec, action, 0x02, 0x20);
 }
 
+static void alc287_fixup_hp_gpio_led(struct hda_codec *codec,
+                               const struct hda_fixup *fix, int action)
+{
+       alc_fixup_hp_gpio_led(codec, action, 0x10, 0);
+}
+
 /* turn on/off mic-mute LED per capture hook via VREF change */
 static int vref_micmute_led_set(struct led_classdev *led_cdev,
                                enum led_brightness brightness)
@@ -4507,6 +4525,7 @@ static const struct coef_fw alc225_pre_hsmode[] = {
 
 static void alc_headset_mode_unplugged(struct hda_codec *codec)
 {
+       struct alc_spec *spec = codec->spec;
        static const struct coef_fw coef0255[] = {
                WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */
                WRITE_COEF(0x45, 0xd089), /* UAJ function set to manual mode */
@@ -4581,6 +4600,11 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
                {}
        };
 
+       if (spec->no_internal_mic_pin) {
+               alc_update_coef_idx(codec, 0x45, 0xf<<12 | 1<<10, 5<<12);
+               return;
+       }
+
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
                alc_process_coef_fw(codec, coef0255);
@@ -5147,6 +5171,11 @@ static void alc_determine_headset_type(struct hda_codec *codec)
                {}
        };
 
+       if (spec->no_internal_mic_pin) {
+               alc_update_coef_idx(codec, 0x45, 0xf<<12 | 1<<10, 5<<12);
+               return;
+       }
+
        switch (codec->core.vendor_id) {
        case 0x10ec0255:
                alc_process_coef_fw(codec, coef0255);
@@ -5998,6 +6027,21 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec,
        codec->power_save_node = 0;
 }
 
+/* avoid DAC 0x06 for bass speaker 0x17; it has no volume control */
+static void alc289_fixup_asus_ga401(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       static const hda_nid_t preferred_pairs[] = {
+               0x14, 0x02, 0x17, 0x02, 0x21, 0x03, 0
+       };
+       struct alc_spec *spec = codec->spec;
+
+       if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+               spec->gen.preferred_dacs = preferred_pairs;
+               spec->gen.obey_preferred_dacs = 1;
+       }
+}
+
 /* The DAC of NID 0x3 will introduce click/pop noise on headphones, so invalidate it */
 static void alc285_fixup_invalidate_dacs(struct hda_codec *codec,
                              const struct hda_fixup *fix, int action)
@@ -6105,6 +6149,23 @@ static void alc274_fixup_hp_headset_mic(struct hda_codec *codec,
        }
 }
 
+static void alc_fixup_no_int_mic(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+
+       switch (action) {
+       case HDA_FIXUP_ACT_PRE_PROBE:
+               /* Mic RING SLEEVE swap for combo jack */
+               alc_update_coef_idx(codec, 0x45, 0xf<<12 | 1<<10, 5<<12);
+               spec->no_internal_mic_pin = true;
+               break;
+       case HDA_FIXUP_ACT_INIT:
+               alc_combo_jack_hp_jd_restart(codec);
+               break;
+       }
+}
+
 /* for hda_fixup_thinkpad_acpi() */
 #include "thinkpad_helper.c"
 
@@ -6301,6 +6362,10 @@ enum {
        ALC274_FIXUP_HP_MIC,
        ALC274_FIXUP_HP_HEADSET_MIC,
        ALC256_FIXUP_ASUS_HPE,
+       ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
+       ALC287_FIXUP_HP_GPIO_LED,
+       ALC256_FIXUP_HP_HEADSET_MIC,
+       ALC236_FIXUP_DELL_AIO_HEADSET_MIC,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -7550,11 +7615,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .chain_id = ALC269_FIXUP_HEADSET_MIC
        },
        [ALC289_FIXUP_ASUS_GA401] = {
-               .type = HDA_FIXUP_PINS,
-               .v.pins = (const struct hda_pintbl[]) {
-                       { 0x19, 0x03a11020 }, /* headset mic with jack detect */
-                       { }
-               },
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc289_fixup_asus_ga401,
+               .chained = true,
+               .chain_id = ALC289_FIXUP_ASUS_GA502,
        },
        [ALC289_FIXUP_ASUS_GA502] = {
                .type = HDA_FIXUP_PINS,
@@ -7678,7 +7742,7 @@ static const struct hda_fixup alc269_fixups[] = {
                        { }
                },
                .chained = true,
-               .chain_id = ALC289_FIXUP_ASUS_GA401
+               .chain_id = ALC289_FIXUP_ASUS_GA502
        },
        [ALC274_FIXUP_HP_MIC] = {
                .type = HDA_FIXUP_VERBS,
@@ -7705,6 +7769,26 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC
        },
+       [ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_headset_jack,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_THINKPAD_ACPI
+       },
+       [ALC287_FIXUP_HP_GPIO_LED] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc287_fixup_hp_gpio_led,
+       },
+       [ALC256_FIXUP_HP_HEADSET_MIC] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc274_fixup_hp_headset_mic,
+       },
+       [ALC236_FIXUP_DELL_AIO_HEADSET_MIC] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc_fixup_no_int_mic,
+               .chained = true,
+               .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7782,6 +7866,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x097d, "Dell Precision", ALC289_FIXUP_DUAL_SPK),
        SND_PCI_QUIRK(0x1028, 0x098d, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
        SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -7848,6 +7934,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x820d, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
        SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC),
        SND_PCI_QUIRK(0x103c, 0x827e, "HP x360", ALC295_FIXUP_HP_X360),
+       SND_PCI_QUIRK(0x103c, 0x827f, "HP x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
        SND_PCI_QUIRK(0x103c, 0x82bf, "HP G3 mini", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x82c0, "HP G3 mini premium", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
@@ -7859,6 +7946,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED),
        SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED),
        SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED),
+       SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
        SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -7924,11 +8013,49 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x1401, "Clevo L140[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x1403, "Clevo N140CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x1404, "Clevo N150CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x40a1, "Clevo NL40GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x40c1, "Clevo NL40[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x40d1, "Clevo NL41DU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x50a3, "Clevo NJ51GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x50b3, "Clevo NK50S[BEZ]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x50b6, "Clevo NK50S5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x50b8, "Clevo NK50SZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x50d5, "Clevo NP50D5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x8535, "Clevo NH50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x8536, "Clevo NH79D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8550, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8551, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1558, 0x8561, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x951d, "Clevo N950T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x961d, "Clevo N960S[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL53RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
        SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
@@ -7966,6 +8093,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK),
        SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK),
+       SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK),
+       SND_PCI_QUIRK(0x17aa, 0x22c2, "Thinkpad X1 Extreme Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK),
        SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
@@ -8278,6 +8407,12 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x19, 0x02a11020},
                {0x1a, 0x02a11030},
                {0x21, 0x0221101f}),
+       SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC,
+               {0x21, 0x02211010}),
+       SND_HDA_PIN_QUIRK(0x10ec0236, 0x103c, "HP", ALC256_FIXUP_HP_HEADSET_MIC,
+               {0x14, 0x90170110},
+               {0x19, 0x02a11020},
+               {0x21, 0x02211030}),
        SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
                {0x14, 0x90170110},
                {0x21, 0x02211020}),
@@ -8380,6 +8515,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x1a, 0x90a70130},
                {0x1b, 0x90170110},
                {0x21, 0x03211020}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x103c, "HP", ALC256_FIXUP_HP_HEADSET_MIC,
+               {0x14, 0x90170110},
+               {0x19, 0x02a11020},
+               {0x21, 0x0221101f}),
        SND_HDA_PIN_QUIRK(0x10ec0274, 0x103c, "HP", ALC274_FIXUP_HP_HEADSET_MIC,
                {0x17, 0x90170110},
                {0x19, 0x03a11030},
@@ -8502,6 +8641,9 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
        SND_HDA_PIN_QUIRK(0x10ec0293, 0x1028, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
                ALC292_STANDARD_PINS,
                {0x13, 0x90a60140}),
+       SND_HDA_PIN_QUIRK(0x10ec0294, 0x1043, "ASUS", ALC294_FIXUP_ASUS_HPE,
+               {0x17, 0x90170110},
+               {0x21, 0x04211020}),
        SND_HDA_PIN_QUIRK(0x10ec0294, 0x1043, "ASUS", ALC294_FIXUP_ASUS_MIC,
                {0x14, 0x90170110},
                {0x1b, 0x90a70130},
@@ -10088,6 +10230,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
        HDA_CODEC_ENTRY(0x10ec0888, "ALC888", patch_alc882),
        HDA_CODEC_ENTRY(0x10ec0889, "ALC889", patch_alc882),
        HDA_CODEC_ENTRY(0x10ec0892, "ALC892", patch_alc662),
+       HDA_CODEC_ENTRY(0x10ec0897, "ALC897", patch_alc662),
        HDA_CODEC_ENTRY(0x10ec0899, "ALC898", patch_alc882),
        HDA_CODEC_ENTRY(0x10ec0900, "ALC1150", patch_alc882),
        HDA_CODEC_ENTRY(0x10ec0b00, "ALCS1200A", patch_alc882),
index 0bdd33b..fb8895a 100644 (file)
@@ -70,7 +70,6 @@ static int get_msg(struct mixart_mgr *mgr, struct mixart_msg *resp,
        unsigned int i;
 #endif
 
-       mutex_lock(&mgr->msg_lock);
        err = 0;
 
        /* copy message descriptor from miXart to driver */
@@ -119,8 +118,6 @@ static int get_msg(struct mixart_mgr *mgr, struct mixart_msg *resp,
        writel_be(headptr, MIXART_MEM(mgr, MSG_OUTBOUND_FREE_HEAD));
 
  _clean_exit:
-       mutex_unlock(&mgr->msg_lock);
-
        return err;
 }
 
@@ -258,7 +255,9 @@ int snd_mixart_send_msg(struct mixart_mgr *mgr, struct mixart_msg *request, int
        resp.data = resp_data;
        resp.size = max_resp_size;
 
+       mutex_lock(&mgr->msg_lock);
        err = get_msg(mgr, &resp, msg_frame);
+       mutex_unlock(&mgr->msg_lock);
 
        if (request->message_id != resp.message_id)
                dev_err(&mgr->pci->dev, "RESPONSE ERROR!\n");
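
The mixart change moves msg_lock acquisition from inside get_msg() to this call site; the other path into get_msg() (the threaded interrupt handler) appears to already hold the mutex, so taking it in the helper could self-deadlock. A hypothetical way to document the new contract in the helper (not part of the diff):

    /* Caller now owns the locking; assert it instead of taking it. */
    static int get_msg(struct mixart_mgr *mgr, struct mixart_msg *resp,
                       u32 msg_frame)
    {
            lockdep_assert_held(&mgr->msg_lock);
            /* message-copy body unchanged; elided in this sketch */
            return 0;
    }
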
index 25fe2dd..3db0729 100644 (file)
 #include <sound/soc-dapm.h>
 #include <sound/soc.h>
 #include <sound/tlv.h>
+#include <sound/rt1015.h>
 
 #include "rl6231.h"
 #include "rt1015.h"
 
+static const struct rt1015_platform_data i2s_default_platform_data = {
+       .power_up_delay_ms = 50,
+};
+
 static const struct reg_default rt1015_reg[] = {
        { 0x0000, 0x0000 },
        { 0x0004, 0xa000 },
@@ -539,7 +544,7 @@ static void rt1015_flush_work(struct work_struct *work)
        struct rt1015_priv *rt1015 = container_of(work, struct rt1015_priv,
                                                flush_work.work);
        struct snd_soc_component *component = rt1015->component;
-       unsigned int val, i = 0, count = 20;
+       unsigned int val, i = 0, count = 200;
 
        while (i < count) {
                usleep_range(1000, 1500);
@@ -650,6 +655,7 @@ static int rt1015_amp_drv_event(struct snd_soc_dapm_widget *w,
        case SND_SOC_DAPM_POST_PMU:
                if (rt1015->hw_config == RT1015_HW_28)
                        schedule_delayed_work(&rt1015->flush_work, msecs_to_jiffies(10));
+               msleep(rt1015->pdata.power_up_delay_ms);
                break;
        default:
                break;
@@ -1067,9 +1073,16 @@ static struct acpi_device_id rt1015_acpi_match[] = {
 MODULE_DEVICE_TABLE(acpi, rt1015_acpi_match);
 #endif
 
+static void rt1015_parse_dt(struct rt1015_priv *rt1015, struct device *dev)
+{
+       device_property_read_u32(dev, "realtek,power-up-delay-ms",
+               &rt1015->pdata.power_up_delay_ms);
+}
+
 static int rt1015_i2c_probe(struct i2c_client *i2c,
        const struct i2c_device_id *id)
 {
+       struct rt1015_platform_data *pdata = dev_get_platdata(&i2c->dev);
        struct rt1015_priv *rt1015;
        int ret;
        unsigned int val;
@@ -1081,6 +1094,13 @@ static int rt1015_i2c_probe(struct i2c_client *i2c,
 
        i2c_set_clientdata(i2c, rt1015);
 
+       rt1015->pdata = i2s_default_platform_data;
+
+       if (pdata)
+               rt1015->pdata = *pdata;
+       else
+               rt1015_parse_dt(rt1015, &i2c->dev);
+
        rt1015->regmap = devm_regmap_init_i2c(i2c, &rt1015_regmap);
        if (IS_ERR(rt1015->regmap)) {
                ret = PTR_ERR(rt1015->regmap);
index d3fdd30..15cadb3 100644 (file)
@@ -12,6 +12,7 @@
 
 #ifndef __RT1015_H__
 #define __RT1015_H__
+#include <sound/rt1015.h>
 
 #define RT1015_DEVICE_ID_VAL                   0x1011
 #define RT1015_DEVICE_ID_VAL2                  0x1015
@@ -380,6 +381,7 @@ enum {
 
 struct rt1015_priv {
        struct snd_soc_component *component;
+       struct rt1015_platform_data pdata;
        struct regmap *regmap;
        int sysclk;
        int sysclk_src;
index a9acce7..d987817 100644 (file)
@@ -43,6 +43,7 @@ static const struct reg_sequence patch_list[] = {
        {RT5682_DAC_ADC_DIG_VOL1, 0xa020},
        {RT5682_I2C_CTRL, 0x000f},
        {RT5682_PLL2_INTERNAL, 0x8266},
+       {RT5682_SAR_IL_CMD_3, 0x8365},
 };
 
 void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev)
index bcf18bf..e61d004 100644 (file)
@@ -1937,6 +1937,7 @@ static int wm_adsp_load(struct wm_adsp *dsp)
                        mem = wm_adsp_find_region(dsp, type);
                        if (!mem) {
                                adsp_err(dsp, "No region of type: %x\n", type);
+                               ret = -EINVAL;
                                goto out_fw;
                        }
 
index 9dadf65..f790514 100644 (file)
@@ -520,10 +520,10 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
                .driver_data = (void *)(BYT_RT5640_IN1_MAP |
                                        BYT_RT5640_MCLK_EN),
        },
-       {       /* HP Pavilion x2 10-n000nd */
+       {       /* HP Pavilion x2 10-k0XX, 10-n0XX */
                .matches = {
-                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
-                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+                       DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
                },
                .driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
                                        BYT_RT5640_JD_SRC_JD2_IN4N |
@@ -532,6 +532,17 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
                                        BYT_RT5640_SSP0_AIF1 |
                                        BYT_RT5640_MCLK_EN),
        },
+       {       /* HP Pavilion x2 10-p0XX */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"),
+               },
+               .driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
+                                       BYT_RT5640_JD_SRC_JD1_IN4P |
+                                       BYT_RT5640_OVCD_TH_1500UA |
+                                       BYT_RT5640_OVCD_SF_0P75 |
+                                       BYT_RT5640_MCLK_EN),
+       },
        {       /* HP Stream 7 */
                .matches = {
                        DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
index 922cd01..f95546c 100644 (file)
@@ -700,6 +700,8 @@ static int kabylake_set_bias_level(struct snd_soc_card *card,
        switch (level) {
        case SND_SOC_BIAS_PREPARE:
                if (dapm->bias_level == SND_SOC_BIAS_ON) {
+                       if (!__clk_is_enabled(priv->mclk))
+                               return 0;
                        dev_dbg(card->dev, "Disable mclk");
                        clk_disable_unprepare(priv->mclk);
                } else {
index ba653eb..408e64e 100644 (file)
@@ -458,10 +458,6 @@ static int catpt_dai_prepare(struct snd_pcm_substream *substream,
        if (ret)
                return CATPT_IPC_ERROR(ret);
 
-       ret = catpt_dsp_update_lpclock(cdev);
-       if (ret)
-               return ret;
-
        ret = catpt_dai_apply_usettings(dai, stream);
        if (ret)
                return ret;
@@ -500,6 +496,7 @@ static int catpt_dai_trigger(struct snd_pcm_substream *substream, int cmd,
        case SNDRV_PCM_TRIGGER_RESUME:
        case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
        resume_stream:
+               catpt_dsp_update_lpclock(cdev);
                ret = catpt_ipc_resume_stream(cdev, stream->info.stream_hw_id);
                if (ret)
                        return CATPT_IPC_ERROR(ret);
@@ -507,11 +504,11 @@ static int catpt_dai_trigger(struct snd_pcm_substream *substream, int cmd,
 
        case SNDRV_PCM_TRIGGER_STOP:
                stream->prepared = false;
-               catpt_dsp_update_lpclock(cdev);
                fallthrough;
        case SNDRV_PCM_TRIGGER_SUSPEND:
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
                ret = catpt_ipc_pause_stream(cdev, stream->info.stream_hw_id);
+               catpt_dsp_update_lpclock(cdev);
                if (ret)
                        return CATPT_IPC_ERROR(ret);
                break;
@@ -534,6 +531,8 @@ void catpt_stream_update_position(struct catpt_dev *cdev,
 
        dsppos = bytes_to_frames(r, pos->stream_position);
 
+       if (!stream->prepared)
+               goto exit;
        /* only offload is set_write_pos driven */
        if (stream->template->type != CATPT_STRM_TYPE_RENDER)
                goto exit;
index f54b710..291a686 100644 (file)
@@ -487,9 +487,9 @@ static int kmb_dai_hw_params(struct snd_pcm_substream *substream,
                kmb_i2s->xfer_resolution = 0x02;
                break;
        case SNDRV_PCM_FORMAT_S24_LE:
-               config->data_width = 24;
-               kmb_i2s->ccr = 0x08;
-               kmb_i2s->xfer_resolution = 0x04;
+               config->data_width = 32;
+               kmb_i2s->ccr = 0x14;
+               kmb_i2s->xfer_resolution = 0x05;
                break;
        case SNDRV_PCM_FORMAT_S32_LE:
                config->data_width = 32;
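
The S24_LE fix follows ALSA's container convention: S24_LE is 24 valid bits carried in a 32-bit container, so the I2S bus must be clocked for 32-bit words, with ccr/xfer_resolution programmed to match. The distinction in API terms (sketch):

    #include <sound/pcm.h>

    /* The bus width must follow the container, not the precision. */
    static void s24_widths(int *valid, int *physical)
    {
            *valid    = snd_pcm_format_width(SNDRV_PCM_FORMAT_S24_LE);          /* 24 */
            *physical = snd_pcm_format_physical_width(SNDRV_PCM_FORMAT_S24_LE); /* 32 */
    }
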
index 9d17c87..426235a 100644 (file)
@@ -263,28 +263,6 @@ static int lpass_cpu_daiops_hw_params(struct snd_pcm_substream *substream,
        return 0;
 }
 
-static int lpass_cpu_daiops_prepare(struct snd_pcm_substream *substream,
-               struct snd_soc_dai *dai)
-{
-       struct lpass_data *drvdata = snd_soc_dai_get_drvdata(dai);
-       struct lpaif_i2sctl *i2sctl = drvdata->i2sctl;
-       unsigned int id = dai->driver->id;
-       int ret;
-
-       if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-               ret = regmap_fields_write(i2sctl->spken, id,
-                                        LPAIF_I2SCTL_SPKEN_ENABLE);
-       } else {
-               ret = regmap_fields_write(i2sctl->micen, id,
-                                        LPAIF_I2SCTL_MICEN_ENABLE);
-       }
-
-       if (ret)
-               dev_err(dai->dev, "error writing to i2sctl enable: %d\n", ret);
-
-       return ret;
-}
-
 static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream,
                int cmd, struct snd_soc_dai *dai)
 {
@@ -292,6 +270,18 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream,
        struct lpaif_i2sctl *i2sctl = drvdata->i2sctl;
        unsigned int id = dai->driver->id;
        int ret = -EINVAL;
+       unsigned int val = 0;
+
+       ret = regmap_read(drvdata->lpaif_map,
+                               LPAIF_I2SCTL_REG(drvdata->variant, dai->driver->id), &val);
+       if (ret) {
+               dev_err(dai->dev, "error reading from i2sctl reg: %d\n", ret);
+               return ret;
+       }
+       if (val == LPAIF_I2SCTL_RESET_STATE) {
+               dev_err(dai->dev, "error in i2sctl register state\n");
+               return -ENOTRECOVERABLE;
+       }
 
        switch (cmd) {
        case SNDRV_PCM_TRIGGER_START:
@@ -308,11 +298,14 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream,
                        dev_err(dai->dev, "error writing to i2sctl reg: %d\n",
                                ret);
 
-               ret = clk_enable(drvdata->mi2s_bit_clk[id]);
-               if (ret) {
-                       dev_err(dai->dev, "error in enabling mi2s bit clk: %d\n", ret);
-                       clk_disable(drvdata->mi2s_osr_clk[id]);
-                       return ret;
+               if (drvdata->bit_clk_state[id] == LPAIF_BIT_CLK_DISABLE) {
+                       ret = clk_enable(drvdata->mi2s_bit_clk[id]);
+                       if (ret) {
+                               dev_err(dai->dev, "error in enabling mi2s bit clk: %d\n", ret);
+                               clk_disable(drvdata->mi2s_osr_clk[id]);
+                               return ret;
+                       }
+                       drvdata->bit_clk_state[id] = LPAIF_BIT_CLK_ENABLE;
                }
 
                break;
@@ -329,7 +322,10 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream,
                if (ret)
                        dev_err(dai->dev, "error writing to i2sctl reg: %d\n",
                                ret);
-               clk_disable(drvdata->mi2s_bit_clk[dai->driver->id]);
+               if (drvdata->bit_clk_state[id] == LPAIF_BIT_CLK_ENABLE) {
+                       clk_disable(drvdata->mi2s_bit_clk[dai->driver->id]);
+                       drvdata->bit_clk_state[id] = LPAIF_BIT_CLK_DISABLE;
+               }
                break;
        }
 
@@ -341,7 +337,6 @@ const struct snd_soc_dai_ops asoc_qcom_lpass_cpu_dai_ops = {
        .startup        = lpass_cpu_daiops_startup,
        .shutdown       = lpass_cpu_daiops_shutdown,
        .hw_params      = lpass_cpu_daiops_hw_params,
-       .prepare        = lpass_cpu_daiops_prepare,
        .trigger        = lpass_cpu_daiops_trigger,
 };
 EXPORT_SYMBOL_GPL(asoc_qcom_lpass_cpu_dai_ops);
@@ -459,16 +454,20 @@ static bool lpass_cpu_regmap_volatile(struct device *dev, unsigned int reg)
        struct lpass_variant *v = drvdata->variant;
        int i;
 
+       for (i = 0; i < v->i2s_ports; ++i)
+               if (reg == LPAIF_I2SCTL_REG(v, i))
+                       return true;
        for (i = 0; i < v->irq_ports; ++i)
                if (reg == LPAIF_IRQSTAT_REG(v, i))
                        return true;
 
        for (i = 0; i < v->rdma_channels; ++i)
-               if (reg == LPAIF_RDMACURR_REG(v, i))
+               if (reg == LPAIF_RDMACURR_REG(v, i) || reg == LPAIF_RDMACTL_REG(v, i))
                        return true;
 
        for (i = 0; i < v->wrdma_channels; ++i)
-               if (reg == LPAIF_WRDMACURR_REG(v, i + v->wrdma_channel_start))
+               if (reg == LPAIF_WRDMACURR_REG(v, i + v->wrdma_channel_start) ||
+                       reg == LPAIF_WRDMACTL_REG(v, i + v->wrdma_channel_start))
                        return true;
 
        return false;
@@ -861,6 +860,7 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev)
                                PTR_ERR(drvdata->mi2s_bit_clk[dai_id]));
                        return PTR_ERR(drvdata->mi2s_bit_clk[dai_id]);
                }
+               drvdata->bit_clk_state[dai_id] = LPAIF_BIT_CLK_DISABLE;
        }
 
        /* Allocation for i2sctl regmap fields */
index 08f3fe5..4055428 100644 (file)
 #define LPAIF_I2SCTL_BITWIDTH_24       1
 #define LPAIF_I2SCTL_BITWIDTH_32       2
 
+#define LPAIF_BIT_CLK_DISABLE          0
+#define LPAIF_BIT_CLK_ENABLE           1
+
+#define LPAIF_I2SCTL_RESET_STATE       0x003C0004
+#define LPAIF_DMACTL_RESET_STATE       0x00200000
+
+
 /* LPAIF IRQ */
 #define LPAIF_IRQ_REG_ADDR(v, addr, port) \
        (v->irq_reg_base + (addr) + v->irq_reg_stride * (port))
index 36d1512..80b09de 100644 (file)
@@ -110,6 +110,7 @@ static int lpass_platform_pcmops_open(struct snd_soc_component *component,
        struct regmap *map;
        unsigned int dai_id = cpu_dai->driver->id;
 
+       component->id = dai_id;
        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;
@@ -122,8 +123,10 @@ static int lpass_platform_pcmops_open(struct snd_soc_component *component,
        else
                dma_ch = 0;
 
-       if (dma_ch < 0)
+       if (dma_ch < 0) {
+               kfree(data);
                return dma_ch;
+       }
 
        if (cpu_dai->driver->id == LPASS_DP_RX) {
                map = drvdata->hdmiif_map;
@@ -147,6 +150,7 @@ static int lpass_platform_pcmops_open(struct snd_soc_component *component,
        ret = snd_pcm_hw_constraint_integer(runtime,
                        SNDRV_PCM_HW_PARAM_PERIODS);
        if (ret < 0) {
+               kfree(data);
                dev_err(soc_runtime->dev, "setting constraints failed: %d\n",
                        ret);
                return -EINVAL;
@@ -448,19 +452,34 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component,
        unsigned int reg_irqclr = 0, val_irqclr = 0;
        unsigned int  reg_irqen = 0, val_irqen = 0, val_mask = 0;
        unsigned int dai_id = cpu_dai->driver->id;
+       unsigned int dma_ctrl_reg = 0;
 
        ch = pcm_data->dma_ch;
        if (dir ==  SNDRV_PCM_STREAM_PLAYBACK) {
                id = pcm_data->dma_ch;
-               if (dai_id == LPASS_DP_RX)
+               if (dai_id == LPASS_DP_RX) {
                        dmactl = drvdata->hdmi_rd_dmactl;
-               else
+                       map = drvdata->hdmiif_map;
+               } else {
                        dmactl = drvdata->rd_dmactl;
+                       map = drvdata->lpaif_map;
+               }
        } else {
                dmactl = drvdata->wr_dmactl;
                id = pcm_data->dma_ch - v->wrdma_channel_start;
+               map = drvdata->lpaif_map;
+       }
+       ret = regmap_read(map, LPAIF_DMACTL_REG(v, ch, dir, dai_id), &dma_ctrl_reg);
+       if (ret) {
+               dev_err(soc_runtime->dev, "error reading from rdmactl reg: %d\n", ret);
+               return ret;
        }
 
+       if (dma_ctrl_reg == LPAIF_DMACTL_RESET_STATE ||
+               dma_ctrl_reg == LPAIF_DMACTL_RESET_STATE + 1) {
+               dev_err(soc_runtime->dev, "error in rdmactl register state\n");
+               return -ENOTRECOVERABLE;
+       }
        switch (cmd) {
        case SNDRV_PCM_TRIGGER_START:
        case SNDRV_PCM_TRIGGER_RESUME:
index b4830f3..bccd1a0 100644 (file)
@@ -68,6 +68,7 @@ struct lpass_data {
        unsigned int mi2s_playback_sd_mode[LPASS_MAX_MI2S_PORTS];
        unsigned int mi2s_capture_sd_mode[LPASS_MAX_MI2S_PORTS];
        int hdmi_port_enable;
+       int bit_clk_state[LPASS_MAX_MI2S_PORTS];
 
        /* low-power audio interface (LPAIF) registers */
        void __iomem *lpaif;
index fa764b6..4457214 100644 (file)
@@ -379,6 +379,10 @@ static const struct usb_audio_device_name usb_audio_names[] = {
 
        DEVICE_NAME(0x046d, 0x0990, "Logitech, Inc.", "QuickCam Pro 9000"),
 
+       /* ASUS ROG Strix */
+       PROFILE_NAME(0x0b05, 0x1917,
+                    "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"),
+
        /* Dell WD15 Dock */
        PROFILE_NAME(0x0bda, 0x4014, "Dell", "WD15 Dock", "Dell-WD15-Dock"),
        /* Dell WD19 Dock */
index c369c81..a7212f1 100644 (file)
@@ -561,7 +561,8 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
        },
        {       /* ASUS ROG Strix */
                .id = USB_ID(0x0b05, 0x1917),
-               .map = asus_rog_map,
+               .map = trx40_mobo_map,
+               .connector_map = trx40_mobo_connector_map,
        },
        {       /* MSI TRX40 Creator */
                .id = USB_ID(0x0db0, 0x0d64),
index 92b1a6d..bd63a9c 100644 (file)
@@ -607,7 +607,7 @@ static int snd_us16x08_eq_put(struct snd_kcontrol *kcontrol,
 static int snd_us16x08_meter_info(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_info *uinfo)
 {
-       uinfo->count = 1;
+       uinfo->count = 34;
        uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
        uinfo->value.integer.max = 0x7FFF;
        uinfo->value.integer.min = 0;
index c989ad8..c50be2f 100644 (file)
@@ -1672,13 +1672,13 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
            && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
                msleep(20);
 
-       /* Zoom R16/24, Logitech H650e/H570e, Jabra 550a, Kingston HyperX
-        *  needs a tiny delay here, otherwise requests like get/set
-        *  frequency return as failed despite actually succeeding.
+       /* Zoom R16/24, many Logitech models (at least H650e/H570e/BCC950),
+        * Jabra 550a and Kingston HyperX need a tiny delay here, otherwise
+        * requests like get/set frequency return as failed despite
+        * actually succeeding.
         */
        if ((chip->usb_id == USB_ID(0x1686, 0x00dd) ||
-            chip->usb_id == USB_ID(0x046d, 0x0a46) ||
-            chip->usb_id == USB_ID(0x046d, 0x0a56) ||
+            USB_ID_VENDOR(chip->usb_id) == 0x046d  || /* Logitech */
             chip->usb_id == USB_ID(0x0b0e, 0x0349) ||
             chip->usb_id == USB_ID(0x0951, 0x16ad)) &&
            (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
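
Matching USB_ID_VENDOR(chip->usb_id) == 0x046d collapses the growing list of Logitech product IDs into one vendor test. The helpers pack vendor and product into a single u32, as the driver defines them in sound/usb/usbaudio.h:

    #define USB_ID(vendor, product)	(((vendor) << 16) | (product))
    #define USB_ID_VENDOR(id)	((id) >> 16)
    #define USB_ID_PRODUCT(id)	((u16)(id))
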
index 568854b..52c6262 100644 (file)
@@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn)
        return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx:  Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate the prefix bytes of the given @insn. Each prefix byte is stored
+ * in @prefix and its index in @idx (@idx is just a cursor; do not change it).
+ * Since prefixes.nbytes can be bigger than 4 when some prefixes are repeated,
+ * it must not be used as the loop bound for iterating the prefixes.
+ */
+#define for_each_insn_prefix(insn, idx, prefix)        \
+       for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
 
index 0b5b8ae..1e299ac 100644 (file)
@@ -16,8 +16,6 @@
  * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
  */
 
-.weak memcpy
-
 /*
  * memcpy - Copy a memory block.
  *
@@ -30,7 +28,7 @@
  * rax original destination
  */
 SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_LOCAL(memcpy)
+SYM_FUNC_START_WEAK(memcpy)
        ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
                      "jmp memcpy_erms", X86_FEATURE_ERMS
 
@@ -51,14 +49,14 @@ EXPORT_SYMBOL(__memcpy)
  * memcpy_erms() - enhanced fast string memcpy. This is faster and
  * simpler than memcpy. Use memcpy_erms when possible.
  */
-SYM_FUNC_START(memcpy_erms)
+SYM_FUNC_START_LOCAL(memcpy_erms)
        movq %rdi, %rax
        movq %rdx, %rcx
        rep movsb
        ret
 SYM_FUNC_END(memcpy_erms)
 
-SYM_FUNC_START(memcpy_orig)
+SYM_FUNC_START_LOCAL(memcpy_orig)
        movq %rdi, %rax
 
        cmpq $0x20, %rdx
index fd5d25a..0bfd26e 100644 (file)
@@ -4,8 +4,7 @@
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
-
-.weak memset
+#include <asm/export.h>
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses fast
@@ -18,7 +17,7 @@
  *
  * rax   original destination
  */
-SYM_FUNC_START_ALIAS(memset)
+SYM_FUNC_START_WEAK(memset)
 SYM_FUNC_START(__memset)
        /*
         * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
@@ -44,6 +43,8 @@ SYM_FUNC_START(__memset)
        ret
 SYM_FUNC_END(__memset)
 SYM_FUNC_END_ALIAS(memset)
+EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
@@ -56,7 +57,7 @@ SYM_FUNC_END_ALIAS(memset)
  *
  * rax   original destination
  */
-SYM_FUNC_START(memset_erms)
+SYM_FUNC_START_LOCAL(memset_erms)
        movq %rdi,%r9
        movb %sil,%al
        movq %rdx,%rcx
@@ -65,7 +66,7 @@ SYM_FUNC_START(memset_erms)
        ret
 SYM_FUNC_END(memset_erms)
 
-SYM_FUNC_START(memset_orig)
+SYM_FUNC_START_LOCAL(memset_orig)
        movq %rdi,%r10
 
        /* expand byte value  */
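
Both string routines replace a bare .weak directive with SYM_FUNC_START_WEAK. The old arrangement applied .weak to a symbol the file then defined as local, a sequence GNU and LLVM toolchains bind differently; declaring the weak binding at the definition removes the ambiguity. The C-level analogue of what the asm now expresses (arch_memset is a hypothetical name):

    #include <stddef.h>

    /* Weak default; a stronger definition (e.g. an instrumented
     * variant) wins at link time. */
    __attribute__((weak)) void *arch_memset(void *s, int c, size_t n)
    {
            unsigned char *p = s;

            while (n--)
                    *p++ = (unsigned char)c;
            return s;
    }
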
index eb92027..7362bef 100644 (file)
@@ -10,6 +10,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <errno.h>
+#include <endian.h>
 
 #include <linux/kernel.h>
 #include <linux/bootconfig.h>
@@ -147,6 +148,12 @@ static int load_xbc_file(const char *path, char **buf)
        return ret;
 }
 
+static int pr_errno(const char *msg, int err)
+{
+       pr_err("%s: %d\n", msg, err);
+       return err;
+}
+
 static int load_xbc_from_initrd(int fd, char **buf)
 {
        struct stat stat;
@@ -162,26 +169,26 @@ static int load_xbc_from_initrd(int fd, char **buf)
        if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN)
                return 0;
 
-       if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) {
-               pr_err("Failed to lseek: %d\n", -errno);
-               return -errno;
-       }
+       if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0)
+               return pr_errno("Failed to lseek for magic", -errno);
+
        if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0)
-               return -errno;
+               return pr_errno("Failed to read", -errno);
+
        /* Check the bootconfig magic bytes */
        if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0)
                return 0;
 
-       if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) {
-               pr_err("Failed to lseek: %d\n", -errno);
-               return -errno;
-       }
+       if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0)
+               return pr_errno("Failed to lseek for size", -errno);
 
        if (read(fd, &size, sizeof(u32)) < 0)
-               return -errno;
+               return pr_errno("Failed to read size", -errno);
+       size = le32toh(size);
 
        if (read(fd, &csum, sizeof(u32)) < 0)
-               return -errno;
+               return pr_errno("Failed to read checksum", -errno);
+       csum = le32toh(csum);
 
        /* Wrong size error  */
        if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) {
@@ -190,10 +197,8 @@ static int load_xbc_from_initrd(int fd, char **buf)
        }
 
        if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN),
-                 SEEK_SET) < 0) {
-               pr_err("Failed to lseek: %d\n", -errno);
-               return -errno;
-       }
+                 SEEK_SET) < 0)
+               return pr_errno("Failed to lseek", -errno);
 
        ret = load_xbc_fd(fd, buf, size);
        if (ret < 0)
@@ -262,14 +267,16 @@ static int show_xbc(const char *path, bool list)
 
        ret = stat(path, &st);
        if (ret < 0) {
-               pr_err("Failed to stat %s: %d\n", path, -errno);
-               return -errno;
+               ret = -errno;
+               pr_err("Failed to stat %s: %d\n", path, ret);
+               return ret;
        }
 
        fd = open(path, O_RDONLY);
        if (fd < 0) {
-               pr_err("Failed to open initrd %s: %d\n", path, fd);
-               return -errno;
+               ret = -errno;
+               pr_err("Failed to open initrd %s: %d\n", path, ret);
+               return ret;
        }
 
        ret = load_xbc_from_initrd(fd, &buf);
@@ -307,8 +314,9 @@ static int delete_xbc(const char *path)
 
        fd = open(path, O_RDWR);
        if (fd < 0) {
-               pr_err("Failed to open initrd %s: %d\n", path, fd);
-               return -errno;
+               ret = -errno;
+               pr_err("Failed to open initrd %s: %d\n", path, ret);
+               return ret;
        }
 
        size = load_xbc_from_initrd(fd, &buf);
@@ -332,11 +340,13 @@ static int delete_xbc(const char *path)
 
 static int apply_xbc(const char *path, const char *xbc_path)
 {
+       char *buf, *data, *p;
+       size_t total_size;
+       struct stat stat;
+       const char *msg;
        u32 size, csum;
-       char *buf, *data;
+       int pos, pad;
        int ret, fd;
-       const char *msg;
-       int pos;
 
        ret = load_xbc_file(xbc_path, &buf);
        if (ret < 0) {
@@ -346,13 +356,12 @@ static int apply_xbc(const char *path, const char *xbc_path)
        size = strlen(buf) + 1;
        csum = checksum((unsigned char *)buf, size);
 
-       /* Prepare xbc_path data */
-       data = malloc(size + 8);
+       /* Backup the bootconfig data */
+       data = calloc(size + BOOTCONFIG_ALIGN +
+                     sizeof(u32) + sizeof(u32) + BOOTCONFIG_MAGIC_LEN, 1);
        if (!data)
                return -ENOMEM;
-       strcpy(data, buf);
-       *(u32 *)(data + size) = size;
-       *(u32 *)(data + size + 4) = csum;
+       memcpy(data, buf, size);
 
        /* Check the data format */
        ret = xbc_init(buf, &msg, &pos);
@@ -383,28 +392,61 @@ static int apply_xbc(const char *path, const char *xbc_path)
        /* Apply new one */
        fd = open(path, O_RDWR | O_APPEND);
        if (fd < 0) {
-               pr_err("Failed to open %s: %d\n", path, fd);
+               ret = -errno;
+               pr_err("Failed to open %s: %d\n", path, ret);
                free(data);
-               return fd;
+               return ret;
        }
        /* TODO: Ensure the @path is initramfs/initrd image */
-       ret = write(fd, data, size + 8);
-       if (ret < 0) {
-               pr_err("Failed to apply a boot config: %d\n", ret);
+       if (fstat(fd, &stat) < 0) {
+               ret = -errno;
+               pr_err("Failed to get the size of %s\n", path);
                goto out;
        }
-       /* Write a magic word of the bootconfig */
-       ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
-       if (ret < 0) {
-               pr_err("Failed to apply a boot config magic: %d\n", ret);
-               goto out;
-       }
-       ret = 0;
+
+       /* Compute the padding needed to round the total size up to BOOTCONFIG_ALIGN */
+       total_size = stat.st_size + size + sizeof(u32) * 2 + BOOTCONFIG_MAGIC_LEN;
+       pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size;
+       size += pad;
+
+       /* Add a footer */
+       p = data + size;
+       *(u32 *)p = htole32(size);
+       p += sizeof(u32);
+
+       *(u32 *)p = htole32(csum);
+       p += sizeof(u32);
+
+       memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
+       p += BOOTCONFIG_MAGIC_LEN;
+
+       total_size = p - data;
+
+       ret = write(fd, data, total_size);
+       if (ret < 0 || (size_t)ret < total_size) {
+               if (ret < 0)
+                       ret = -errno;
+               pr_err("Failed to apply a boot config: %d\n", ret);
+               if (ret >= 0)
+                       goto out_rollback;
+       } else
+               ret = 0;
+
 out:
        close(fd);
        free(data);
 
        return ret;
+
+out_rollback:
+       /* Map the partial write to -ENOSPC */
+       if (ret >= 0)
+               ret = -ENOSPC;
+       if (ftruncate(fd, stat.st_size) < 0) {
+               ret = -errno;
+               pr_err("Failed to roll back the partial write: %d\n", ret);
+               pr_err("The initrd %s may be corrupted; rebuilding it is recommended.\n", path);
+       }
+       goto out;
 }
 
 static int usage(void)
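The footer apply_xbc() now appends is laid out as [config data][padding][le32 size][le32 checksum][magic], with the padding chosen so the final initrd size is a multiple of BOOTCONFIG_ALIGN, and the padding counted inside the stored size field (which is why load_xbc_from_initrd() above can keep locating the data with size + 8 + BOOTCONFIG_MAGIC_LEN). A minimal sketch of that rounding, assuming BOOTCONFIG_ALIGN is 4 (matching ALIGN=4 in the test script below) and BOOTCONFIG_MAGIC_LEN is 12:

#include <stddef.h>
#include <stdint.h>

#define BOOTCONFIG_ALIGN	4	/* assumed; matches ALIGN=4 in the test below */
#define BOOTCONFIG_ALIGN_MASK	(BOOTCONFIG_ALIGN - 1)
#define BOOTCONFIG_MAGIC_LEN	12

/* Round x up to the next multiple of BOOTCONFIG_ALIGN (a power of two). */
static inline size_t bootconfig_align_up(size_t x)
{
	return (x + BOOTCONFIG_ALIGN_MASK) & ~(size_t)BOOTCONFIG_ALIGN_MASK;
}

/* Padding so initrd + config + footer lands on an aligned boundary. */
static inline size_t bootconfig_pad(size_t initrd_size, size_t cfg_size)
{
	size_t total = initrd_size + cfg_size
		     + 2 * sizeof(uint32_t)	/* size + checksum */
		     + BOOTCONFIG_MAGIC_LEN;

	return bootconfig_align_up(total) - total;
}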
index d295e40..baed891 100755 (executable)
@@ -9,6 +9,7 @@ else
   TESTDIR=.
 fi
 BOOTCONF=${TESTDIR}/bootconfig
+ALIGN=4
 
 INITRD=`mktemp ${TESTDIR}/initrd-XXXX`
 TEMPCONF=`mktemp ${TESTDIR}/temp-XXXX.bconf`
@@ -59,7 +60,10 @@ echo "Show command test"
 xpass $BOOTCONF $INITRD
 
 echo "File size check"
-xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12)
+total_size=$(expr $bconf_size + $initrd_size + 9 + 12 + $ALIGN - 1)
+total_size=$(expr $total_size / $ALIGN)
+total_size=$(expr $total_size \* $ALIGN)
+xpass test $new_size -eq $total_size
 
 echo "Apply command repeat test"
 xpass $BOOTCONF -a $TEMPCONF $INITRD
index 8ab142f..2afb7d5 100644 (file)
@@ -693,6 +693,7 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type,
                obj_node = calloc(1, sizeof(*obj_node));
                if (!obj_node) {
                        p_err("failed to allocate memory: %s", strerror(errno));
+                       err = -ENOMEM;
                        goto err_free;
                }
 
index 910e7ba..3fae61e 100644 (file)
@@ -578,8 +578,8 @@ static int do_attach(int argc, char **argv)
 
        ifindex = net_parse_dev(&argc, &argv);
        if (ifindex < 1) {
-               close(progfd);
-               return -EINVAL;
+               err = -EINVAL;
+               goto cleanup;
        }
 
        if (argc) {
@@ -587,8 +587,8 @@ static int do_attach(int argc, char **argv)
                        overwrite = true;
                } else {
                        p_err("expected 'overwrite', got: '%s'?", *argv);
-                       close(progfd);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto cleanup;
                }
        }
 
@@ -596,17 +596,17 @@ static int do_attach(int argc, char **argv)
        if (is_prefix("xdp", attach_type_strings[attach_type]))
                err = do_attach_detach_xdp(progfd, attach_type, ifindex,
                                           overwrite);
-
-       if (err < 0) {
+       if (err) {
                p_err("interface %s attach failed: %s",
                      attach_type_strings[attach_type], strerror(-err));
-               return err;
+               goto cleanup;
        }
 
        if (json_output)
                jsonw_null(json_wtr);
-
-       return 0;
+cleanup:
+       close(progfd);
+       return err;
 }
 
 static int do_detach(int argc, char **argv)
index df7d8ec..477e55d 100644 (file)
@@ -89,9 +89,9 @@ libbpf_print_none(__maybe_unused enum libbpf_print_level level,
 
 int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
 {
-       char buf[4096];
-       struct pid_iter_bpf *skel;
        struct pid_iter_entry *e;
+       char buf[4096 / sizeof(*e) * sizeof(*e)];
+       struct pid_iter_bpf *skel;
        int err, ret, fd = -1, i;
        libbpf_print_fn_t default_print;
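The declarations are reordered so buf can be sized as a whole multiple of sizeof(*e): reads from the BPF iterator then always return complete pid_iter_entry records instead of ending mid-entry. The idiom in isolation (the entry type here is a stand-in, not the real pid_iter_entry):

#include <stdio.h>

struct entry { int pid; int fd; char comm[16]; };	/* stand-in type */

int main(void)
{
	/* Integer division truncates, so this is 4096 rounded down to
	 * a multiple of sizeof(struct entry). */
	char buf[4096 / sizeof(struct entry) * sizeof(struct entry)];

	printf("%zu\n", sizeof(buf));	/* 4080 for a 24-byte entry */
	return 0;
}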
 
index e6ceac3..556216d 100644 (file)
@@ -3897,8 +3897,8 @@ union bpf_attr {
        FN(seq_printf_btf),             \
        FN(skb_cgroup_classid),         \
        FN(redirect_neigh),             \
-       FN(bpf_per_cpu_ptr),            \
-       FN(bpf_this_cpu_ptr),           \
+       FN(per_cpu_ptr),                \
+       FN(this_cpu_ptr),               \
        FN(redirect_peer),              \
        /* */
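The dropped bpf_ prefixes matter because this helper list is an X-macro: the UAPI enum pastes BPF_FUNC_ onto each entry, so the old spelling produced BPF_FUNC_bpf_per_cpu_ptr. A hedged sketch of the pattern (macro names here are illustrative, not the exact UAPI ones):

/* X-macro helper list; each FN(x) becomes one enum entry below. */
#define HELPER_MAPPER(FN)	\
	FN(per_cpu_ptr)		\
	FN(this_cpu_ptr)

#define HELPER_TO_ENUM(x) BPF_FUNC_##x,

enum helper_id {
	HELPER_MAPPER(HELPER_TO_ENUM)
	/* expands to: BPF_FUNC_per_cpu_ptr, BPF_FUNC_this_cpu_ptr, */
};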
 
index d199a36..b0bf56c 100755 (executable)
@@ -742,7 +742,11 @@ class DebugfsProvider(Provider):
         The fields are all available KVM debugfs files
 
         """
-        return self.walkdir(PATH_DEBUGFS_KVM)[2]
+        exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns']
+        fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2]
+                  if field not in exempt_list]
+
+        return fields
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
index 5f9abed..55bd78b 100644 (file)
@@ -146,6 +146,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
                           awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
                           sort -u | wc -l)
 VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+                             sed 's/\[.*\]//' | \
                              awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
                              grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
@@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so
                    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
                    sort -u > $(OUTPUT)libbpf_global_syms.tmp;           \
                readelf --dyn-syms --wide $(OUTPUT)libbpf.so |           \
+                   sed 's/\[.*\]//' |                                   \
                    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
                    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |             \
                    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp;        \
index 3130341..28baee7 100644 (file)
@@ -560,8 +560,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
                      const char *name, size_t sec_idx, const char *sec_name,
                      size_t sec_off, void *insn_data, size_t insn_data_sz)
 {
-       int i;
-
        if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
                pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
                        sec_name, name, sec_off, insn_data_sz);
@@ -600,13 +598,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
                goto errout;
        memcpy(prog->insns, insn_data, insn_data_sz);
 
-       for (i = 0; i < prog->insns_cnt; i++) {
-               if (insn_is_subprog_call(&prog->insns[i])) {
-                       obj->has_subcalls = true;
-                       break;
-               }
-       }
-
        return 0;
 errout:
        pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
@@ -3280,7 +3271,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
 static bool prog_is_subprog(const struct bpf_object *obj,
                            const struct bpf_program *prog)
 {
-       return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls;
+       /* For legacy reasons, libbpf supports entry-point BPF programs
+        * without a SEC() attribute, i.e., those in the .text section. But if
+        * there are 2 or more such programs in the .text section, they all
+        * must be subprograms called from entry-point BPF programs in
+        * designated SEC()'tions, otherwise there is no way to distinguish
+        * which of those programs should be loaded and which are subprograms.
+        * Similarly, if there is a function/program in .text and at least one
+        * other BPF program with a custom SEC() attribute, then we just assume
+        * .text programs are subprograms (even if they are not called from
+        * other programs), because libbpf never explicitly supported mixing
+        * SEC()-designated BPF programs and .text entry-point BPF programs.
+        */
+       return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
 }
 
 struct bpf_program *
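Concretely, the new rule means that as soon as an object contains more than one program, anything placed in .text is treated as a subprogram rather than an entry point. A hedged BPF C illustration of the two roles:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* No SEC() attribute, so this lands in .text; with nr_programs > 1 it
 * is classified as a subprogram by prog_is_subprog() above. */
__noinline int helper(int x)
{
	return x + 1;
}

SEC("raw_tp/sys_enter")
int entry_prog(void *ctx)	/* SEC()-designated entry point */
{
	return helper(0);
}

char _license[] SEC("license") = "GPL";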
index 5c6522c..98537ff 100644 (file)
@@ -278,7 +278,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
                err = ringbuf_process_ring(ring);
                if (err < 0)
                        return err;
-               res += cnt;
+               res += err;
        }
        return cnt < 0 ? -errno : res;
 }
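The one-character fix changes what the loop accumulates: ringbuf_process_ring() returns how many records it consumed on that ring, while cnt is the epoll result, so adding cnt inflated the total. Roughly, the corrected shape (a sketch, not the library internals):

/* cnt rings are ready; return the total records consumed across them. */
static int poll_rings_sketch(int cnt, int (*process_ring)(int idx))
{
	int res = 0;

	for (int i = 0; i < cnt; i++) {
		int err = process_ring(i);

		if (err < 0)
			return err;
		res += err;	/* previously: res += cnt (the bug) */
	}
	return res;
}

This is also why the ringbuf_multi selftest further down now expects 2 records from ring_buffer__poll() instead of 4.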
index 4e40402..478078f 100644 (file)
@@ -38,6 +38,13 @@ static int sample_ustack(struct perf_sample *sample,
        stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
 
        memcpy(buf, (void *) sp, stack_size);
+#ifdef MEMORY_SANITIZER
+       /*
+        * Copying the stack may copy msan poison, avoid false positives in the
+        * unwinder by removing the poison here.
+        */
+       __msan_unpoison(buf, stack_size);
+#endif
        stack->data = (char *) buf;
        stack->size = stack_size;
        return 0;
index 9ad015a..6eb45a2 100644 (file)
@@ -2,6 +2,9 @@
 
 /* Various wrappers to make the kernel .S file build in user-space: */
 
+// memcpy_orig and memcpy_erms are defined as SYM_L_LOCAL, but we need them global
+#define SYM_FUNC_START_LOCAL(name)                      \
+        SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
 #define memcpy MEMCPY /* don't hide glibc's memcpy() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
index d550bd5..6f093c4 100644 (file)
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+// memset_orig and memset_erms are defined as SYM_L_LOCAL, but we need them global
+#define SYM_FUNC_START_LOCAL(name)                      \
+        SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
 #define memset MEMSET /* don't hide glibc's memset() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
index 584e2e1..cefc715 100644 (file)
@@ -1222,8 +1222,10 @@ static int __cmd_diff(void)
                if (compute == COMPUTE_STREAM) {
                        d->evlist_streams = evlist__create_streams(
                                                d->session->evlist, 5);
-                       if (!d->evlist_streams)
+                       if (!d->evlist_streams) {
+                               ret = -ENOMEM;
                                goto out_delete;
+                       }
                }
        }
 
index 452a75f..0462dc8 100644 (file)
@@ -779,25 +779,15 @@ static int __cmd_inject(struct perf_inject *inject)
                        dsos__hit_all(session);
                /*
                 * The AUX areas have been removed and replaced with
-                * synthesized hardware events, so clear the feature flag and
-                * remove the evsel.
+                * synthesized hardware events, so clear the feature flag.
                 */
                if (inject->itrace_synth_opts.set) {
-                       struct evsel *evsel;
-
                        perf_header__clear_feat(&session->header,
                                                HEADER_AUXTRACE);
                        if (inject->itrace_synth_opts.last_branch ||
                            inject->itrace_synth_opts.add_last_branch)
                                perf_header__set_feat(&session->header,
                                                      HEADER_BRANCH_STACK);
-                       evsel = perf_evlist__id2evsel_strict(session->evlist,
-                                                            inject->aux_id);
-                       if (evsel) {
-                               pr_debug("Deleting %s\n", evsel__name(evsel));
-                               evlist__remove(session->evlist, evsel);
-                               evsel__delete(evsel);
-                       }
                }
                session->header.data_offset = output_data_offset;
                session->header.data_size = inject->bytes_written;
index f0a1dba..a2f1e53 100644 (file)
@@ -406,7 +406,7 @@ static int report_lock_acquire_event(struct evsel *evsel,
        struct lock_seq_stat *seq;
        const char *name = evsel__strval(evsel, sample, "name");
        u64 tmp  = evsel__intval(evsel, sample, "lockdep_addr");
-       int flag = evsel__intval(evsel, sample, "flag");
+       int flag = evsel__intval(evsel, sample, "flags");
 
        memcpy(&addr, &tmp, sizeof(void *));
 
@@ -621,7 +621,7 @@ static int report_lock_release_event(struct evsel *evsel,
        case SEQ_STATE_READ_ACQUIRED:
                seq->read_count--;
                BUG_ON(seq->read_count < 0);
-               if (!seq->read_count) {
+               if (seq->read_count) {
                        ls->nr_release++;
                        goto end;
                }
index 8d84fdb..18fde2f 100755 (executable)
@@ -44,7 +44,7 @@ perf_script_branch_samples() {
        #   touch  6512          1         branches:u:      ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so)
        #   touch  6512          1         branches:u:      ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so)
        perf script -F,-time -i ${perfdata} | \
-               egrep " +$1 +[0-9]+ .* +branches:([u|k]:)? +"
+               egrep " +$1 +[0-9]+ .* +branches:(.*:)? +"
 }
 
 perf_report_branch_samples() {
@@ -105,7 +105,7 @@ arm_cs_iterate_devices() {
                #     `> device_name = 'tmc_etf0'
                device_name=$(basename $path)
 
-               if is_device_sink $path $devce_name; then
+               if is_device_sink $path $device_name; then
 
                        record_touch_file $device_name $2 &&
                        perf_script_branch_samples touch &&
index aa89801..7b2d471 100644 (file)
@@ -356,9 +356,25 @@ bool die_is_signed_type(Dwarf_Die *tp_die)
 bool die_is_func_def(Dwarf_Die *dw_die)
 {
        Dwarf_Attribute attr;
+       Dwarf_Addr addr = 0;
+
+       if (dwarf_tag(dw_die) != DW_TAG_subprogram)
+               return false;
+
+       if (dwarf_attr(dw_die, DW_AT_declaration, &attr))
+               return false;
 
-       return (dwarf_tag(dw_die) == DW_TAG_subprogram &&
-               dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL);
+       /*
+        * DW_AT_declaration can be lost from a function declaration
+        * by gcc bug #97060.
+        * So we need to check whether this subprogram DIE has DW_AT_inline
+        * or an entry address.
+        */
+       if (!dwarf_attr(dw_die, DW_AT_inline, &attr) &&
+           die_entrypc(dw_die, &addr) < 0)
+               return false;
+
+       return true;
 }
 
 /**
@@ -373,6 +389,7 @@ bool die_is_func_def(Dwarf_Die *dw_die)
 int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr)
 {
        Dwarf_Addr base, end;
+       Dwarf_Attribute attr;
 
        if (!addr)
                return -EINVAL;
@@ -380,6 +397,13 @@ int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr)
        if (dwarf_entrypc(dw_die, addr) == 0)
                return 0;
 
+       /*
+        * Since dwarf_ranges() returns 0 even if there is no
+        * DW_AT_ranges attribute, check for the attribute first.
+        */
+       if (!dwarf_attr(dw_die, DW_AT_ranges, &attr))
+               return -ENOENT;
+
        return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0;
 }
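Both hunks guard against libdw behavior: a definition may lose DW_AT_declaration to gcc bug #97060, and dwarf_ranges() reports success even when no DW_AT_ranges attribute exists. A condensed sketch of the resulting check order (real libdw calls, but simplified: the full die_entrypc() above also falls back to DW_AT_ranges):

#include <elfutils/libdw.h>
#include <dwarf.h>
#include <stdbool.h>

static bool looks_like_func_def(Dwarf_Die *die)
{
	Dwarf_Attribute attr;
	Dwarf_Addr entry;

	if (dwarf_tag(die) != DW_TAG_subprogram)
		return false;
	if (dwarf_attr(die, DW_AT_declaration, &attr))
		return false;		/* an explicit declaration */
	/* Work around gcc bug #97060: require inline info or an entry PC. */
	if (dwarf_attr(die, DW_AT_inline, &attr))
		return true;
	return dwarf_entrypc(die, &entry) == 0;
}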
 
index d9b385f..10a4c4c 100644 (file)
@@ -15,6 +15,9 @@
 static inline size_t hash_bits(size_t h, int bits)
 {
        /* shuffle bits and return requested number of upper bits */
+       if (bits == 0)
+               return 0;
+
 #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
        /* LP64 case */
        return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
@@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value);
  * @key: key to iterate entries for
  */
 #define hashmap__for_each_key_entry(map, cur, _key)                        \
-       for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
-                                            map->cap_bits);                \
-                    map->buckets ? map->buckets[bkt] : NULL; });           \
+       for (cur = map->buckets                                             \
+                    ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+                    : NULL;                                                \
             cur;                                                           \
             cur = cur->next)                                               \
                if (map->equal_fn(cur->key, (_key), map->ctx))
 
 #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)              \
-       for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
-                                            map->cap_bits);                \
-                    cur = map->buckets ? map->buckets[bkt] : NULL; });     \
+       for (cur = map->buckets                                             \
+                    ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+                    : NULL;                                                \
             cur && ({ tmp = cur->next; true; });                           \
             cur = tmp)                                                     \
                if (map->equal_fn(cur->key, (_key), map->ctx))
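The bits == 0 guard exists because the return expressions below it shift by word-size minus bits; with bits == 0 that is a shift by the full width (h >> 64 on LP64), which is undefined behaviour in C, and the reworked iteration macros can now legitimately reach hash_bits() with cap_bits == 0 on an empty map. A minimal demonstration, assuming 64-bit size_t:

#include <stdio.h>

static size_t hash_bits_sketch(size_t h, int bits)
{
	if (bits == 0)
		return 0;	/* without this, the shift below is UB for bits == 0 */

	/* Fibonacci hashing: multiply, then keep the top 'bits' bits. */
	return (h * 11400714819323198485llu) >> (64 - bits);
}

int main(void)
{
	printf("%zu\n", hash_bits_sketch(12345, 8));	/* a value in 0..255 */
	return 0;
}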
index b8a5159..5acf053 100644 (file)
@@ -25,6 +25,7 @@
 
 /* SYM_L_* -- linkage of symbols */
 #define SYM_L_GLOBAL(name)                     .globl name
+#define SYM_L_WEAK(name)                       .weak name
 #define SYM_L_LOCAL(name)                      /* nothing */
 
 #define ALIGN __ALIGN
        SYM_END(name, SYM_T_FUNC)
 #endif
 
+/* SYM_FUNC_START_WEAK -- use for weak functions */
+#ifndef SYM_FUNC_START_WEAK
+#define SYM_FUNC_START_WEAK(name)                      \
+       SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN)
+#endif
+
 /*
  * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START,
  * SYM_FUNC_START_WEAK, ...
index 2c40610..76dd349 100644 (file)
@@ -1885,8 +1885,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
        if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die)))
                return DWARF_CB_OK;
 
-       if (die_is_func_def(sp_die) &&
-           die_match_name(sp_die, lr->function)) {
+       if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) {
                lf->fname = dwarf_decl_file(sp_die);
                dwarf_decl_line(sp_die, &lr->offset);
                pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
index 4b57c0c..a963b5b 100644 (file)
@@ -324,13 +324,10 @@ static int first_shadow_cpu(struct perf_stat_config *config,
        struct evlist *evlist = evsel->evlist;
        int i;
 
-       if (!config->aggr_get_id)
-               return 0;
-
        if (config->aggr_mode == AGGR_NONE)
                return id;
 
-       if (config->aggr_mode == AGGR_GLOBAL)
+       if (!config->aggr_get_id)
                return 0;
 
        for (i = 0; i < evsel__nr_cpus(evsel); i++) {
index 8a23391..d9c6243 100644 (file)
@@ -563,6 +563,9 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool,
        char cgrp_root[PATH_MAX];
        size_t mount_len;  /* length of mount point in the path */
 
+       if (!tool || !tool->cgroup_events)
+               return 0;
+
        if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) {
                pr_debug("cannot find cgroup mount point\n");
                return -1;
index cb16d2a..54188ee 100755 (executable)
@@ -2040,7 +2040,7 @@ sub reboot_to {
 
     if ($reboot_type eq "grub") {
        run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
-    } elsif ($reboot_type eq "grub2") {
+    } elsif (($reboot_type eq "grub2") or ($reboot_type eq "grub2bls")) {
        run_ssh "$grub_reboot $grub_number";
     } elsif ($reboot_type eq "syslinux") {
        run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path";
diff --git a/tools/testing/kunit/.gitattributes b/tools/testing/kunit/.gitattributes
deleted file mode 100644 (file)
index 5b7da1f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-test_data/* binary
index ebf5f57..d4f7846 100755 (executable)
@@ -11,7 +11,6 @@ import argparse
 import sys
 import os
 import time
-import shutil
 
 from collections import namedtuple
 from enum import Enum, auto
@@ -44,11 +43,6 @@ class KunitStatus(Enum):
        BUILD_FAILURE = auto()
        TEST_FAILURE = auto()
 
-def create_default_kunitconfig():
-       if not os.path.exists(kunit_kernel.kunitconfig_path):
-               shutil.copyfile('arch/um/configs/kunit_defconfig',
-                               kunit_kernel.kunitconfig_path)
-
 def get_kernel_root_path():
        parts = sys.argv[0] if not __file__ else __file__
        parts = os.path.realpath(parts).split('tools/testing/kunit')
@@ -61,7 +55,6 @@ def config_tests(linux: kunit_kernel.LinuxSourceTree,
        kunit_parser.print_with_timestamp('Configuring KUnit Kernel ...')
 
        config_start = time.time()
-       create_default_kunitconfig()
        success = linux.build_reconfig(request.build_dir, request.make_options)
        config_end = time.time()
        if not success:
@@ -262,12 +255,12 @@ def main(argv, linux=None):
                if not os.path.exists(cli_args.build_dir):
                        os.mkdir(cli_args.build_dir)
 
-               if not os.path.exists(kunit_kernel.kunitconfig_path):
-                       create_default_kunitconfig()
-
                if not linux:
                        linux = kunit_kernel.LinuxSourceTree()
 
+               linux.create_kunitconfig(cli_args.build_dir)
+               linux.read_kunitconfig(cli_args.build_dir)
+
                request = KunitRequest(cli_args.raw_output,
                                       cli_args.timeout,
                                       cli_args.jobs,
@@ -283,12 +276,12 @@ def main(argv, linux=None):
                                not os.path.exists(cli_args.build_dir)):
                        os.mkdir(cli_args.build_dir)
 
-               if not os.path.exists(kunit_kernel.kunitconfig_path):
-                       create_default_kunitconfig()
-
                if not linux:
                        linux = kunit_kernel.LinuxSourceTree()
 
+               linux.create_kunitconfig(cli_args.build_dir)
+               linux.read_kunitconfig(cli_args.build_dir)
+
                request = KunitConfigRequest(cli_args.build_dir,
                                             cli_args.make_options)
                result = config_tests(linux, request)
@@ -301,6 +294,9 @@ def main(argv, linux=None):
                if not linux:
                        linux = kunit_kernel.LinuxSourceTree()
 
+               linux.create_kunitconfig(cli_args.build_dir)
+               linux.read_kunitconfig(cli_args.build_dir)
+
                request = KunitBuildRequest(cli_args.jobs,
                                            cli_args.build_dir,
                                            cli_args.alltests,
@@ -315,6 +311,9 @@ def main(argv, linux=None):
                if not linux:
                        linux = kunit_kernel.LinuxSourceTree()
 
+               linux.create_kunitconfig(cli_args.build_dir)
+               linux.read_kunitconfig(cli_args.build_dir)
+
                exec_request = KunitExecRequest(cli_args.timeout,
                                                cli_args.build_dir,
                                                cli_args.alltests)
@@ -337,7 +336,7 @@ def main(argv, linux=None):
                                kunit_output = f.read().splitlines()
                request = KunitParseRequest(cli_args.raw_output,
                                            kunit_output,
-                                           cli_args.build_dir,
+                                           None,
                                            cli_args.json)
                result = parse_tests(request)
                if result.status != KunitStatus.SUCCESS:
index b557b1e..2e3cc0f 100644 (file)
@@ -6,10 +6,10 @@
 # Author: Felix Guo <felixguoxiuping@gmail.com>
 # Author: Brendan Higgins <brendanhiggins@google.com>
 
-
 import logging
 import subprocess
 import os
+import shutil
 import signal
 
 from contextlib import ExitStack
@@ -18,8 +18,10 @@ import kunit_config
 import kunit_parser
 
 KCONFIG_PATH = '.config'
-kunitconfig_path = '.kunitconfig'
+KUNITCONFIG_PATH = '.kunitconfig'
+DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig'
 BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config'
+OUTFILE_PATH = 'test.log'
 
 class ConfigError(Exception):
        """Represents an error trying to configure the Linux kernel."""
@@ -82,36 +84,51 @@ class LinuxSourceTreeOperations(object):
                if build_dir:
                        command += ['O=' + build_dir]
                try:
-                       subprocess.check_output(command, stderr=subprocess.STDOUT)
+                       proc = subprocess.Popen(command,
+                                               stderr=subprocess.PIPE,
+                                               stdout=subprocess.DEVNULL)
                except OSError as e:
-                       raise BuildError('Could not call execute make: ' + str(e))
-               except subprocess.CalledProcessError as e:
-                       raise BuildError(e.output.decode())
-
-       def linux_bin(self, params, timeout, build_dir, outfile):
+                       raise BuildError('Could not call make command: ' + str(e))
+               _, stderr = proc.communicate()
+               if proc.returncode != 0:
+                       raise BuildError(stderr.decode())
+               if stderr:  # likely only due to build warnings
+                       print(stderr.decode())
+
+       def linux_bin(self, params, timeout, build_dir):
                """Runs the Linux UML binary. Must be named 'linux'."""
                linux_bin = './linux'
                if build_dir:
                        linux_bin = os.path.join(build_dir, 'linux')
+               outfile = get_outfile_path(build_dir)
                with open(outfile, 'w') as output:
                        process = subprocess.Popen([linux_bin] + params,
                                                   stdout=output,
                                                   stderr=subprocess.STDOUT)
                        process.wait(timeout)
 
-
 def get_kconfig_path(build_dir):
        kconfig_path = KCONFIG_PATH
        if build_dir:
                kconfig_path = os.path.join(build_dir, KCONFIG_PATH)
        return kconfig_path
 
+def get_kunitconfig_path(build_dir):
+       kunitconfig_path = KUNITCONFIG_PATH
+       if build_dir:
+               kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH)
+       return kunitconfig_path
+
+def get_outfile_path(build_dir):
+       outfile_path = OUTFILE_PATH
+       if build_dir:
+               outfile_path = os.path.join(build_dir, OUTFILE_PATH)
+       return outfile_path
+
 class LinuxSourceTree(object):
        """Represents a Linux kernel source tree with KUnit tests."""
 
        def __init__(self):
-               self._kconfig = kunit_config.Kconfig()
-               self._kconfig.read_from_file(kunitconfig_path)
                self._ops = LinuxSourceTreeOperations()
                signal.signal(signal.SIGINT, self.signal_handler)
 
@@ -123,6 +140,16 @@ class LinuxSourceTree(object):
                        return False
                return True
 
+       def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH):
+               kunitconfig_path = get_kunitconfig_path(build_dir)
+               if not os.path.exists(kunitconfig_path):
+                       shutil.copyfile(defconfig, kunitconfig_path)
+
+       def read_kunitconfig(self, build_dir):
+               kunitconfig_path = get_kunitconfig_path(build_dir)
+               self._kconfig = kunit_config.Kconfig()
+               self._kconfig.read_from_file(kunitconfig_path)
+
        def validate_config(self, build_dir):
                kconfig_path = get_kconfig_path(build_dir)
                validated_kconfig = kunit_config.Kconfig()
@@ -178,8 +205,8 @@ class LinuxSourceTree(object):
 
        def run_kernel(self, args=[], build_dir='', timeout=None):
                args.extend(['mem=1G'])
-               outfile = 'test.log'
-               self._ops.linux_bin(args, timeout, build_dir, outfile)
+               self._ops.linux_bin(args, timeout, build_dir)
+               outfile = get_outfile_path(build_dir)
                subprocess.call(['stty', 'sane'])
                with open(outfile, 'r') as file:
                        for line in file:
index 84a1af2..bbfe1b4 100644 (file)
@@ -12,7 +12,7 @@ from collections import namedtuple
 from datetime import datetime
 from enum import Enum, auto
 from functools import reduce
-from typing import List
+from typing import List, Optional, Tuple
 
 TestResult = namedtuple('TestResult', ['status','suites','log'])
 
@@ -54,6 +54,7 @@ kunit_end_re = re.compile('(List of all partitions:|'
 def isolate_kunit_output(kernel_output):
        started = False
        for line in kernel_output:
+               line = line.rstrip()  # line always has a trailing \n
                if kunit_start_re.search(line):
                        prefix_len = len(line.split('TAP version')[0])
                        started = True
@@ -65,7 +66,7 @@ def isolate_kunit_output(kernel_output):
 
 def raw_output(kernel_output):
        for line in kernel_output:
-               print(line)
+               print(line.rstrip())
 
 DIVIDER = '=' * 60
 
@@ -151,7 +152,7 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
        else:
                return False
 
-def parse_test_case(lines: List[str]) -> TestCase:
+def parse_test_case(lines: List[str]) -> Optional[TestCase]:
        test_case = TestCase()
        save_non_diagnositic(lines, test_case)
        while parse_diagnostic(lines, test_case):
@@ -163,7 +164,7 @@ def parse_test_case(lines: List[str]) -> TestCase:
 
 SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$')
 
-def parse_subtest_header(lines: List[str]) -> str:
+def parse_subtest_header(lines: List[str]) -> Optional[str]:
        consume_non_diagnositic(lines)
        if not lines:
                return None
@@ -176,7 +177,7 @@ def parse_subtest_header(lines: List[str]) -> str:
 
 SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)')
 
-def parse_subtest_plan(lines: List[str]) -> int:
+def parse_subtest_plan(lines: List[str]) -> Optional[int]:
        consume_non_diagnositic(lines)
        match = SUBTEST_PLAN.match(lines[0])
        if match:
@@ -230,7 +231,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus:
        max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases)
        return max_status(max_test_case_status, test_suite.status)
 
-def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite:
+def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]:
        if not lines:
                return None
        consume_non_diagnositic(lines)
@@ -271,7 +272,7 @@ def parse_tap_header(lines: List[str]) -> bool:
 
 TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)')
 
-def parse_test_plan(lines: List[str]) -> int:
+def parse_test_plan(lines: List[str]) -> Optional[int]:
        consume_non_diagnositic(lines)
        match = TEST_PLAN.match(lines[0])
        if match:
@@ -310,7 +311,7 @@ def parse_test_result(lines: List[str]) -> TestResult:
        else:
                return TestResult(TestStatus.NO_TESTS, [], lines)
 
-def print_and_count_results(test_result: TestResult) -> None:
+def print_and_count_results(test_result: TestResult) -> Tuple[int, int, int]:
        total_tests = 0
        failed_tests = 0
        crashed_tests = 0
index 0b60855..497ab51 100755 (executable)
@@ -102,7 +102,7 @@ class KUnitParserTest(unittest.TestCase):
                        'test_data/test_output_isolated_correctly.log')
                file = open(log_path)
                result = kunit_parser.isolate_kunit_output(file.readlines())
-               self.assertContains('TAP version 14\n', result)
+               self.assertContains('TAP version 14', result)
                self.assertContains('   # Subtest: example', result)
                self.assertContains('   1..2', result)
                self.assertContains('   ok 1 - example_simple_test', result)
@@ -115,7 +115,7 @@ class KUnitParserTest(unittest.TestCase):
                        'test_data/test_pound_sign.log')
                with open(log_path) as file:
                        result = kunit_parser.isolate_kunit_output(file.readlines())
-               self.assertContains('TAP version 14\n', result)
+               self.assertContains('TAP version 14', result)
                self.assertContains('   # Subtest: kunit-resource-test', result)
                self.assertContains('   1..5', result)
                self.assertContains('   ok 1 - kunit_resource_test_init_resources', result)
index 6ae907f..f9a1200 100644 (file)
@@ -33,6 +33,7 @@ typedef unsigned long dma_addr_t;
 #define __ALIGN_KERNEL(x, a)           __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
 #define __ALIGN_KERNEL_MASK(x, mask)   (((x) + (mask)) & ~(mask))
 #define ALIGN(x, a)                    __ALIGN_KERNEL((x), (a))
+#define ALIGN_DOWN(x, a)               __ALIGN_KERNEL((x) - ((a) - 1), (a))
 
 #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
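The new ALIGN_DOWN reuses the round-up helper by first subtracting a - 1, which nets out to rounding down for power-of-two alignments. A quick worked check (typeof is the GCC extension the header already relies on):

#include <stdio.h>

#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
#define ALIGN(x, a)			__ALIGN_KERNEL((x), (a))
#define ALIGN_DOWN(x, a)		__ALIGN_KERNEL((x) - ((a) - 1), (a))

int main(void)
{
	/* ALIGN(10, 4) == 12; ALIGN_DOWN(10, 4) == 8 */
	printf("%lu %lu\n", ALIGN(10UL, 4UL), ALIGN_DOWN(10UL, 4UL));
	return 0;
}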
 
index b2c7e9f..f561aed 100644 (file)
@@ -52,9 +52,9 @@ int main(void)
 {
        const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
        struct test *test, tests[] = {
-               { -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
                { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
-               { -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
+               { 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
+               { 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
                { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 },
                { 0, 1, pfn(0), 1, sgmax, 1 },
                { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 },
index 5241405..5861446 100644 (file)
@@ -456,10 +456,10 @@ static struct bpf_align_test tests[] = {
                         */
                        {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
                        /* Checked s>=0 */
-                       {9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+                       {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
                        /* packet pointer + nonnegative (4n+2) */
-                       {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
-                       {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+                       {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+                       {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
                        /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
                         * We checked the bounds, but it might have been able
                         * to overflow if the packet pointer started in the
@@ -467,7 +467,7 @@ static struct bpf_align_test tests[] = {
                         * So we did not get a 'range' on R6, and the access
                         * attempt will fail.
                         */
-                       {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+                       {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
                }
        },
        {
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c
new file mode 100644 (file)
index 0000000..e419298
--- /dev/null
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_probe_read_user_str.skel.h"
+
+static const char str1[] = "mestring";
+static const char str2[] = "mestringalittlebigger";
+static const char str3[] = "mestringblubblubblubblubblub";
+
+static int test_one_str(struct test_probe_read_user_str *skel, const char *str,
+                       size_t len)
+{
+       int err, duration = 0;
+       char buf[256];
+
+       /* Ensure bytes after string are ones */
+       memset(buf, 1, sizeof(buf));
+       memcpy(buf, str, len);
+
+       /* Give prog our userspace pointer */
+       skel->bss->user_ptr = buf;
+
+       /* Trigger tracepoint */
+       usleep(1);
+
+       /* Did helper fail? */
+       if (CHECK(skel->bss->ret < 0, "prog_ret", "prog returned: %ld\n",
+                 skel->bss->ret))
+               return 1;
+
+       /* Check that string was copied correctly */
+       err = memcmp(skel->bss->buf, str, len);
+       if (CHECK(err, "memcmp", "prog copied wrong string"))
+               return 1;
+
+       /* Now check that no extra trailing bytes were copied */
+       memset(buf, 0, sizeof(buf));
+       err = memcmp(skel->bss->buf + len, buf, sizeof(buf) - len);
+       if (CHECK(err, "memcmp", "trailing bytes were not stripped"))
+               return 1;
+
+       return 0;
+}
+
+void test_probe_read_user_str(void)
+{
+       struct test_probe_read_user_str *skel;
+       int err, duration = 0;
+
+       skel = test_probe_read_user_str__open_and_load();
+       if (CHECK(!skel, "test_probe_read_user_str__open_and_load",
+                 "skeleton open and load failed\n"))
+               return;
+
+       /* Give pid to bpf prog so it doesn't read from anyone else */
+       skel->bss->pid = getpid();
+
+       err = test_probe_read_user_str__attach(skel);
+       if (CHECK(err, "test_probe_read_user_str__attach",
+                 "skeleton attach failed: %d\n", err))
+               goto out;
+
+       if (test_one_str(skel, str1, sizeof(str1)))
+               goto out;
+       if (test_one_str(skel, str2, sizeof(str2)))
+               goto out;
+       if (test_one_str(skel, str3, sizeof(str3)))
+               goto out;
+
+out:
+       test_probe_read_user_str__destroy(skel);
+}
index c165054..fddbc5d 100644 (file)
@@ -217,9 +217,15 @@ void test_ringbuf(void)
        if (CHECK(err, "join_bg", "err %d\n", err))
                goto cleanup;
 
-       if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret))
+       if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret))
                goto cleanup;
 
+       /* due to timing variations, there could still be non-notified
+        * samples, so consume them here to collect all the samples
+        */
+       err = ring_buffer__consume(ringbuf);
+       CHECK(err < 0, "rb_consume", "failed: %d\n", err);
+
        /* 3 rounds, 2 samples each */
        cnt = atomic_xchg(&sample_cnt, 0);
        CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt);
index 78e4506..d37161e 100644 (file)
@@ -81,7 +81,7 @@ void test_ringbuf_multi(void)
 
        /* poll for samples, should get 2 ringbufs back */
        err = ring_buffer__poll(ringbuf, -1);
-       if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err))
+       if (CHECK(err != 2, "poll_res", "expected 2 records, got %d\n", err))
                goto cleanup;
 
        /* expect extra polling to return nothing */
index 29188d6..51fac97 100644 (file)
@@ -138,7 +138,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
         */
 
        buf = 0x40;
-       if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) {
+       err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+       if (err < 0) {
                log_err("Failed to call setsockopt(IP_TOS)");
                goto detach;
        }
index a00abf5..3f3d2ac 100644 (file)
@@ -3,12 +3,14 @@
 #include <test_progs.h>
 #include <time.h>
 #include "test_subprogs.skel.h"
+#include "test_subprogs_unused.skel.h"
 
 static int duration;
 
 void test_subprogs(void)
 {
        struct test_subprogs *skel;
+       struct test_subprogs_unused *skel2;
        int err;
 
        skel = test_subprogs__open_and_load();
@@ -26,6 +28,10 @@ void test_subprogs(void)
        CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
        CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
 
+       skel2 = test_subprogs_unused__open_and_load();
+       ASSERT_OK_PTR(skel2, "unused_progs_skel");
+       test_subprogs_unused__destroy(skel2);
+
 cleanup:
        test_subprogs__destroy(skel);
 }
index 193002b..32e4348 100644 (file)
@@ -60,6 +60,7 @@ void test_test_global_funcs(void)
                { "test_global_func5.o" , "expected pointer to ctx, but got PTR" },
                { "test_global_func6.o" , "modified ctx ptr R2" },
                { "test_global_func7.o" , "foo() doesn't return scalar" },
+               { "test_global_func8.o" },
        };
        libbpf_print_fn_t old_print_fn = NULL;
        int err, i, duration = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c
new file mode 100644 (file)
index 0000000..d55a654
--- /dev/null
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline int foo(struct __sk_buff *skb)
+{
+       return bpf_get_prandom_u32();
+}
+
+SEC("cgroup_skb/ingress")
+int test_cls(struct __sk_buff *skb)
+{
+       if (!foo(skb))
+               return 0;
+
+       return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c
new file mode 100644 (file)
index 0000000..3ae398b
--- /dev/null
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include <sys/types.h>
+
+pid_t pid = 0;
+long ret = 0;
+void *user_ptr = 0;
+char buf[256] = {};
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int on_write(void *ctx)
+{
+       if (pid != (bpf_get_current_pid_tgid() >> 32))
+               return 0;
+
+       ret = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
new file mode 100644 (file)
index 0000000..bc49e05
--- /dev/null
@@ -0,0 +1,21 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+const char LICENSE[] SEC("license") = "GPL";
+
+__attribute__((unused)) __noinline int unused1(int x)
+{
+       return x + 1;
+}
+
+static __attribute__((unused)) __noinline int unused2(int x)
+{
+       return x + 2;
+}
+
+SEC("raw_tp/sys_enter")
+int main_prog(void *ctx)
+{
+       return 0;
+}
index 43c9cda..b99bb8e 100755 (executable)
@@ -184,9 +184,7 @@ def bpftool_prog_list(expected=None, ns=""):
 def bpftool_map_list(expected=None, ns=""):
     _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
     # Remove the base maps
-    for m in base_maps:
-        if m in maps:
-            maps.remove(m)
+    maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names]
     if expected is not None:
         if len(maps) != expected:
             fail(True, "%d BPF maps loaded, expected %d" %
@@ -716,13 +714,11 @@ def test_multi_prog(simdev, sim, obj, modename, modeid):
     fail(ret == 0, "Replaced one of programs without -force")
     check_extack(err, "XDP program already attached.", args)
 
-    if modename == "" or modename == "drv":
-        othermode = "" if modename == "drv" else "drv"
-        start_test("Test multi-attachment XDP - detach...")
-        ret, _, err = sim.unset_xdp(othermode, force=True,
-                                    fail=False, include_stderr=True)
-        fail(ret == 0, "Removed program with a bad mode")
-        check_extack(err, "program loaded with different flags.", args)
+    start_test("Test multi-attachment XDP - remove without mode...")
+    ret, _, err = sim.unset_xdp("", force=True,
+                                fail=False, include_stderr=True)
+    fail(ret == 0, "Removed program without a mode flag")
+    check_extack(err, "More than one program loaded, unset mode is ambiguous.", args)
 
     sim.unset_xdp("offload")
     xdp = sim.ip_link_show(xdp=True)["xdp"]
@@ -772,6 +768,9 @@ ret, progs = bpftool("prog", fail=False)
 skip(ret != 0, "bpftool not installed")
 base_progs = progs
 _, base_maps = bpftool("map")
+base_map_names = [
+    'pid_iter.rodata' # created on each bpftool invocation
+]
 
 # Check netdevsim
 ret, out = cmd("modprobe netdevsim", fail=False)
@@ -913,11 +912,18 @@ try:
 
     sim.tc_flush_filters()
 
+    start_test("Test TC offloads failure...")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+    ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+                                         fail=False, include_stderr=True)
+    fail(ret == 0, "TC filter did not reject with TC offloads enabled")
+    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+
     start_test("Test TC offloads work...")
     ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
                                          fail=False, include_stderr=True)
     fail(ret != 0, "TC filter did not load with TC offloads enabled")
-    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
 
     start_test("Test TC offload basics...")
     dfs = simdev.dfs_get_bound_progs(expected=1)
@@ -941,6 +947,7 @@ try:
     start_test("Test disabling TC offloads is rejected while filters installed...")
     ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
     fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+    sim.set_ethtool_tc_offloads(True)
 
     start_test("Test qdisc removal frees things...")
     sim.tc_flush_filters()
@@ -999,18 +1006,8 @@ try:
                               fail=False, include_stderr=True)
     fail(ret == 0, "Replaced XDP program with a program in different mode")
     check_extack(err,
-                 "native and generic XDP can't be active at the same time.",
+                 "Native and generic XDP can't be active at the same time.",
                  args)
-    ret, _, err = sim.set_xdp(obj, "", force=True,
-                              fail=False, include_stderr=True)
-    fail(ret == 0, "Replaced XDP program with a program in different mode")
-    check_extack(err, "program loaded with different flags.", args)
-
-    start_test("Test XDP prog remove with bad flags...")
-    ret, _, err = sim.unset_xdp("", force=True,
-                                fail=False, include_stderr=True)
-    fail(ret == 0, "Removed program with a bad mode")
-    check_extack(err, "program loaded with different flags.", args)
 
     start_test("Test MTU restrictions...")
     ret, _ = sim.set_mtu(9000, fail=False)
@@ -1040,10 +1037,19 @@ try:
     offload = bpf_pinned("/sys/fs/bpf/offload")
     ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
     fail(ret == 0, "attached offloaded XDP program to drv")
-    check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args)
+    check_extack(err, "Using device-bound program without HW_MODE flag is not supported.", args)
     rm("/sys/fs/bpf/offload")
     sim.wait_for_flush()
 
+    start_test("Test XDP load failure...")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+    ret, _, err = bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+                                 dev=sim['ifname'], fail=False, include_stderr=True)
+    fail(ret == 0, "verifier should fail on load")
+    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+    sim.wait_for_flush()
+
     start_test("Test XDP offload...")
     _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
     ipl = sim.ip_link_show(xdp=True)
@@ -1051,7 +1057,6 @@ try:
     progs = bpftool_prog_list(expected=1)
     prog = progs[0]
     fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
-    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
 
     start_test("Test XDP offload is device bound...")
     dfs = simdev.dfs_get_bound_progs(expected=1)
index 1c4b193..bed53b5 100644 (file)
@@ -68,7 +68,7 @@
        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
        BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
-       BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
+       BPF_JMP32_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
        BPF_MOV32_IMM(BPF_REG_1, 0),
        BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
        BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
index dac40de..57ed67b 100644 (file)
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
+{
+       "bounds checks after 32-bit truncation. test 1",
+       .insns = {
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+       BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+       /* This used to reduce the max bound to 0x7fffffff */
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+       BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0x7fffffff, 1),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_8b = { 3 },
+       .errstr_unpriv = "R0 leaks addr",
+       .result_unpriv = REJECT,
+       .result = ACCEPT,
+},
+{
+       "bounds checks after 32-bit truncation. test 2",
+       .insns = {
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+       BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JSLT, BPF_REG_1, 1, 1),
+       BPF_JMP32_IMM(BPF_JSLT, BPF_REG_1, 0, 1),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_8b = { 3 },
+       .errstr_unpriv = "R0 leaks addr",
+       .result_unpriv = REJECT,
+       .result = ACCEPT,
+},
index 7a2c242..ce8f4ad 100644 (file)
 /x86_64/set_sregs_test
 /x86_64/smm_test
 /x86_64/state_test
-/x86_64/user_msr_test
-/x86_64/vmx_preemption_timer_test
 /x86_64/svm_vmcall_test
 /x86_64/sync_regs_test
+/x86_64/tsc_msrs_test
+/x86_64/userspace_msr_exit_test
 /x86_64/vmx_apic_access_test
 /x86_64/vmx_close_while_nested_test
 /x86_64/vmx_dirty_log_test
+/x86_64/vmx_preemption_timer_test
 /x86_64/vmx_set_nested_state_test
 /x86_64/vmx_tsc_adjust_test
 /x86_64/xss_msr_test
-/clear_dirty_log_test
 /demand_paging_test
 /dirty_log_test
 /dirty_log_perf_test
index 3d14ef7..fe41c6a 100644 (file)
@@ -33,10 +33,10 @@ ifeq ($(ARCH),s390)
        UNAME_M := s390x
 endif
 
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
-LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
+LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
@@ -50,6 +50,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
@@ -58,7 +59,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
 TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
@@ -70,6 +70,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_aarch64 += set_memory_region_test
 TEST_GEN_PROGS_aarch64 += steal_time
index 3d96a7b..cdad1ec 100644 (file)
@@ -7,23 +7,20 @@
  * Copyright (C) 2019, Google, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
+#define _GNU_SOURCE /* for pipe2 */
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-#include <asm/unistd.h>
 #include <time.h>
 #include <poll.h>
 #include <pthread.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
 #include <linux/userfaultfd.h>
+#include <sys/syscall.h>
 
-#include "perf_test_util.h"
-#include "processor.h"
+#include "kvm_util.h"
 #include "test_util.h"
+#include "perf_test_util.h"
+#include "guest_modes.h"
 
 #ifdef __NR_userfaultfd
 
 #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
 #endif
 
+static int nr_vcpus = 1;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 static char *guest_data_prototype;
 
 static void *vcpu_worker(void *data)
 {
        int ret;
-       struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+       struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
        int vcpu_id = vcpu_args->vcpu_id;
        struct kvm_vm *vm = perf_test_args.vm;
        struct kvm_run *run;
@@ -248,9 +247,14 @@ static int setup_demand_paging(struct kvm_vm *vm,
        return 0;
 }
 
-static void run_test(enum vm_guest_mode mode, bool use_uffd,
-                    useconds_t uffd_delay)
+struct test_params {
+       bool use_uffd;
+       useconds_t uffd_delay;
+};
+
+static void run_test(enum vm_guest_mode mode, void *arg)
 {
+       struct test_params *p = arg;
        pthread_t *vcpu_threads;
        pthread_t *uffd_handler_threads = NULL;
        struct uffd_handler_args *uffd_args = NULL;
@@ -261,7 +265,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
        int vcpu_id;
        int r;
 
-       vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+       vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size);
 
        perf_test_args.wr_fract = 1;
 
@@ -273,9 +277,9 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
        vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
        TEST_ASSERT(vcpu_threads, "Memory allocation failed");
 
-       add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+       perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
 
-       if (use_uffd) {
+       if (p->use_uffd) {
                uffd_handler_threads =
                        malloc(nr_vcpus * sizeof(*uffd_handler_threads));
                TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
@@ -308,7 +312,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
                        r = setup_demand_paging(vm,
                                                &uffd_handler_threads[vcpu_id],
                                                pipefds[vcpu_id * 2],
-                                               uffd_delay, &uffd_args[vcpu_id],
+                                               p->uffd_delay, &uffd_args[vcpu_id],
                                                vcpu_hva, guest_percpu_mem_size);
                        if (r < 0)
                                exit(-r);
@@ -339,7 +343,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 
        pr_info("All vCPU threads joined\n");
 
-       if (use_uffd) {
+       if (p->use_uffd) {
                char c;
 
                /* Tell the user fault fd handler threads to quit */
@@ -357,43 +361,23 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
                perf_test_args.vcpu_args[0].pages * nr_vcpus /
                ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 1000000000.0));
 
-       ucall_uninit(vm);
-       kvm_vm_free(vm);
+       perf_test_destroy_vm(vm);
 
        free(guest_data_prototype);
        free(vcpu_threads);
-       if (use_uffd) {
+       if (p->use_uffd) {
                free(uffd_handler_threads);
                free(uffd_args);
                free(pipefds);
        }
 }
 
-struct guest_mode {
-       bool supported;
-       bool enabled;
-};
-static struct guest_mode guest_modes[NUM_VM_MODES];
-
-#define guest_mode_init(mode, supported, enabled) ({ \
-       guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
-})
-
 static void help(char *name)
 {
-       int i;
-
        puts("");
        printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
               "          [-b memory] [-v vcpus]\n", name);
-       printf(" -m: specify the guest mode ID to test\n"
-              "     (default: test all supported modes)\n"
-              "     This option may be used multiple times.\n"
-              "     Guest mode IDs:\n");
-       for (i = 0; i < NUM_VM_MODES; ++i) {
-               printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
-                      guest_modes[i].supported ? " (supported)" : "");
-       }
+       guest_modes_help();
        printf(" -u: use User Fault FD to handle vCPU page\n"
               "     faults.\n");
        printf(" -d: add a delay in usec to the User Fault\n"
@@ -410,53 +394,22 @@ static void help(char *name)
 int main(int argc, char *argv[])
 {
        int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
-       bool mode_selected = false;
-       unsigned int mode;
-       int opt, i;
-       bool use_uffd = false;
-       useconds_t uffd_delay = 0;
-
-#ifdef __x86_64__
-       guest_mode_init(VM_MODE_PXXV48_4K, true, true);
-#endif
-#ifdef __aarch64__
-       guest_mode_init(VM_MODE_P40V48_4K, true, true);
-       guest_mode_init(VM_MODE_P40V48_64K, true, true);
-       {
-               unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
-
-               if (limit >= 52)
-                       guest_mode_init(VM_MODE_P52V48_64K, true, true);
-               if (limit >= 48) {
-                       guest_mode_init(VM_MODE_P48V48_4K, true, true);
-                       guest_mode_init(VM_MODE_P48V48_64K, true, true);
-               }
-       }
-#endif
-#ifdef __s390x__
-       guest_mode_init(VM_MODE_P40V48_4K, true, true);
-#endif
+       struct test_params p = {};
+       int opt;
+
+       guest_modes_append_default();
 
        while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) {
                switch (opt) {
                case 'm':
-                       if (!mode_selected) {
-                               for (i = 0; i < NUM_VM_MODES; ++i)
-                                       guest_modes[i].enabled = false;
-                               mode_selected = true;
-                       }
-                       mode = strtoul(optarg, NULL, 10);
-                       TEST_ASSERT(mode < NUM_VM_MODES,
-                                   "Guest mode ID %d too big", mode);
-                       guest_modes[mode].enabled = true;
+                       guest_modes_cmdline(optarg);
                        break;
                case 'u':
-                       use_uffd = true;
+                       p.use_uffd = true;
                        break;
                case 'd':
-                       uffd_delay = strtoul(optarg, NULL, 0);
-                       TEST_ASSERT(uffd_delay >= 0,
-                                   "A negative UFFD delay is not supported.");
+                       p.uffd_delay = strtoul(optarg, NULL, 0);
+                       TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
                        break;
                case 'b':
                        guest_percpu_mem_size = parse_size(optarg);
@@ -473,14 +426,7 @@ int main(int argc, char *argv[])
                }
        }
 
-       for (i = 0; i < NUM_VM_MODES; ++i) {
-               if (!guest_modes[i].enabled)
-                       continue;
-               TEST_ASSERT(guest_modes[i].supported,
-                           "Guest mode ID %d (%s) not supported.",
-                           i, vm_guest_mode_string(i));
-               run_test(i, use_uffd, uffd_delay);
-       }
+       for_each_guest_mode(run_test, &p);
 
        return 0;
 }
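
The userfaultfd handler threads themselves sit outside the hunks shown above. Each handler's core is one UFFDIO_COPY per reported fault; a rough sketch follows (guest_data_prototype is the test's page template from the hunk above, while "demand_paging_size" is a hypothetical stand-in for the per-fault copy length):

	/* Sketch only: resolve one fault by copying the prototype page in. */
	static int handle_uffd_page_request(int uffd, __u64 addr)
	{
		struct uffdio_copy copy;

		copy.src = (uint64_t)guest_data_prototype;
		copy.dst = addr;
		copy.len = demand_paging_size;	/* hypothetical: one host page */
		copy.mode = 0;

		if (ioctl(uffd, UFFDIO_COPY, &copy) == -1) {
			pr_info("Failed UFFDIO_COPY in 0x%llx, errno = %d\n",
				addr, errno);
			return -1;
		}

		return 0;
	}
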
index 85c9b8f..2283a0e 100644 (file)
@@ -8,28 +8,28 @@
  * Copyright (C) 2020, Google, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
 #include <time.h>
 #include <pthread.h>
 #include <linux/bitmap.h>
-#include <linux/bitops.h>
 
 #include "kvm_util.h"
-#include "perf_test_util.h"
-#include "processor.h"
 #include "test_util.h"
+#include "perf_test_util.h"
+#include "guest_modes.h"
 
 /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop) */
 #define TEST_HOST_LOOP_N               2UL
 
+static int nr_vcpus = 1;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
 /* Host variables */
+static u64 dirty_log_manual_caps;
 static bool host_quit;
 static uint64_t iteration;
-static uint64_t vcpu_last_completed_iteration[MAX_VCPUS];
+static uint64_t vcpu_last_completed_iteration[KVM_MAX_VCPUS];
 
 static void *vcpu_worker(void *data)
 {
@@ -41,7 +41,7 @@ static void *vcpu_worker(void *data)
        struct timespec ts_diff;
        struct timespec total = (struct timespec){0};
        struct timespec avg;
-       struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+       struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
        int vcpu_id = vcpu_args->vcpu_id;
 
        vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
@@ -88,13 +88,15 @@ static void *vcpu_worker(void *data)
        return NULL;
 }
 
-#ifdef USE_CLEAR_DIRTY_LOG
-static u64 dirty_log_manual_caps;
-#endif
+struct test_params {
+       unsigned long iterations;
+       uint64_t phys_offset;
+       int wr_fract;
+};
 
-static void run_test(enum vm_guest_mode mode, unsigned long iterations,
-                    uint64_t phys_offset, int wr_fract)
+static void run_test(enum vm_guest_mode mode, void *arg)
 {
+       struct test_params *p = arg;
        pthread_t *vcpu_threads;
        struct kvm_vm *vm;
        unsigned long *bmap;
@@ -106,30 +108,28 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        struct timespec get_dirty_log_total = (struct timespec){0};
        struct timespec vcpu_dirty_total = (struct timespec){0};
        struct timespec avg;
-#ifdef USE_CLEAR_DIRTY_LOG
        struct kvm_enable_cap cap = {};
        struct timespec clear_dirty_log_total = (struct timespec){0};
-#endif
 
-       vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+       vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size);
 
-       perf_test_args.wr_fract = wr_fract;
+       perf_test_args.wr_fract = p->wr_fract;
 
        guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
        guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
        host_num_pages = vm_num_host_pages(mode, guest_num_pages);
        bmap = bitmap_alloc(host_num_pages);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-       cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
-       cap.args[0] = dirty_log_manual_caps;
-       vm_enable_cap(vm, &cap);
-#endif
+       if (dirty_log_manual_caps) {
+               cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+               cap.args[0] = dirty_log_manual_caps;
+               vm_enable_cap(vm, &cap);
+       }
 
        vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
        TEST_ASSERT(vcpu_threads, "Memory allocation failed");
 
-       add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+       perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
 
        sync_global_to_guest(vm, perf_test_args);
 
@@ -155,13 +155,13 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 
        /* Enable dirty logging */
        clock_gettime(CLOCK_MONOTONIC, &start);
-       vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
+       vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX,
                                KVM_MEM_LOG_DIRTY_PAGES);
        ts_diff = timespec_diff_now(start);
        pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
                ts_diff.tv_sec, ts_diff.tv_nsec);
 
-       while (iteration < iterations) {
+       while (iteration < p->iterations) {
                /*
                 * Incrementing the iteration number will start the vCPUs
                 * dirtying memory again.
@@ -182,7 +182,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                        iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
                clock_gettime(CLOCK_MONOTONIC, &start);
-               kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+               kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap);
 
                ts_diff = timespec_diff_now(start);
                get_dirty_log_total = timespec_add(get_dirty_log_total,
@@ -190,17 +190,17 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
                        iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-               clock_gettime(CLOCK_MONOTONIC, &start);
-               kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
-                                      host_num_pages);
+               if (dirty_log_manual_caps) {
+                       clock_gettime(CLOCK_MONOTONIC, &start);
+                       kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0,
+                                              host_num_pages);
 
-               ts_diff = timespec_diff_now(start);
-               clear_dirty_log_total = timespec_add(clear_dirty_log_total,
-                                                    ts_diff);
-               pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
-                       iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
-#endif
+                       ts_diff = timespec_diff_now(start);
+                       clear_dirty_log_total = timespec_add(clear_dirty_log_total,
+                                                            ts_diff);
+                       pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
+                               iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+               }
        }
 
        /* Tell the vcpu thread to quit */
@@ -210,43 +210,30 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 
        /* Disable dirty logging */
        clock_gettime(CLOCK_MONOTONIC, &start);
-       vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
+       vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0);
        ts_diff = timespec_diff_now(start);
        pr_info("Disabling dirty logging time: %ld.%.9lds\n",
                ts_diff.tv_sec, ts_diff.tv_nsec);
 
-       avg = timespec_div(get_dirty_log_total, iterations);
+       avg = timespec_div(get_dirty_log_total, p->iterations);
        pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
-               iterations, get_dirty_log_total.tv_sec,
+               p->iterations, get_dirty_log_total.tv_sec,
                get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-       avg = timespec_div(clear_dirty_log_total, iterations);
-       pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
-               iterations, clear_dirty_log_total.tv_sec,
-               clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
-#endif
+       if (dirty_log_manual_caps) {
+               avg = timespec_div(clear_dirty_log_total, p->iterations);
+               pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+                       p->iterations, clear_dirty_log_total.tv_sec,
+                       clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+       }
 
        free(bmap);
        free(vcpu_threads);
-       ucall_uninit(vm);
-       kvm_vm_free(vm);
+       perf_test_destroy_vm(vm);
 }
 
-struct guest_mode {
-       bool supported;
-       bool enabled;
-};
-static struct guest_mode guest_modes[NUM_VM_MODES];
-
-#define guest_mode_init(mode, supported, enabled) ({ \
-       guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
-})
-
 static void help(char *name)
 {
-       int i;
-
        puts("");
        printf("usage: %s [-h] [-i iterations] [-p offset] "
               "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name);
@@ -255,14 +242,7 @@ static void help(char *name)
               TEST_HOST_LOOP_N);
        printf(" -p: specify guest physical test memory offset\n"
               "     Warning: a low offset can conflict with the loaded test code.\n");
-       printf(" -m: specify the guest mode ID to test "
-              "(default: test all supported modes)\n"
-              "     This option may be used multiple times.\n"
-              "     Guest mode IDs:\n");
-       for (i = 0; i < NUM_VM_MODES; ++i) {
-               printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
-                      guest_modes[i].supported ? " (supported)" : "");
-       }
+       guest_modes_help();
        printf(" -b: specify the size of the memory region which should be\n"
               "     dirtied by each vCPU. e.g. 10M or 3G.\n"
               "     (default: 1G)\n");
@@ -277,80 +257,43 @@ static void help(char *name)
 
 int main(int argc, char *argv[])
 {
-       unsigned long iterations = TEST_HOST_LOOP_N;
-       bool mode_selected = false;
-       uint64_t phys_offset = 0;
-       unsigned int mode;
-       int opt, i;
-       int wr_fract = 1;
-
-#ifdef USE_CLEAR_DIRTY_LOG
+       int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+       struct test_params p = {
+               .iterations = TEST_HOST_LOOP_N,
+               .wr_fract = 1,
+       };
+       int opt;
+
        dirty_log_manual_caps =
                kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
-       if (!dirty_log_manual_caps) {
-               print_skip("KVM_CLEAR_DIRTY_LOG not available");
-               exit(KSFT_SKIP);
-       }
        dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
                                  KVM_DIRTY_LOG_INITIALLY_SET);
-#endif
-
-#ifdef __x86_64__
-       guest_mode_init(VM_MODE_PXXV48_4K, true, true);
-#endif
-#ifdef __aarch64__
-       guest_mode_init(VM_MODE_P40V48_4K, true, true);
-       guest_mode_init(VM_MODE_P40V48_64K, true, true);
-
-       {
-               unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
-
-               if (limit >= 52)
-                       guest_mode_init(VM_MODE_P52V48_64K, true, true);
-               if (limit >= 48) {
-                       guest_mode_init(VM_MODE_P48V48_4K, true, true);
-                       guest_mode_init(VM_MODE_P48V48_64K, true, true);
-               }
-       }
-#endif
-#ifdef __s390x__
-       guest_mode_init(VM_MODE_P40V48_4K, true, true);
-#endif
+
+       guest_modes_append_default();
 
        while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) {
                switch (opt) {
                case 'i':
-                       iterations = strtol(optarg, NULL, 10);
+                       p.iterations = strtol(optarg, NULL, 10);
                        break;
                case 'p':
-                       phys_offset = strtoull(optarg, NULL, 0);
+                       p.phys_offset = strtoull(optarg, NULL, 0);
                        break;
                case 'm':
-                       if (!mode_selected) {
-                               for (i = 0; i < NUM_VM_MODES; ++i)
-                                       guest_modes[i].enabled = false;
-                               mode_selected = true;
-                       }
-                       mode = strtoul(optarg, NULL, 10);
-                       TEST_ASSERT(mode < NUM_VM_MODES,
-                                   "Guest mode ID %d too big", mode);
-                       guest_modes[mode].enabled = true;
+                       guest_modes_cmdline(optarg);
                        break;
                case 'b':
                        guest_percpu_mem_size = parse_size(optarg);
                        break;
                case 'f':
-                       wr_fract = atoi(optarg);
-                       TEST_ASSERT(wr_fract >= 1,
+                       p.wr_fract = atoi(optarg);
+                       TEST_ASSERT(p.wr_fract >= 1,
                                    "Write fraction cannot be less than one");
                        break;
                case 'v':
                        nr_vcpus = atoi(optarg);
-                       TEST_ASSERT(nr_vcpus > 0,
-                                   "Must have a positive number of vCPUs");
-                       TEST_ASSERT(nr_vcpus <= MAX_VCPUS,
-                                   "This test does not currently support\n"
-                                   "more than %d vCPUs.", MAX_VCPUS);
+                       TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+                                   "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
                        break;
                case 'h':
                default:
@@ -359,18 +302,11 @@ int main(int argc, char *argv[])
                }
        }
 
-       TEST_ASSERT(iterations >= 2, "The test should have at least two iterations");
+       TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");
 
-       pr_info("Test iterations: %"PRIu64"\n", iterations);
+       pr_info("Test iterations: %"PRIu64"\n", p.iterations);
 
-       for (i = 0; i < NUM_VM_MODES; ++i) {
-               if (!guest_modes[i].enabled)
-                       continue;
-               TEST_ASSERT(guest_modes[i].supported,
-                           "Guest mode ID %d (%s) not supported.",
-                           i, vm_guest_mode_string(i));
-               run_test(i, iterations, phys_offset, wr_fract);
-       }
+       for_each_guest_mode(run_test, &p);
 
        return 0;
 }
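
The per-iteration averages printed above rely on the timespec helpers from test_util (timespec_diff_now(), timespec_add(), timespec_div()). timespec_div() is not part of these hunks; a plausible shape, dividing a total duration by the iteration count:

	/* Sketch only: average a total duration over 'divisor' iterations. */
	struct timespec timespec_div(struct timespec ts, int divisor)
	{
		int64_t ns = ts.tv_sec * 1000000000LL + ts.tv_nsec;

		ns /= divisor;

		return (struct timespec){ ns / 1000000000LL, ns % 1000000000LL };
	}
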
index 54da9cc..bb2752d 100644 (file)
@@ -9,14 +9,18 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
-#include <time.h>
 #include <pthread.h>
+#include <semaphore.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
+#include <asm/barrier.h>
 
-#include "test_util.h"
 #include "kvm_util.h"
+#include "test_util.h"
+#include "guest_modes.h"
 #include "processor.h"
 
 #define VCPU_ID                                1
 # define test_and_clear_bit_le test_and_clear_bit
 #endif
 
+#define TEST_DIRTY_RING_COUNT          65536
+
+#define SIG_IPI SIGUSR1
+
 /*
  * Guest/Host shared variables. Ensure addr_gva2hva() and/or
  * sync_global_to/from_guest() are used when accessing from
@@ -128,6 +136,31 @@ static uint64_t host_dirty_count;
 static uint64_t host_clear_count;
 static uint64_t host_track_next_count;
 
+/* Whether dirty ring reset is requested, or finished */
+static sem_t dirty_ring_vcpu_stop;
+static sem_t dirty_ring_vcpu_cont;
+/*
+ * This is updated by the vcpu thread to tell the host whether it's a
+ * ring-full event.  It should only be read after a sem_wait() on
+ * dirty_ring_vcpu_stop returns and before the vcpu continues to run.
+ */
+static bool dirty_ring_vcpu_ring_full;
+/*
+ * This is only used for verifying the dirty pages.  The dirty ring has a
+ * tricky corner case when the ring just got full: KVM exits to userspace
+ * because the ring is full.  When that happens, the very last PFN is set
+ * in the ring, but the data is not actually changed (the guest WRITE has
+ * not really been applied yet): we found the dirty ring full, refused to
+ * let the vcpu continue, and recorded the dirty gfn with the old contents.
+ *
+ * For this specific case, it's safe to skip checking this pfn for this
+ * bit, because the bit is redundant; when the write does happen later, the
+ * bit will be set again.  We use this variable to keep track of the latest
+ * dirty gfn we've collected, so that if a data mismatch is found later in
+ * the verifying process, we let it pass.
+ */
+static uint64_t dirty_ring_last_page;
+
 enum log_mode_t {
        /* Only use KVM_GET_DIRTY_LOG for logging */
        LOG_MODE_DIRTY_LOG = 0,
@@ -135,6 +168,9 @@ enum log_mode_t {
        /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
        LOG_MODE_CLEAR_LOG = 1,
 
+       /* Use dirty ring for logging */
+       LOG_MODE_DIRTY_RING = 2,
+
        LOG_MODE_NUM,
 
        /* Run all supported modes */
@@ -145,6 +181,26 @@ enum log_mode_t {
 static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
 /* Logging mode for current run */
 static enum log_mode_t host_log_mode;
+static pthread_t vcpu_thread;
+static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
+
+static void vcpu_kick(void)
+{
+       pthread_kill(vcpu_thread, SIG_IPI);
+}
+
+/*
+ * Our test plays signal tricks, so use a variant of sem_wait() that
+ * retries instead of failing when interrupted by a signal.
+ */
+static void sem_wait_until(sem_t *sem)
+{
+       int ret;
+
+       do
+               ret = sem_wait(sem);
+       while (ret == -1 && errno == EINTR);
+}
 
 static bool clear_log_supported(void)
 {
@@ -178,6 +234,152 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
        kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
 }
 
+static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+       TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
+                   "vcpu run failed: errno=%d", err);
+
+       TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+                   "Invalid guest sync status: exit_reason=%s\n",
+                   exit_reason_str(run->exit_reason));
+}
+
+static bool dirty_ring_supported(void)
+{
+       return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING);
+}
+
+static void dirty_ring_create_vm_done(struct kvm_vm *vm)
+{
+       /*
+        * Switch to dirty ring mode after VM creation but before any
+        * vcpus are created.
+        */
+       vm_enable_dirty_ring(vm, test_dirty_ring_count *
+                            sizeof(struct kvm_dirty_gfn));
+}
+
+static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
+{
+       return gfn->flags == KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
+{
+       gfn->flags = KVM_DIRTY_GFN_F_RESET;
+}
+
+static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
+                                      int slot, void *bitmap,
+                                      uint32_t num_pages, uint32_t *fetch_index)
+{
+       struct kvm_dirty_gfn *cur;
+       uint32_t count = 0;
+
+       while (true) {
+               cur = &dirty_gfns[*fetch_index % test_dirty_ring_count];
+               if (!dirty_gfn_is_dirtied(cur))
+                       break;
+               TEST_ASSERT(cur->slot == slot, "Slot number didn't match: "
+                           "%u != %u", cur->slot, slot);
+               TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
+                           "0x%llx >= 0x%x", cur->offset, num_pages);
+               //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
+               set_bit_le(cur->offset, bitmap);
+               dirty_ring_last_page = cur->offset;
+               dirty_gfn_set_collected(cur);
+               (*fetch_index)++;
+               count++;
+       }
+
+       return count;
+}
+
+static void dirty_ring_wait_vcpu(void)
+{
+       /* This makes sure that the hardware PML cache is flushed */
+       vcpu_kick();
+       sem_wait_until(&dirty_ring_vcpu_stop);
+}
+
+static void dirty_ring_continue_vcpu(void)
+{
+       pr_info("Notifying vcpu to continue\n");
+       sem_post(&dirty_ring_vcpu_cont);
+}
+
+static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
+                                          void *bitmap, uint32_t num_pages)
+{
+       /* We only have one vcpu */
+       static uint32_t fetch_index = 0;
+       uint32_t count = 0, cleared;
+       bool continued_vcpu = false;
+
+       dirty_ring_wait_vcpu();
+
+       if (!dirty_ring_vcpu_ring_full) {
+               /*
+                * This is not a ring-full event, it's safe to allow
+                * vcpu to continue
+                */
+               dirty_ring_continue_vcpu();
+               continued_vcpu = true;
+       }
+
+       /* Only have one vcpu */
+       count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID),
+                                      slot, bitmap, num_pages, &fetch_index);
+
+       cleared = kvm_vm_reset_dirty_ring(vm);
+
+       /* Cleared pages should be the same as collected */
+       TEST_ASSERT(cleared == count, "Reset dirty pages (%u) doesn't match "
+                   "collected (%u)", cleared, count);
+
+       if (!continued_vcpu) {
+               TEST_ASSERT(dirty_ring_vcpu_ring_full,
+                           "Didn't continue vcpu even without ring full");
+               dirty_ring_continue_vcpu();
+       }
+
+       pr_info("Iteration %ld collected %u pages\n", iteration, count);
+}
+
+static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+       /* A ucall-sync or ring-full event is allowed */
+       if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
+               /* We should allow this to continue */
+               ;
+       } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
+                  (ret == -1 && err == EINTR)) {
+               /* Update the flag first before pause */
+               WRITE_ONCE(dirty_ring_vcpu_ring_full,
+                          run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
+               sem_post(&dirty_ring_vcpu_stop);
+               pr_info("vcpu stops because %s...\n",
+                       dirty_ring_vcpu_ring_full ?
+                       "dirty ring is full" : "vcpu is kicked out");
+               sem_wait_until(&dirty_ring_vcpu_cont);
+               pr_info("vcpu continues now.\n");
+       } else {
+               TEST_ASSERT(false, "Invalid guest sync status: "
+                           "exit_reason=%s\n",
+                           exit_reason_str(run->exit_reason));
+       }
+}
+
+static void dirty_ring_before_vcpu_join(void)
+{
+       /* Wake the vcpu one more time so it is guaranteed to quit */
+       sem_post(&dirty_ring_vcpu_cont);
+}
+
 struct log_mode {
        const char *name;
        /* Return true if this mode is supported, otherwise false */
@@ -187,16 +389,29 @@ struct log_mode {
        /* Hook to collect the dirty pages into the bitmap provided */
        void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
                                     void *bitmap, uint32_t num_pages);
+       /* Hook to call after each vcpu run */
+       void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err);
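+       /* Hook to call right before the vcpu thread is joined */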
+       void (*before_vcpu_join)(void);
 } log_modes[LOG_MODE_NUM] = {
        {
                .name = "dirty-log",
                .collect_dirty_pages = dirty_log_collect_dirty_pages,
+               .after_vcpu_run = default_after_vcpu_run,
        },
        {
                .name = "clear-log",
                .supported = clear_log_supported,
                .create_vm_done = clear_log_create_vm_done,
                .collect_dirty_pages = clear_log_collect_dirty_pages,
+               .after_vcpu_run = default_after_vcpu_run,
+       },
+       {
+               .name = "dirty-ring",
+               .supported = dirty_ring_supported,
+               .create_vm_done = dirty_ring_create_vm_done,
+               .collect_dirty_pages = dirty_ring_collect_dirty_pages,
+               .before_vcpu_join = dirty_ring_before_vcpu_join,
+               .after_vcpu_run = dirty_ring_after_vcpu_run,
        },
 };
 
@@ -247,6 +462,22 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
        mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
 }
 
+static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+       struct log_mode *mode = &log_modes[host_log_mode];
+
+       if (mode->after_vcpu_run)
+               mode->after_vcpu_run(vm, ret, err);
+}
+
+static void log_mode_before_vcpu_join(void)
+{
+       struct log_mode *mode = &log_modes[host_log_mode];
+
+       if (mode->before_vcpu_join)
+               mode->before_vcpu_join();
+}
+
 static void generate_random_array(uint64_t *guest_array, uint64_t size)
 {
        uint64_t i;
@@ -257,29 +488,44 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size)
 
 static void *vcpu_worker(void *data)
 {
-       int ret;
+       int ret, vcpu_fd;
        struct kvm_vm *vm = data;
        uint64_t *guest_array;
        uint64_t pages_count = 0;
-       struct kvm_run *run;
+       struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
+                                                + sizeof(sigset_t));
+       sigset_t *sigset = (sigset_t *) &sigmask->sigset;
 
-       run = vcpu_state(vm, VCPU_ID);
+       vcpu_fd = vcpu_get_fd(vm, VCPU_ID);
+
+       /*
+        * SIG_IPI is unblocked atomically while in KVM_RUN.  It causes the
+        * ioctl to return with -EINTR, but it is still pending and we need
+        * to consume it with sigwait().
+        */
+       sigmask->len = 8;
+       pthread_sigmask(0, NULL, sigset);
+       vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask);
+       sigaddset(sigset, SIG_IPI);
+       pthread_sigmask(SIG_BLOCK, sigset, NULL);
+
+       sigemptyset(sigset);
+       sigaddset(sigset, SIG_IPI);
 
        guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
-       generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
 
        while (!READ_ONCE(host_quit)) {
+               /* Pick a fresh random set of pages for the guest to dirty */
+               generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+               pages_count += TEST_PAGES_PER_LOOP;
                /* Let the guest dirty the random pages */
-               ret = _vcpu_run(vm, VCPU_ID);
-               TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
-               if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
-                       pages_count += TEST_PAGES_PER_LOOP;
-                       generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
-               } else {
-                       TEST_FAIL("Invalid guest sync status: "
-                                 "exit_reason=%s\n",
-                                 exit_reason_str(run->exit_reason));
+               ret = ioctl(vcpu_fd, KVM_RUN, NULL);
+               if (ret == -1 && errno == EINTR) {
+                       int sig = -1;
+                       sigwait(sigset, &sig);
+                       assert(sig == SIG_IPI);
                }
+               log_mode_after_vcpu_run(vm, ret, errno);
        }
 
        pr_info("Dirtied %"PRIu64" pages\n", pages_count);
@@ -292,6 +538,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
        uint64_t step = vm_num_host_pages(mode, 1);
        uint64_t page;
        uint64_t *value_ptr;
+       uint64_t min_iter = 0;
 
        for (page = 0; page < host_num_pages; page += step) {
                value_ptr = host_test_mem + page * host_page_size;
@@ -306,14 +553,64 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
                }
 
                if (test_and_clear_bit_le(page, bmap)) {
+                       bool matched;
+
                        host_dirty_count++;
+
                        /*
                         * If the bit is set, the value written onto
                         * the corresponding page should be either the
                         * previous iteration number or the current one.
                         */
-                       TEST_ASSERT(*value_ptr == iteration ||
-                                   *value_ptr == iteration - 1,
+                       matched = (*value_ptr == iteration ||
+                                  *value_ptr == iteration - 1);
+
+                       if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
+                               if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
+                                       /*
+                                        * Short answer: this case happens
+                                        * only in the dirty ring test, when
+                                        * the page is the last one dirtied
+                                        * before a kvm dirty ring full event
+                                        * in iteration N-2.
+                                        *
+                                        * Long answer: Assuming ring size R,
+                                        * one possible condition is:
+                                        *
+                                        *      main thr       vcpu thr
+                                        *      --------       --------
+                                        *    iter=1
+                                        *                   write 1 to page 0~(R-1)
+                                        *                   full, vmexit
+                                        *    collect 0~(R-1)
+                                        *    kick vcpu
+                                        *                   write 1 to (R-1)~(2R-2)
+                                        *                   full, vmexit
+                                        *    iter=2
+                                        *    collect (R-1)~(2R-2)
+                                        *    kick vcpu
+                                        *                   write 1 to (2R-2)
+                                        *                   (NOTE!!! "1" cached in cpu reg)
+                                        *                   write 2 to (2R-1)~(3R-3)
+                                        *                   full, vmexit
+                                        *    iter=3
+                                        *    collect (2R-2)~(3R-3)
+                                        *    (here the value read on page
+                                        *     "2R-2" is 1, while iter=3!!!)
+                                        *
+                                        * This however can only happen once per iteration.
+                                        */
+                                       min_iter = iteration - 1;
+                                       continue;
+                               } else if (page == dirty_ring_last_page) {
+                                       /*
+                                        * Please refer to comments in
+                                        * dirty_ring_last_page.
+                                        */
+                                       continue;
+                               }
+                       }
+
+                       TEST_ASSERT(matched,
                                    "Set page %"PRIu64" value %"PRIu64
                                    " incorrect (iteration=%"PRIu64")",
                                    page, *value_ptr, iteration);
@@ -375,10 +672,15 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 #define DIRTY_MEM_BITS 30 /* 1G */
 #define PAGE_SHIFT_4K  12
 
-static void run_test(enum vm_guest_mode mode, unsigned long iterations,
-                    unsigned long interval, uint64_t phys_offset)
+struct test_params {
+       unsigned long iterations;
+       unsigned long interval;
+       uint64_t phys_offset;
+};
+
+static void run_test(enum vm_guest_mode mode, void *arg)
 {
-       pthread_t vcpu_thread;
+       struct test_params *p = arg;
        struct kvm_vm *vm;
        unsigned long *bmap;
 
@@ -412,12 +714,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        host_page_size = getpagesize();
        host_num_pages = vm_num_host_pages(mode, guest_num_pages);
 
-       if (!phys_offset) {
+       if (!p->phys_offset) {
                guest_test_phys_mem = (vm_get_max_gfn(vm) -
                                       guest_num_pages) * guest_page_size;
                guest_test_phys_mem &= ~(host_page_size - 1);
        } else {
-               guest_test_phys_mem = phys_offset;
+               guest_test_phys_mem = p->phys_offset;
        }
 
 #ifdef __s390x__
@@ -443,9 +745,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        /* Cache the HVA pointer of the region */
        host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
 
-#ifdef __x86_64__
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-#endif
        ucall_init(vm, NULL);
 
        /* Export the shared variables to the guest */
@@ -464,9 +763,9 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 
        pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
 
-       while (iteration < iterations) {
+       while (iteration < p->iterations) {
                /* Give the vcpu thread some time to dirty some pages */
-               usleep(interval * 1000);
+               usleep(p->interval * 1000);
                log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
                                             bmap, host_num_pages);
                vm_dirty_log_verify(mode, bmap);
@@ -476,6 +775,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 
        /* Tell the vcpu thread to quit */
        host_quit = true;
+       log_mode_before_vcpu_join();
        pthread_join(vcpu_thread, NULL);
 
        pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
@@ -488,24 +788,15 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        kvm_vm_free(vm);
 }
 
-struct guest_mode {
-       bool supported;
-       bool enabled;
-};
-static struct guest_mode guest_modes[NUM_VM_MODES];
-
-#define guest_mode_init(mode, supported, enabled) ({ \
-       guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
-})
-
 static void help(char *name)
 {
-       int i;
-
        puts("");
        printf("usage: %s [-h] [-i iterations] [-I interval] "
               "[-p offset] [-m mode]\n", name);
        puts("");
+       printf(" -c: specify dirty ring size, in number of entries\n");
+       printf("     (only useful for dirty-ring test; default: %"PRIu32")\n",
+              TEST_DIRTY_RING_COUNT);
        printf(" -i: specify iteration counts (default: %"PRIu64")\n",
               TEST_HOST_LOOP_N);
        printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
@@ -515,70 +806,40 @@ static void help(char *name)
        printf(" -M: specify the host logging mode "
               "(default: run all log modes).  Supported modes: \n\t");
        log_modes_dump();
-       printf(" -m: specify the guest mode ID to test "
-              "(default: test all supported modes)\n"
-              "     This option may be used multiple times.\n"
-              "     Guest mode IDs:\n");
-       for (i = 0; i < NUM_VM_MODES; ++i) {
-               printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
-                      guest_modes[i].supported ? " (supported)" : "");
-       }
+       guest_modes_help();
        puts("");
        exit(0);
 }
 
 int main(int argc, char *argv[])
 {
-       unsigned long iterations = TEST_HOST_LOOP_N;
-       unsigned long interval = TEST_HOST_LOOP_INTERVAL;
-       bool mode_selected = false;
-       uint64_t phys_offset = 0;
-       unsigned int mode;
-       int opt, i, j;
+       struct test_params p = {
+               .iterations = TEST_HOST_LOOP_N,
+               .interval = TEST_HOST_LOOP_INTERVAL,
+       };
+       int opt, i;
 
-#ifdef __x86_64__
-       guest_mode_init(VM_MODE_PXXV48_4K, true, true);
-#endif
-#ifdef __aarch64__
-       guest_mode_init(VM_MODE_P40V48_4K, true, true);
-       guest_mode_init(VM_MODE_P40V48_64K, true, true);
-
-       {
-               unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+       sem_init(&dirty_ring_vcpu_stop, 0, 0);
+       sem_init(&dirty_ring_vcpu_cont, 0, 0);
 
-               if (limit >= 52)
-                       guest_mode_init(VM_MODE_P52V48_64K, true, true);
-               if (limit >= 48) {
-                       guest_mode_init(VM_MODE_P48V48_4K, true, true);
-                       guest_mode_init(VM_MODE_P48V48_64K, true, true);
-               }
-       }
-#endif
-#ifdef __s390x__
-       guest_mode_init(VM_MODE_P40V48_4K, true, true);
-#endif
+       guest_modes_append_default();
 
-       while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
+       while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) {
                switch (opt) {
+               case 'c':
+                       test_dirty_ring_count = strtol(optarg, NULL, 10);
+                       break;
                case 'i':
-                       iterations = strtol(optarg, NULL, 10);
+                       p.iterations = strtol(optarg, NULL, 10);
                        break;
                case 'I':
-                       interval = strtol(optarg, NULL, 10);
+                       p.interval = strtol(optarg, NULL, 10);
                        break;
                case 'p':
-                       phys_offset = strtoull(optarg, NULL, 0);
+                       p.phys_offset = strtoull(optarg, NULL, 0);
                        break;
                case 'm':
-                       if (!mode_selected) {
-                               for (i = 0; i < NUM_VM_MODES; ++i)
-                                       guest_modes[i].enabled = false;
-                               mode_selected = true;
-                       }
-                       mode = strtoul(optarg, NULL, 10);
-                       TEST_ASSERT(mode < NUM_VM_MODES,
-                                   "Guest mode ID %d too big", mode);
-                       guest_modes[mode].enabled = true;
+                       guest_modes_cmdline(optarg);
                        break;
                case 'M':
                        if (!strcmp(optarg, "all")) {
@@ -607,32 +868,24 @@ int main(int argc, char *argv[])
                }
        }
 
-       TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
-       TEST_ASSERT(interval > 0, "Interval must be greater than zero");
+       TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
+       TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
 
        pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
-               iterations, interval);
+               p.iterations, p.interval);
 
        srandom(time(0));
 
-       for (i = 0; i < NUM_VM_MODES; ++i) {
-               if (!guest_modes[i].enabled)
-                       continue;
-               TEST_ASSERT(guest_modes[i].supported,
-                           "Guest mode ID %d (%s) not supported.",
-                           i, vm_guest_mode_string(i));
-               if (host_log_mode_option == LOG_MODE_ALL) {
-                       /* Run each log mode */
-                       for (j = 0; j < LOG_MODE_NUM; j++) {
-                               pr_info("Testing Log Mode '%s'\n",
-                                       log_modes[j].name);
-                               host_log_mode = j;
-                               run_test(i, iterations, interval, phys_offset);
-                       }
-               } else {
-                       host_log_mode = host_log_mode_option;
-                       run_test(i, iterations, interval, phys_offset);
+       if (host_log_mode_option == LOG_MODE_ALL) {
+               /* Run each log mode */
+               for (i = 0; i < LOG_MODE_NUM; i++) {
+                       pr_info("Testing Log Mode '%s'\n", log_modes[i].name);
+                       host_log_mode = i;
+                       for_each_guest_mode(run_test, &p);
                }
+       } else {
+               host_log_mode = host_log_mode_option;
+               for_each_guest_mode(run_test, &p);
        }
 
        return 0;
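
vm_enable_dirty_ring(), called from dirty_ring_create_vm_done() above, lives in lib/kvm_util.c and is not part of these hunks. A minimal sketch, assuming the KVM_CAP_DIRTY_LOG_RING contract that args[0] carries the per-vcpu ring size in bytes:

	/* Sketch only: reconstructed from the call site above. */
	void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
	{
		struct kvm_enable_cap cap = {};

		cap.cap = KVM_CAP_DIRTY_LOG_RING;
		cap.args[0] = ring_size;	/* bytes of kvm_dirty_gfn entries */
		vm_enable_cap(vm, &cap);
	}
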
diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h
new file mode 100644 (file)
index 0000000..b691df3
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+struct guest_mode {
+       bool supported;
+       bool enabled;
+};
+
+extern struct guest_mode guest_modes[NUM_VM_MODES];
+
+#define guest_mode_append(mode, supported, enabled) ({ \
+       guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+})
+
+void guest_modes_append_default(void);
+void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg);
+void guest_modes_help(void);
+void guest_modes_cmdline(const char *arg);
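
The matching lib/guest_modes.c does not appear in this listing; a sketch reconstructed from the per-test blocks deleted above:

	// SPDX-License-Identifier: GPL-2.0
	#include "guest_modes.h"

	struct guest_mode guest_modes[NUM_VM_MODES];

	void guest_modes_append_default(void)
	{
		guest_mode_append(VM_MODE_DEFAULT, true, true);

	#ifdef __aarch64__
		guest_mode_append(VM_MODE_P40V48_64K, true, true);
		{
			unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);

			if (limit >= 52)
				guest_mode_append(VM_MODE_P52V48_64K, true, true);
			if (limit >= 48) {
				guest_mode_append(VM_MODE_P48V48_4K, true, true);
				guest_mode_append(VM_MODE_P48V48_64K, true, true);
			}
		}
	#endif
	}

	void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
	{
		int i;

		for (i = 0; i < NUM_VM_MODES; ++i) {
			if (!guest_modes[i].enabled)
				continue;
			TEST_ASSERT(guest_modes[i].supported,
				    "Guest mode ID %d (%s) not supported.",
				    i, vm_guest_mode_string(i));
			func(i, arg);
		}
	}

	void guest_modes_cmdline(const char *arg)
	{
		static bool mode_selected;
		unsigned int mode;
		int i;

		if (!mode_selected) {
			for (i = 0; i < NUM_VM_MODES; ++i)
				guest_modes[i].enabled = false;
			mode_selected = true;
		}

		mode = strtoul(arg, NULL, 10);
		TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode);
		guest_modes[mode].enabled = true;
	}
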
index 7d29aa7..5cbb861 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "sparsebit.h"
 
+#define KVM_MAX_VCPUS 512
 
 /*
  * Callers of kvm_util only have an incomplete/opaque description of the
@@ -45,16 +46,39 @@ enum vm_guest_mode {
 };
 
 #if defined(__aarch64__)
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+
+#define VM_MODE_DEFAULT                        VM_MODE_P40V48_4K
+#define MIN_PAGE_SHIFT                 12U
+#define ptes_per_page(page_size)       ((page_size) / 8)
+
 #elif defined(__x86_64__)
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
-#else
-#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
+
+#define VM_MODE_DEFAULT                        VM_MODE_PXXV48_4K
+#define MIN_PAGE_SHIFT                 12U
+#define ptes_per_page(page_size)       ((page_size) / 8)
+
+#elif defined(__s390x__)
+
+#define VM_MODE_DEFAULT                        VM_MODE_P52V48_4K
+#define MIN_PAGE_SHIFT                 12U
+#define ptes_per_page(page_size)       ((page_size) / 16)
+
 #endif
 
+#define MIN_PAGE_SIZE          (1U << MIN_PAGE_SHIFT)
+#define PTES_PER_MIN_PAGE      ptes_per_page(MIN_PAGE_SIZE)
+
 #define vm_guest_mode_string(m) vm_guest_mode_string[m]
 extern const char * const vm_guest_mode_string[];
 
+struct vm_guest_mode_params {
+       unsigned int pa_bits;
+       unsigned int va_bits;
+       unsigned int page_size;
+       unsigned int page_shift;
+};
+extern const struct vm_guest_mode_params vm_guest_mode_params[];
+
 enum vm_mem_backing_src_type {
        VM_MEM_SRC_ANONYMOUS,
        VM_MEM_SRC_ANONYMOUS_THP,
@@ -74,6 +98,7 @@ void kvm_vm_release(struct kvm_vm *vmp);
 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
 void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
                            uint64_t first_page, uint32_t num_pages);
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
 
 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
                       size_t len);
@@ -114,6 +139,8 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
 int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
                void *arg);
 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
@@ -146,6 +173,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
 struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
 int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
                          struct kvm_guest_debug *debug);
@@ -199,6 +227,7 @@ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
 int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
                          struct kvm_nested_state *state, bool ignore_error);
 #endif
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
 
 const char *exit_reason_str(unsigned int exit_reason);
 
@@ -246,6 +275,16 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
 struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
                                 void *guest_code);
 
+/* Same as vm_create_default, but can be used for more than one vcpu */
+struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
+                                           uint32_t num_percpu_pages, void *guest_code,
+                                           uint32_t vcpuids[]);
+
+/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */
+struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+                                   uint64_t extra_mem_pages, uint32_t num_percpu_pages,
+                                   void *guest_code, uint32_t vcpuids[]);
+
 /*
  * Adds a vCPU with reasonable defaults (e.g. a stack)
  *
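
Neither vcpu_map_dirty_ring() nor kvm_vm_reset_dirty_ring() appears in these hunks. A rough sketch, assuming KVM's documented mapping of the per-vcpu ring at page offset KVM_DIRTY_LOG_PAGE_OFFSET of the vcpu fd and the KVM_RESET_DIRTY_RINGS vm ioctl; dirty_ring_size_of() and vm_fd_of() are hypothetical accessors, since struct kvm_vm is opaque to callers:

	/* Sketch only: map a vcpu's dirty ring into the host address space. */
	void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
	{
		uint32_t size = dirty_ring_size_of(vm);	/* hypothetical accessor */
		void *ring;

		TEST_ASSERT(size > 0, "Should enable dirty ring first");

		ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
			    vcpu_get_fd(vm, vcpuid),
			    getpagesize() * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(ring != MAP_FAILED, "Dirty ring map failed");

		return ring;
	}

	/* Sketch only: reset all harvested entries; KVM returns the count. */
	uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
	{
		return ioctl(vm_fd_of(vm), KVM_RESET_DIRTY_RINGS);	/* hypothetical accessor */
	}
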
index 2618052..b118882 100644 (file)
@@ -9,38 +9,15 @@
 #define SELFTEST_KVM_PERF_TEST_UTIL_H
 
 #include "kvm_util.h"
-#include "processor.h"
-
-#define MAX_VCPUS 512
-
-#define PAGE_SHIFT_4K  12
-#define PTES_PER_4K_PT 512
-
-#define TEST_MEM_SLOT_INDEX            1
 
 /* Default guest test virtual memory offset */
 #define DEFAULT_GUEST_TEST_MEM         0xc0000000
 
 #define DEFAULT_PER_VCPU_MEM_SIZE      (1 << 30) /* 1G */
 
-/*
- * Guest physical memory offset of the testing memory slot.
- * This will be set to the topmost valid physical address minus
- * the test memory size.
- */
-static uint64_t guest_test_phys_mem;
-
-/*
- * Guest virtual memory offset of the testing memory slot.
- * Must not conflict with identity mapped test code.
- */
-static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
-static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
-
-/* Number of VCPUs for the test */
-static int nr_vcpus = 1;
+#define PERF_TEST_MEM_SLOT_INDEX       1
 
-struct vcpu_args {
+struct perf_test_vcpu_args {
        uint64_t gva;
        uint64_t pages;
 
@@ -54,145 +31,21 @@ struct perf_test_args {
        uint64_t guest_page_size;
        int wr_fract;
 
-       struct vcpu_args vcpu_args[MAX_VCPUS];
+       struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
 };
 
-static struct perf_test_args perf_test_args;
+extern struct perf_test_args perf_test_args;
 
 /*
- * Continuously write to the first 8 bytes of each page in the
- * specified region.
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
  */
-static void guest_code(uint32_t vcpu_id)
-{
-       struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
-       uint64_t gva;
-       uint64_t pages;
-       int i;
-
-       /* Make sure vCPU args data structure is not corrupt. */
-       GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
-
-       gva = vcpu_args->gva;
-       pages = vcpu_args->pages;
-
-       while (true) {
-               for (i = 0; i < pages; i++) {
-                       uint64_t addr = gva + (i * perf_test_args.guest_page_size);
-
-                       if (i % perf_test_args.wr_fract == 0)
-                               *(uint64_t *)addr = 0x0123456789ABCDEF;
-                       else
-                               READ_ONCE(*(uint64_t *)addr);
-               }
-
-               GUEST_SYNC(1);
-       }
-}
-
-static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
-                               uint64_t vcpu_memory_bytes)
-{
-       struct kvm_vm *vm;
-       uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
-       uint64_t guest_num_pages;
-
-       /* Account for a few pages per-vCPU for stacks */
-       pages += DEFAULT_STACK_PGS * vcpus;
-
-       /*
-        * Reserve twice the amount of memory needed to map the test region and
-        * the page table / stacks region, at 4k, for page tables. Do the
-        * calculation with 4K page size: the smallest of all archs. (e.g., 64K
-        * page size guest will need even less memory for page tables).
-        */
-       pages += (2 * pages) / PTES_PER_4K_PT;
-       pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
-                PTES_PER_4K_PT;
-       pages = vm_adjust_num_guest_pages(mode, pages);
-
-       pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
-       vm = vm_create(mode, pages, O_RDWR);
-       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-#ifdef __x86_64__
-       vm_create_irqchip(vm);
-#endif
-
-       perf_test_args.vm = vm;
-       perf_test_args.guest_page_size = vm_get_page_size(vm);
-       perf_test_args.host_page_size = getpagesize();
-
-       TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
-                   "Guest memory size is not guest page size aligned.");
-
-       guest_num_pages = (vcpus * vcpu_memory_bytes) /
-                         perf_test_args.guest_page_size;
-       guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
-
-       /*
-        * If there should be more memory in the guest test region than there
-        * can be pages in the guest, it will definitely cause problems.
-        */
-       TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
-                   "Requested more guest memory than address space allows.\n"
-                   "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
-                   guest_num_pages, vm_get_max_gfn(vm), vcpus,
-                   vcpu_memory_bytes);
-
-       TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
-                   "Guest memory size is not host page size aligned.");
-
-       guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
-                             perf_test_args.guest_page_size;
-       guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
-
-#ifdef __s390x__
-       /* Align to 1M (segment size) */
-       guest_test_phys_mem &= ~((1 << 20) - 1);
-#endif
-
-       pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
-
-       /* Add an extra memory slot for testing */
-       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                   guest_test_phys_mem,
-                                   TEST_MEM_SLOT_INDEX,
-                                   guest_num_pages, 0);
-
-       /* Do mapping for the demand paging memory slot */
-       virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
-
-       ucall_init(vm, NULL);
-
-       return vm;
-}
-
-static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
-{
-       vm_paddr_t vcpu_gpa;
-       struct vcpu_args *vcpu_args;
-       int vcpu_id;
-
-       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
-               vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
-
-               vm_vcpu_add_default(vm, vcpu_id, guest_code);
-
-#ifdef __x86_64__
-               vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
-#endif
-
-               vcpu_args->vcpu_id = vcpu_id;
-               vcpu_args->gva = guest_test_virt_mem +
-                                (vcpu_id * vcpu_memory_bytes);
-               vcpu_args->pages = vcpu_memory_bytes /
-                                  perf_test_args.guest_page_size;
+extern uint64_t guest_test_phys_mem;
 
-               vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
-               pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-                        vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
-       }
-}
+struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+                               uint64_t vcpu_memory_bytes);
+void perf_test_destroy_vm(struct kvm_vm *vm);
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes);
 
 #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
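For orientation, a minimal sketch of how a perf test is expected to drive this refactored API. The vCPU count, per-vCPU memory size, and worker-thread plumbing below are illustrative, not part of this patch:

static void run_test(enum vm_guest_mode mode)
{
        struct kvm_vm *vm;
        int nr_vcpus = 4;
        uint64_t bytes_per_vcpu = 1UL << 30;    /* 1 GiB per vCPU */

        perf_test_args.wr_fract = 1;            /* write every page */
        vm = perf_test_create_vm(mode, nr_vcpus, bytes_per_vcpu);
        perf_test_setup_vcpus(vm, nr_vcpus, bytes_per_vcpu);

        /*
         * ... spawn one thread per vCPU that calls vcpu_run() and
         * handles GUEST_SYNC ucalls, as the dirty_log and demand_paging
         * perf tests that consume this library do ...
         */

        perf_test_destroy_vm(vm);
}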
diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
new file mode 100644 (file)
index 0000000..b0ed713
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
+#define SELFTEST_KVM_DIAG318_TEST_HANDLER
+
+uint64_t get_diag318_info(void);
+
+#endif
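A hedged sketch of the intended call pattern; the surrounding sync-regs plumbing is illustrative, but the zero return when KVM_CAP_S390_DIAG318 is absent matches the handler implementation added later in this series:

uint64_t info = get_diag318_info();

if (info) {                             /* 0 means the cap is absent */
        run->s.regs.diag318 = info;
        run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
}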
index 8e61340..90cd598 100644 (file)
@@ -27,6 +27,7 @@
 #define X86_CR4_OSFXSR         (1ul << 9)
 #define X86_CR4_OSXMMEXCPT     (1ul << 10)
 #define X86_CR4_UMIP           (1ul << 11)
+#define X86_CR4_LA57           (1ul << 12)
 #define X86_CR4_VMXE           (1ul << 13)
 #define X86_CR4_SMXE           (1ul << 14)
 #define X86_CR4_FSGSBASE       (1ul << 16)
 #define X86_CR4_SMAP           (1ul << 21)
 #define X86_CR4_PKE            (1ul << 22)
 
+/* CPUID.1.ECX */
+#define CPUID_VMX              (1ul << 5)
+#define CPUID_SMX              (1ul << 6)
+#define CPUID_PCID             (1ul << 17)
+#define CPUID_XSAVE            (1ul << 26)
+
+/* CPUID.7.EBX */
+#define CPUID_FSGSBASE         (1ul << 0)
+#define CPUID_SMEP             (1ul << 7)
+#define CPUID_SMAP             (1ul << 20)
+
+/* CPUID.7.ECX */
+#define CPUID_UMIP             (1ul << 2)
+#define CPUID_PKU              (1ul << 3)
+#define CPUID_LA57             (1ul << 16)
+
 #define UNEXPECTED_VECTOR_PORT 0xfff0u
 
 /* General Registers in 64-Bit Mode */
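As a usage sketch, a test can now gate on one of these feature literals instead of open-coding the bit position. kvm_get_supported_cpuid_entry() and print_skip() are existing selftest helpers; the LA57 check is illustrative:

struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(7);

if (!(entry->ecx & CPUID_LA57)) {
        print_skip("LA57 not supported");
        exit(KSFT_SKIP);
}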
index e78d7e2..65eb107 100644 (file)
 #include <stdint.h>
 #include "processor.h"
 
-#define CPUID_VMX_BIT                          5
-
-#define CPUID_VMX                              (1 << 5)
-
 /*
  * Definitions of Primary Processor-Based VM-Execution Controls.
  */
index d6c32c3..cee92d4 100644 (file)
@@ -5,8 +5,6 @@
  * Copyright (C) 2018, Red Hat, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include <linux/compiler.h>
 
 #include "kvm_util.h"
@@ -219,21 +217,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
        }
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
-                                void *guest_code)
-{
-       uint64_t ptrs_per_4k_pte = 512;
-       uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
-       struct kvm_vm *vm;
-
-       vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-
-       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-       vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-       return vm;
-}
-
 void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
 {
        struct kvm_vcpu_init default_init = { .target = -1, };
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
new file mode 100644 (file)
index 0000000..25bff30
--- /dev/null
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include "guest_modes.h"
+
+struct guest_mode guest_modes[NUM_VM_MODES];
+
+void guest_modes_append_default(void)
+{
+       guest_mode_append(VM_MODE_DEFAULT, true, true);
+
+#ifdef __aarch64__
+       guest_mode_append(VM_MODE_P40V48_64K, true, true);
+       {
+               unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+               if (limit >= 52)
+                       guest_mode_append(VM_MODE_P52V48_64K, true, true);
+               if (limit >= 48) {
+                       guest_mode_append(VM_MODE_P48V48_4K, true, true);
+                       guest_mode_append(VM_MODE_P48V48_64K, true, true);
+               }
+       }
+#endif
+}
+
+void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
+{
+       int i;
+
+       for (i = 0; i < NUM_VM_MODES; ++i) {
+               if (!guest_modes[i].enabled)
+                       continue;
+               TEST_ASSERT(guest_modes[i].supported,
+                           "Guest mode ID %d (%s) not supported.",
+                           i, vm_guest_mode_string(i));
+               func(i, arg);
+       }
+}
+
+void guest_modes_help(void)
+{
+       int i;
+
+       printf(" -m: specify the guest mode ID to test\n"
+              "     (default: test all supported modes)\n"
+              "     This option may be used multiple times.\n"
+              "     Guest mode IDs:\n");
+       for (i = 0; i < NUM_VM_MODES; ++i) {
+               printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
+                      guest_modes[i].supported ? " (supported)" : "");
+       }
+}
+
+void guest_modes_cmdline(const char *arg)
+{
+       static bool mode_selected;
+       unsigned int mode;
+       int i;
+
+       if (!mode_selected) {
+               for (i = 0; i < NUM_VM_MODES; ++i)
+                       guest_modes[i].enabled = false;
+               mode_selected = true;
+       }
+
+       mode = strtoul(arg, NULL, 10);
+       TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %u too big", mode);
+       guest_modes[mode].enabled = true;
+}
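A sketch of how a test's main() is expected to consume these helpers. run_test and the option string are stand-ins; guest_modes_cmdline() is wired to a -m option in the same way the dirty-log tests parse their mode argument:

int main(int argc, char *argv[])
{
        int opt;

        guest_modes_append_default();

        while ((opt = getopt(argc, argv, "m:")) != -1) {
                if (opt == 'm')
                        guest_modes_cmdline(optarg);
        }

        /* run_test has the func(enum vm_guest_mode, void *) signature */
        for_each_guest_mode(run_test, NULL);
        return 0;
}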
index 126c672..fa5a90e 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (C) 2018, Google LLC.
  */
 
+#define _GNU_SOURCE /* for program_invocation_name */
 #include "test_util.h"
 #include "kvm_util.h"
 #include "kvm_util_internal.h"
@@ -114,6 +115,16 @@ int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
        return r;
 }
 
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
+{
+       struct kvm_enable_cap cap = { 0 };
+
+       cap.cap = KVM_CAP_DIRTY_LOG_RING;
+       cap.args[0] = ring_size;
+       vm_enable_cap(vm, &cap);
+       vm->dirty_ring_size = ring_size;
+}
+
 static void vm_open(struct kvm_vm *vm, int perm)
 {
        vm->kvm_fd = open(KVM_DEV_PATH, perm);
@@ -142,14 +153,7 @@ const char * const vm_guest_mode_string[] = {
 _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
               "Missing new mode strings?");
 
-struct vm_guest_mode_params {
-       unsigned int pa_bits;
-       unsigned int va_bits;
-       unsigned int page_size;
-       unsigned int page_shift;
-};
-
-static const struct vm_guest_mode_params vm_guest_mode_params[] = {
+const struct vm_guest_mode_params vm_guest_mode_params[] = {
        { 52, 48,  0x1000, 12 },
        { 52, 48, 0x10000, 16 },
        { 48, 48,  0x1000, 12 },
@@ -271,6 +275,63 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
        return vm;
 }
 
+struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+                                   uint64_t extra_mem_pages, uint32_t num_percpu_pages,
+                                   void *guest_code, uint32_t vcpuids[])
+{
+       /* The maximum page table size for a memory region will be when the
+        * smallest pages are used. Considering each page contains x page
+        * table descriptors, the total extra size for page tables (for extra
+        * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
+        * than N/x*2.
+        */
+       uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
+       uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
+       uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
+       struct kvm_vm *vm;
+       int i;
+
+       TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
+                   "nr_vcpus = %d too large for host, max-vcpus = %d",
+                   nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
+
+       pages = vm_adjust_num_guest_pages(mode, pages);
+       vm = vm_create(mode, pages, O_RDWR);
+
+       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+
+#ifdef __x86_64__
+       vm_create_irqchip(vm);
+#endif
+
+       for (i = 0; i < nr_vcpus; ++i) {
+               uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
+
+               vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+#ifdef __x86_64__
+               vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
+#endif
+       }
+
+       return vm;
+}
+
+struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
+                                           uint32_t num_percpu_pages, void *guest_code,
+                                           uint32_t vcpuids[])
+{
+       return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages,
+                                   num_percpu_pages, guest_code, vcpuids);
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+                                void *guest_code)
+{
+       return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
+                                           (uint32_t []){ vcpuid });
+}
+
 /*
  * VM Restart
  *
@@ -328,6 +389,11 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
                    __func__, strerror(-ret));
 }
 
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+{
+       return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
+}
+
 /*
  * Userspace Memory Region Find
  *
@@ -432,10 +498,17 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
  *
  * Removes a vCPU from a VM and frees its resources.
  */
-static void vm_vcpu_rm(struct vcpu *vcpu)
+static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
 {
        int ret;
 
+       if (vcpu->dirty_gfns) {
+               ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
+               TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
+                           "rc: %i errno: %i", ret, errno);
+               vcpu->dirty_gfns = NULL;
+       }
+
        ret = munmap(vcpu->state, sizeof(*vcpu->state));
        TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
                "errno: %i", ret, errno);
@@ -453,7 +526,7 @@ void kvm_vm_release(struct kvm_vm *vmp)
        int ret;
 
        list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
-               vm_vcpu_rm(vcpu);
+               vm_vcpu_rm(vmp, vcpu);
 
        ret = close(vmp->fd);
        TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
@@ -1233,6 +1306,15 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
        return rc;
 }
 
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
+{
+       struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+       TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+       return vcpu->fd;
+}
+
 void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
 {
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
@@ -1561,6 +1643,42 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
        return ret;
 }
 
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
+{
+       struct vcpu *vcpu;
+       uint32_t size = vm->dirty_ring_size;
+
+       TEST_ASSERT(size > 0, "Should enable dirty ring first");
+
+       vcpu = vcpu_find(vm, vcpuid);
+
+       TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
+
+       if (!vcpu->dirty_gfns) {
+               void *addr;
+
+               addr = mmap(NULL, size, PROT_READ,
+                           MAP_PRIVATE, vcpu->fd,
+                           vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+               TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
+
+               addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
+                           MAP_PRIVATE, vcpu->fd,
+                           vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+               TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
+
+               addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+                           MAP_SHARED, vcpu->fd,
+                           vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+               TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+
+               vcpu->dirty_gfns = addr;
+               vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
+       }
+
+       return vcpu->dirty_gfns;
+}
+
 /*
  * VM Ioctl
  *
@@ -1583,6 +1701,32 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
 }
 
 /*
+ * KVM system ioctl
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cmd - Ioctl number
+ *   arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a KVM fd.
+ */
+void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+       int ret;
+
+       ret = ioctl(vm->kvm_fd, cmd, arg);
+       TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
+               cmd, ret, errno, strerror(errno));
+}
+
+int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+       return ioctl(vm->kvm_fd, cmd, arg);
+}
+
+/*
  * VM Dump
  *
  * Input Args:
@@ -1654,6 +1798,9 @@ static struct exit_reason {
        {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
        {KVM_EXIT_OSI, "OSI"},
        {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+       {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
+       {KVM_EXIT_X86_RDMSR, "RDMSR"},
+       {KVM_EXIT_X86_WRMSR, "WRMSR"},
 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
        {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
 #endif
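To tie the new dirty-ring pieces together, a hedged sketch of the consumer side. It assumes the struct kvm_dirty_gfn layout and KVM_DIRTY_GFN_F_* flags from <linux/kvm.h>; nentries corresponds to the dirty_gfns_count computed in vcpu_map_dirty_ring() above:

static uint32_t harvest_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid,
                                   uint32_t nentries, uint32_t *fetch)
{
        struct kvm_dirty_gfn *ring = vcpu_map_dirty_ring(vm, vcpuid);
        struct kvm_dirty_gfn *e;
        uint32_t collected = 0;

        for (;;) {
                e = &ring[*fetch % nentries];
                if (!(READ_ONCE(e->flags) & KVM_DIRTY_GFN_F_DIRTY))
                        break;
                /* e->slot and e->offset identify the dirtied page */
                e->flags = KVM_DIRTY_GFN_F_RESET;   /* hand the entry back */
                (*fetch)++;
                collected++;
        }

        /* Ask KVM to recycle the reset entries (KVM_RESET_DIRTY_RINGS). */
        kvm_vm_reset_dirty_ring(vm);
        return collected;
}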
index f07d383..34465dc 100644 (file)
@@ -28,6 +28,9 @@ struct vcpu {
        uint32_t id;
        int fd;
        struct kvm_run *state;
+       struct kvm_dirty_gfn *dirty_gfns;
+       uint32_t fetch_index;
+       uint32_t dirty_gfns_count;
 };
 
 struct kvm_vm {
@@ -52,6 +55,7 @@ struct kvm_vm {
        vm_vaddr_t tss;
        vm_vaddr_t idt;
        vm_vaddr_t handlers;
+       uint32_t dirty_ring_size;
 };
 
 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
new file mode 100644 (file)
index 0000000..9be1944
--- /dev/null
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#include "kvm_util.h"
+#include "perf_test_util.h"
+#include "processor.h"
+
+struct perf_test_args perf_test_args;
+
+uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+/*
+ * Continuously write to the first 8 bytes of each page in the
+ * specified region.
+ */
+static void guest_code(uint32_t vcpu_id)
+{
+       struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+       uint64_t gva;
+       uint64_t pages;
+       int i;
+
+       /* Make sure vCPU args data structure is not corrupt. */
+       GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
+
+       gva = vcpu_args->gva;
+       pages = vcpu_args->pages;
+
+       while (true) {
+               for (i = 0; i < pages; i++) {
+                       uint64_t addr = gva + (i * perf_test_args.guest_page_size);
+
+                       if (i % perf_test_args.wr_fract == 0)
+                               *(uint64_t *)addr = 0x0123456789ABCDEF;
+                       else
+                               READ_ONCE(*(uint64_t *)addr);
+               }
+
+               GUEST_SYNC(1);
+       }
+}
+
+struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+                                  uint64_t vcpu_memory_bytes)
+{
+       struct kvm_vm *vm;
+       uint64_t guest_num_pages;
+
+       pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+       perf_test_args.host_page_size = getpagesize();
+       perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size;
+
+       guest_num_pages = vm_adjust_num_guest_pages(mode,
+                               (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size);
+
+       TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
+                   "Guest memory size is not host page size aligned.");
+       TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
+                   "Guest memory size is not guest page size aligned.");
+
+       vm = vm_create_with_vcpus(mode, vcpus,
+                                 (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
+                                 0, guest_code, NULL);
+
+       perf_test_args.vm = vm;
+
+       /*
+        * If the guest test region would need more memory than the guest
+        * address space can hold, the test cannot work; assert up front.
+        */
+       TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+                   "Requested more guest memory than address space allows.\n"
+                   "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
+                   guest_num_pages, vm_get_max_gfn(vm), vcpus,
+                   vcpu_memory_bytes);
+
+       guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
+                             perf_test_args.guest_page_size;
+       guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
+#ifdef __s390x__
+       /* Align to 1M (segment size) */
+       guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
+       pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+
+       /* Add an extra memory slot for testing */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   guest_test_phys_mem,
+                                   PERF_TEST_MEM_SLOT_INDEX,
+                                   guest_num_pages, 0);
+
+       /* Do mapping for the demand paging memory slot */
+       virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+
+       ucall_init(vm, NULL);
+
+       return vm;
+}
+
+void perf_test_destroy_vm(struct kvm_vm *vm)
+{
+       ucall_uninit(vm);
+       kvm_vm_free(vm);
+}
+
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
+{
+       vm_paddr_t vcpu_gpa;
+       struct perf_test_vcpu_args *vcpu_args;
+       int vcpu_id;
+
+       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+               vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+
+               vcpu_args->vcpu_id = vcpu_id;
+               vcpu_args->gva = guest_test_virt_mem +
+                                (vcpu_id * vcpu_memory_bytes);
+               vcpu_args->pages = vcpu_memory_bytes /
+                                  perf_test_args.guest_page_size;
+
+               vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
+               pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+                        vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+       }
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
new file mode 100644 (file)
index 0000000..86b9e61
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define VCPU_ID        6
+
+#define ICPT_INSTRUCTION       0x04
+#define IPA0_DIAG              0x8300
+
+static void guest_code(void)
+{
+       uint64_t diag318_info = 0x12345678;
+
+       asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
+}
+
+/*
+ * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such,
+ * we create an ad-hoc VM here to handle the instruction, then extract the
+ * necessary data. It is up to the caller to decide what to do with that data.
+ */
+static uint64_t diag318_handler(void)
+{
+       struct kvm_vm *vm;
+       struct kvm_run *run;
+       uint64_t reg;
+       uint64_t diag318_info;
+
+       vm = vm_create_default(VCPU_ID, 0, guest_code);
+       vcpu_run(vm, VCPU_ID);
+       run = vcpu_state(vm, VCPU_ID);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+                   "DIAGNOSE 0x0318 instruction was not intercepted");
+       TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
+                   "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
+       TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
+                   "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
+
+       reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
+       diag318_info = run->s.regs.gprs[reg];
+
+       TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
+
+       kvm_vm_free(vm);
+
+       return diag318_info;
+}
+
+uint64_t get_diag318_info(void)
+{
+       static uint64_t diag318_info;
+       static bool printed_skip;
+
+       /*
+        * If KVM does not support diag318, then return 0 to
+        * ensure tests do not break.
+        */
+       if (!kvm_check_cap(KVM_CAP_S390_DIAG318)) {
+               if (!printed_skip) {
+                       fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
+                               "Skipping diag318 test.\n");
+                       printed_skip = true;
+               }
+               return 0;
+       }
+
+       /*
+        * If a test has previously requested the diag318 info,
+        * then don't bother spinning up a temporary VM again.
+        */
+       if (!diag318_info)
+               diag318_info = diag318_handler();
+
+       return diag318_info;
+}
index 7349bb2..0152f35 100644 (file)
@@ -5,8 +5,6 @@
  * Copyright (C) 2019, Red Hat, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include "processor.h"
 #include "kvm_util.h"
 #include "../kvm_util_internal.h"
@@ -160,26 +158,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
        virt_dump_region(stream, vm, indent, vm->pgd);
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
-                                void *guest_code)
-{
-       /*
-        * The additional amount of pages required for the page tables is:
-        * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ...
-        * which is definitely smaller than (n / 256) * 2.
-        */
-       uint64_t extra_pg_pages = extra_mem_pages / 256 * 2;
-       struct kvm_vm *vm;
-
-       vm = vm_create(VM_MODE_DEFAULT,
-                      DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-
-       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-       vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-       return vm;
-}
-
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 {
        size_t stack_size =  DEFAULT_STACK_PGS * getpagesize();
index d10c5c0..95e1a75 100644 (file)
@@ -5,8 +5,6 @@
  * Copyright (C) 2018, Google LLC.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include "test_util.h"
 #include "kvm_util.h"
 #include "../kvm_util_internal.h"
@@ -731,36 +729,6 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
 
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
-                                void *guest_code)
-{
-       struct kvm_vm *vm;
-       /*
-        * For x86 the maximum page table size for a memory region
-        * will be when only 4K pages are used.  In that case the
-        * total extra size for page tables (for extra N pages) will
-        * be: N/512+N/512^2+N/512^3+... which is definitely smaller
-        * than N/512*2.
-        */
-       uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
-
-       /* Create VM */
-       vm = vm_create(VM_MODE_DEFAULT,
-                      DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
-                      O_RDWR);
-
-       /* Setup guest code */
-       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-
-       /* Setup IRQ Chip */
-       vm_create_irqchip(vm);
-
-       /* Add the first vCPU. */
-       vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-       return vm;
-}
-
 /*
  * VCPU Get MSR
  *
index 5731ccf..caf7b88 100644 (file)
@@ -20,6 +20,7 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
+#include "diag318_test_handler.h"
 
 #define VCPU_ID 5
 
@@ -70,7 +71,7 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
 
 #undef REG_COMPARE
 
-#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS)
+#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
 #define INVALID_SYNC_FIELD 0x80000000
 
 int main(int argc, char *argv[])
@@ -152,6 +153,12 @@ int main(int argc, char *argv[])
 
        run->kvm_valid_regs = TEST_SYNC_FIELDS;
        run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
+
+       if (get_diag318_info() > 0) {
+               run->s.regs.diag318 = get_diag318_info();
+               run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
+       }
+
        rv = _vcpu_run(vm, VCPU_ID);
        TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
        TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
@@ -164,6 +171,9 @@ int main(int argc, char *argv[])
        TEST_ASSERT(run->s.regs.acrs[0]  == 1 << 11,
                    "acr0 sync regs value incorrect 0x%x.",
                    run->s.regs.acrs[0]);
+       TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(),
+                   "diag318 sync regs value incorrect 0x%llx.",
+                   run->s.regs.diag318);
 
        vcpu_regs_get(vm, VCPU_ID, &regs);
        compare_regs(&regs, &run->s.regs);
@@ -177,6 +187,7 @@ int main(int argc, char *argv[])
        run->kvm_valid_regs = TEST_SYNC_FIELDS;
        run->kvm_dirty_regs = 0;
        run->s.regs.gprs[11] = 0xDEADBEEF;
+       run->s.regs.diag318 = 0x4B1D;
        rv = _vcpu_run(vm, VCPU_ID);
        TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
        TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
@@ -186,6 +197,9 @@ int main(int argc, char *argv[])
        TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
                    "r11 sync regs value incorrect 0x%llx.",
                    run->s.regs.gprs[11]);
+       TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
+                   "diag318 sync regs value incorrect 0x%llx.",
+                   run->s.regs.diag318);
 
        kvm_vm_free(vm);
 
index b3ece55..f127ed3 100644 (file)
@@ -121,8 +121,6 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
 
        vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
                                    MEM_REGION_GPA, MEM_REGION_SLOT,
                                    MEM_REGION_SIZE / getpagesize(), 0);
@@ -156,14 +154,23 @@ static void guest_code_move_memory_region(void)
        GUEST_SYNC(0);
 
        /*
-        * Spin until the memory region is moved to a misaligned address.  This
-        * may or may not trigger MMIO, as the window where the memslot is
-        * invalid is quite small.
+        * Spin until the memory region starts getting moved to a
+        * misaligned address.
+        * Every region move may or may not trigger MMIO, as the
+        * window where the memslot is invalid is usually quite small.
         */
        val = guest_spin_on_val(0);
        GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
 
-       /* Spin until the memory region is realigned. */
+       /* Spin until the misaligning memory region move completes. */
+       val = guest_spin_on_val(MMIO_VAL);
+       GUEST_ASSERT_1(val == 1 || val == 0, val);
+
+       /* Spin until the memory region starts to get re-aligned. */
+       val = guest_spin_on_val(0);
+       GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+
+       /* Spin until the re-aligning memory region move completes. */
        val = guest_spin_on_val(MMIO_VAL);
        GUEST_ASSERT_1(val == 1, val);
 
index 140e919..f40fd09 100644 (file)
@@ -81,7 +81,6 @@ int main(int argc, char *argv[])
 
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        run = vcpu_state(vm, VCPU_ID);
 
        while (1) {
index 2fc6b3a..6097a82 100644 (file)
@@ -85,7 +85,6 @@ int main(void)
        }
 
        vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        run = vcpu_state(vm, VCPU_ID);
 
        /* Test software BPs - int3 */
index 7579281..37b8a78 100644 (file)
@@ -92,8 +92,6 @@ int main(int argc, char *argv[])
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
        if (!nested_vmx_supported() ||
            !kvm_check_cap(KVM_CAP_NESTED_STATE) ||
            !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
index 745b708..88a595b 100644 (file)
@@ -46,19 +46,19 @@ static bool smt_possible(void)
 }
 
 static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
-                         bool evmcs_enabled)
+                         bool evmcs_expected)
 {
        int i;
        int nent = 9;
        u32 test_val;
 
-       if (evmcs_enabled)
+       if (evmcs_expected)
                nent += 1; /* 0x4000000A */
 
        TEST_ASSERT(hv_cpuid_entries->nent == nent,
                    "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
                    " with evmcs=%d (returned %d)",
-                   nent, evmcs_enabled, hv_cpuid_entries->nent);
+                   nent, evmcs_expected, hv_cpuid_entries->nent);
 
        for (i = 0; i < hv_cpuid_entries->nent; i++) {
                struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
@@ -68,7 +68,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
                            "function %x is our of supported range",
                            entry->function);
 
-               TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A),
+               TEST_ASSERT(evmcs_expected || (entry->function != 0x4000000A),
                            "0x4000000A leaf should not be reported");
 
                TEST_ASSERT(entry->index == 0,
@@ -87,7 +87,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
                        TEST_ASSERT(entry->eax == test_val,
                                    "Wrong max leaf report in 0x40000000.EAX: %x"
                                    " (evmcs=%d)",
-                                   entry->eax, evmcs_enabled
+                                   entry->eax, evmcs_expected
                                );
                        break;
                case 0x40000004:
@@ -110,20 +110,23 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 
 }
 
-void test_hv_cpuid_e2big(struct kvm_vm *vm)
+void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system)
 {
        static struct kvm_cpuid2 cpuid = {.nent = 0};
        int ret;
 
-       ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+       if (!system)
+               ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+       else
+               ret = _kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
 
        TEST_ASSERT(ret == -1 && errno == E2BIG,
-                   "KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
-                   " it should have: %d %d", ret, errno);
+                   "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+                   " it should have: %d %d", system ? "KVM" : "vCPU", ret, errno);
 }
 
 
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
+struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm, bool system)
 {
        int nent = 20; /* should be enough */
        static struct kvm_cpuid2 *cpuid;
@@ -137,7 +140,10 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
 
        cpuid->nent = nent;
 
-       vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+       if (!system)
+               vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+       else
+               kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
 
        return cpuid;
 }
@@ -146,45 +152,50 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
 int main(int argc, char *argv[])
 {
        struct kvm_vm *vm;
-       int rv, stage;
        struct kvm_cpuid2 *hv_cpuid_entries;
-       bool evmcs_enabled;
 
        /* Tell stdout not to buffer its content */
        setbuf(stdout, NULL);
 
-       rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID);
-       if (!rv) {
+       if (!kvm_check_cap(KVM_CAP_HYPERV_CPUID)) {
                print_skip("KVM_CAP_HYPERV_CPUID not supported");
                exit(KSFT_SKIP);
        }
 
-       for (stage = 0; stage < 3; stage++) {
-               evmcs_enabled = false;
+       vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-               vm = vm_create_default(VCPU_ID, 0, guest_code);
-               switch (stage) {
-               case 0:
-                       test_hv_cpuid_e2big(vm);
-                       continue;
-               case 1:
-                       break;
-               case 2:
-                       if (!nested_vmx_supported() ||
-                           !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-                               print_skip("Enlightened VMCS is unsupported");
-                               continue;
-                       }
-                       vcpu_enable_evmcs(vm, VCPU_ID);
-                       evmcs_enabled = true;
-                       break;
-               }
+       /* Test vCPU ioctl version */
+       test_hv_cpuid_e2big(vm, false);
+
+       hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false);
+       test_hv_cpuid(hv_cpuid_entries, false);
+       free(hv_cpuid_entries);
 
-               hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
-               test_hv_cpuid(hv_cpuid_entries, evmcs_enabled);
-               free(hv_cpuid_entries);
-               kvm_vm_free(vm);
+       if (!nested_vmx_supported() ||
+           !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+               print_skip("Enlightened VMCS is unsupported");
+               goto do_sys;
        }
+       vcpu_enable_evmcs(vm, VCPU_ID);
+       hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false);
+       test_hv_cpuid(hv_cpuid_entries, true);
+       free(hv_cpuid_entries);
+
+do_sys:
+       /* Test system ioctl version */
+       if (!kvm_check_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+               print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
+               goto out;
+       }
+
+       test_hv_cpuid_e2big(vm, true);
+
+       hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, true);
+       test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported());
+       free(hv_cpuid_entries);
+
+out:
+       kvm_vm_free(vm);
 
        return 0;
 }
index b10a274..732b244 100644 (file)
@@ -211,8 +211,8 @@ int main(void)
        struct kvm_vm *vm;
 
        if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
-               pr_info("will skip kvm paravirt restriction tests.\n");
-               return 0;
+               print_skip("KVM_CAP_ENFORCE_PV_FEATURE_CPUID not supported");
+               exit(KSFT_SKIP);
        }
 
        vm = vm_create_default(VCPU_ID, 0, guest_main);
index 9f76561..318be0b 100644 (file)
 
 #define VCPU_ID                  5
 
+static void test_cr4_feature_bit(struct kvm_vm *vm, struct kvm_sregs *orig,
+                                uint64_t feature_bit)
+{
+       struct kvm_sregs sregs;
+       int rc;
+
+       /* Skip the sub-test; the feature is supported, so the bit is settable. */
+       if (orig->cr4 & feature_bit)
+               return;
+
+       memcpy(&sregs, orig, sizeof(sregs));
+       sregs.cr4 |= feature_bit;
+
+       rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+       TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit);
+
+       /* Sanity check that KVM didn't change anything. */
+       vcpu_sregs_get(vm, VCPU_ID, &sregs);
+       TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs");
+}
+
+static uint64_t calc_cr4_feature_bits(struct kvm_vm *vm)
+{
+       struct kvm_cpuid_entry2 *cpuid_1, *cpuid_7;
+       uint64_t cr4;
+
+       cpuid_1 = kvm_get_supported_cpuid_entry(1);
+       cpuid_7 = kvm_get_supported_cpuid_entry(7);
+
+       cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
+             X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
+             X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
+       if (cpuid_7->ecx & CPUID_UMIP)
+               cr4 |= X86_CR4_UMIP;
+       if (cpuid_7->ecx & CPUID_LA57)
+               cr4 |= X86_CR4_LA57;
+       if (cpuid_1->ecx & CPUID_VMX)
+               cr4 |= X86_CR4_VMXE;
+       if (cpuid_1->ecx & CPUID_SMX)
+               cr4 |= X86_CR4_SMXE;
+       if (cpuid_7->ebx & CPUID_FSGSBASE)
+               cr4 |= X86_CR4_FSGSBASE;
+       if (cpuid_1->ecx & CPUID_PCID)
+               cr4 |= X86_CR4_PCIDE;
+       if (cpuid_1->ecx & CPUID_XSAVE)
+               cr4 |= X86_CR4_OSXSAVE;
+       if (cpuid_7->ebx & CPUID_SMEP)
+               cr4 |= X86_CR4_SMEP;
+       if (cpuid_7->ebx & CPUID_SMAP)
+               cr4 |= X86_CR4_SMAP;
+       if (cpuid_7->ecx & CPUID_PKU)
+               cr4 |= X86_CR4_PKE;
+
+       return cr4;
+}
+
 int main(int argc, char *argv[])
 {
        struct kvm_sregs sregs;
        struct kvm_vm *vm;
+       uint64_t cr4;
        int rc;
 
        /* Tell stdout not to buffer its content */
        setbuf(stdout, NULL);
 
-       /* Create VM */
+       /*
+        * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2,
+        * and use it to verify that all supported CR4 bits can be set
+        * prior to defining the vCPU model.
+        */
+       vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+       vm_vcpu_add(vm, VCPU_ID);
+
+       vcpu_sregs_get(vm, VCPU_ID, &sregs);
+
+       sregs.cr4 |= calc_cr4_feature_bits(vm);
+       cr4 = sregs.cr4;
+
+       rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+       TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+
+       vcpu_sregs_get(vm, VCPU_ID, &sregs);
+       TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
+                   sregs.cr4, cr4);
+
+       /* Verify all unsupported features are rejected by KVM. */
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_UMIP);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_LA57);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_VMXE);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_SMXE);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_FSGSBASE);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_PCIDE);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_OSXSAVE);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_SMEP);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_SMAP);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_PKE);
+       kvm_vm_free(vm);
+
+       /* Create a "real" VM and verify APIC_BASE can be set. */
        vm = vm_create_default(VCPU_ID, 0, NULL);
 
        vcpu_sregs_get(vm, VCPU_ID, &sregs);
index ae39a22..613c42c 100644 (file)
@@ -102,8 +102,6 @@ int main(int argc, char *argv[])
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
        run = vcpu_state(vm, VCPU_ID);
 
        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
index f6c8b90..32854c1 100644 (file)
@@ -165,7 +165,6 @@ int main(int argc, char *argv[])
 
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        run = vcpu_state(vm, VCPU_ID);
 
        vcpu_regs_get(vm, VCPU_ID, &regs1);
index 0e1adb4..be2ca15 100644 (file)
@@ -44,7 +44,6 @@ int main(int argc, char *argv[])
        nested_svm_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        vcpu_alloc_svm(vm, &svm_gva);
        vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
index f8e7611..e357d8e 100644 (file)
@@ -107,7 +107,6 @@ int main(void)
        uint64_t val;
 
        vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        val = 0;
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
deleted file mode 100644 (file)
index cbe1b08..0000000
+++ /dev/null
@@ -1,248 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tests for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER
- *
- * Copyright (C) 2020, Amazon Inc.
- *
- * This is a functional test to verify that we can deflect MSR events
- * into user space.
- */
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define VCPU_ID                  5
-
-static u32 msr_reads, msr_writes;
-
-static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_deadbeef[1] = { 0x1 };
-
-static void deny_msr(uint8_t *bitmap, u32 msr)
-{
-       u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
-
-       bitmap[idx / 8] &= ~(1 << (idx % 8));
-}
-
-static void prepare_bitmaps(void)
-{
-       memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
-       memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
-       memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
-       memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
-       memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
-
-       deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
-       deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
-       deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
-}
-
-struct kvm_msr_filter filter = {
-       .flags = KVM_MSR_FILTER_DEFAULT_DENY,
-       .ranges = {
-               {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .base = 0x00000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_00000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE,
-                       .base = 0x00000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_00000000_write,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
-                       .base = 0x40000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_40000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .base = 0xc0000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_c0000000_read,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE,
-                       .base = 0xc0000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_c0000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
-                       .base = 0xdeadbeef,
-                       .nmsrs = 1,
-                       .bitmap = bitmap_deadbeef,
-               },
-       },
-};
-
-struct kvm_msr_filter no_filter = {
-       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-};
-
-static void guest_msr_calls(bool trapped)
-{
-       /* This goes into the in-kernel emulation */
-       wrmsr(MSR_SYSCALL_MASK, 0);
-
-       if (trapped) {
-               /* This goes into user space emulation */
-               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
-               GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
-       } else {
-               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
-               GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
-       }
-
-       /* If trapped == true, this goes into user space emulation */
-       wrmsr(MSR_IA32_POWER_CTL, 0x1234);
-
-       /* This goes into the in-kernel emulation */
-       rdmsr(MSR_IA32_POWER_CTL);
-
-       /* Invalid MSR, should always be handled by user space exit */
-       GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
-       wrmsr(0xdeadbeef, 0x1234);
-}
-
-static void guest_code(void)
-{
-       guest_msr_calls(true);
-
-       /*
-        * Disable msr filtering, so that the kernel
-        * handles everything in the next round
-        */
-       GUEST_SYNC(0);
-
-       guest_msr_calls(false);
-
-       GUEST_DONE();
-}
-
-static int handle_ucall(struct kvm_vm *vm)
-{
-       struct ucall uc;
-
-       switch (get_ucall(vm, VCPU_ID, &uc)) {
-       case UCALL_ABORT:
-               TEST_FAIL("Guest assertion not met");
-               break;
-       case UCALL_SYNC:
-               vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter);
-               break;
-       case UCALL_DONE:
-               return 1;
-       default:
-               TEST_FAIL("Unknown ucall %lu", uc.cmd);
-       }
-
-       return 0;
-}
-
-static void handle_rdmsr(struct kvm_run *run)
-{
-       run->msr.data = run->msr.index;
-       msr_reads++;
-
-       if (run->msr.index == MSR_SYSCALL_MASK ||
-           run->msr.index == MSR_GS_BASE) {
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
-                           "MSR read trap w/o access fault");
-       }
-
-       if (run->msr.index == 0xdeadbeef) {
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
-                           "MSR deadbeef read trap w/o inval fault");
-       }
-}
-
-static void handle_wrmsr(struct kvm_run *run)
-{
-       /* ignore */
-       msr_writes++;
-
-       if (run->msr.index == MSR_IA32_POWER_CTL) {
-               TEST_ASSERT(run->msr.data == 0x1234,
-                           "MSR data for MSR_IA32_POWER_CTL incorrect");
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
-                           "MSR_IA32_POWER_CTL trap w/o access fault");
-       }
-
-       if (run->msr.index == 0xdeadbeef) {
-               TEST_ASSERT(run->msr.data == 0x1234,
-                           "MSR data for deadbeef incorrect");
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
-                           "deadbeef trap w/o inval fault");
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_enable_cap cap = {
-               .cap = KVM_CAP_X86_USER_SPACE_MSR,
-               .args[0] = KVM_MSR_EXIT_REASON_INVAL |
-                          KVM_MSR_EXIT_REASON_UNKNOWN |
-                          KVM_MSR_EXIT_REASON_FILTER,
-       };
-       struct kvm_vm *vm;
-       struct kvm_run *run;
-       int rc;
-
-       /* Tell stdout not to buffer its content */
-       setbuf(stdout, NULL);
-
-       /* Create VM */
-       vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-       run = vcpu_state(vm, VCPU_ID);
-
-       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-       vm_enable_cap(vm, &cap);
-
-       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-       prepare_bitmaps();
-       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter);
-
-       while (1) {
-               rc = _vcpu_run(vm, VCPU_ID);
-
-               TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
-
-               switch (run->exit_reason) {
-               case KVM_EXIT_X86_RDMSR:
-                       handle_rdmsr(run);
-                       break;
-               case KVM_EXIT_X86_WRMSR:
-                       handle_wrmsr(run);
-                       break;
-               case KVM_EXIT_IO:
-                       if (handle_ucall(vm))
-                               goto done;
-                       break;
-               }
-
-       }
-
-done:
-       TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
-       TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
new file mode 100644 (file)
index 0000000..72c0d07
--- /dev/null
@@ -0,0 +1,770 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for exiting into userspace on registered MSRs
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
+#define KVM_FEP_LENGTH 5
+static int fep_available = 1;
+
+#define VCPU_ID              1
+#define MSR_NON_EXISTENT 0x474f4f00
+
+static u64 deny_bits = 0;
+struct kvm_msr_filter filter_allow = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test an MSR the kernel knows about. */
+                       .base = MSR_IA32_XSS,
+                       .bitmap = (uint8_t*)&deny_bits,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test an MSR the kernel doesn't know about. */
+                       .base = MSR_IA32_FLUSH_CMD,
+                       .bitmap = (uint8_t*)&deny_bits,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test a fabricated MSR that no one knows about. */
+                       .base = MSR_NON_EXISTENT,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+struct kvm_msr_filter filter_fs = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .nmsrs = 1,
+                       .base = MSR_FS_BASE,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+struct kvm_msr_filter filter_gs = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .nmsrs = 1,
+                       .base = MSR_GS_BASE,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+static uint64_t msr_non_existent_data;
+static int guest_exception_count;
+static u32 msr_reads, msr_writes;
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };
+
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+       u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+
+       bitmap[idx / 8] &= ~(1 << (idx % 8));
+}
+
+static void prepare_bitmaps(void)
+{
+       memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+       memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+       memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+       memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+       memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+
+       deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+       deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+       deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+}
+
+struct kvm_msr_filter filter_deny = {
+       .flags = KVM_MSR_FILTER_DEFAULT_DENY,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .base = 0x00000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_00000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE,
+                       .base = 0x00000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_00000000_write,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+                       .base = 0x40000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_40000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .base = 0xc0000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_c0000000_read,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE,
+                       .base = 0xc0000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_c0000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+                       .base = 0xdeadbeef,
+                       .nmsrs = 1,
+                       .bitmap = bitmap_deadbeef,
+               },
+       },
+};
+
+struct kvm_msr_filter no_filter_deny = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
+
+/*
+ * Note: Force test_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_rdmsr(uint32_t msr)
+{
+       uint32_t a, d;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
+                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+       return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+{
+       uint32_t a = value;
+       uint32_t d = value >> 32;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
+                       "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char rdmsr_start, rdmsr_end;
+extern char wrmsr_start, wrmsr_end;
+
+/*
+ * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
+ * em_rdmsr_start and em_rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_em_rdmsr(uint32_t msr)
+{
+       uint32_t a, d;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
+                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+       return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
+ * em_wrmsr_start and em_wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+{
+       uint32_t a = value;
+       uint32_t d = value >> 32;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
+                       "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char em_rdmsr_start, em_rdmsr_end;
+extern char em_wrmsr_start, em_wrmsr_end;
+
+static void guest_code_filter_allow(void)
+{
+       uint64_t data;
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
+        *
+        * A GP is thrown if anything other than 0 is written to
+        * MSR_IA32_XSS.
+        */
+       data = test_rdmsr(MSR_IA32_XSS);
+       GUEST_ASSERT(data == 0);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       test_wrmsr(MSR_IA32_XSS, 0);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       test_wrmsr(MSR_IA32_XSS, 1);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
+        *
+        * A GP is thrown if MSR_IA32_FLUSH_CMD is read
+        * from or if a value other than 1 is written to it.
+        */
+       test_rdmsr(MSR_IA32_FLUSH_CMD);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
+        *
+        * Test that a fabricated MSR can pass through the kernel
+        * and be handled in userspace.
+        */
+       test_wrmsr(MSR_NON_EXISTENT, 2);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       data = test_rdmsr(MSR_NON_EXISTENT);
+       GUEST_ASSERT(data == 2);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       /*
+        * Test to see if the instruction emulator is available (i.e. the
+        * module parameter 'kvm.force_emulation_prefix=1' is set).  This
+        * instruction will #UD if it isn't available.
+        */
+       __asm__ __volatile__(KVM_FEP "nop");
+
+       if (fep_available) {
+               /* Let userspace know we aren't done. */
+               GUEST_SYNC(0);
+
+               /*
+                * Now run the same tests with the instruction emulator.
+                */
+               data = test_em_rdmsr(MSR_IA32_XSS);
+               GUEST_ASSERT(data == 0);
+               GUEST_ASSERT(guest_exception_count == 0);
+               test_em_wrmsr(MSR_IA32_XSS, 0);
+               GUEST_ASSERT(guest_exception_count == 0);
+               test_em_wrmsr(MSR_IA32_XSS, 1);
+               GUEST_ASSERT(guest_exception_count == 1);
+
+               test_em_rdmsr(MSR_IA32_FLUSH_CMD);
+               GUEST_ASSERT(guest_exception_count == 1);
+               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+               GUEST_ASSERT(guest_exception_count == 1);
+               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+               GUEST_ASSERT(guest_exception_count == 0);
+
+               test_em_wrmsr(MSR_NON_EXISTENT, 2);
+               GUEST_ASSERT(guest_exception_count == 0);
+               data = test_em_rdmsr(MSR_NON_EXISTENT);
+               GUEST_ASSERT(data == 2);
+               GUEST_ASSERT(guest_exception_count == 0);
+       }
+
+       GUEST_DONE();
+}
+
+static void guest_msr_calls(bool trapped)
+{
+       /* This goes into the in-kernel emulation */
+       wrmsr(MSR_SYSCALL_MASK, 0);
+
+       if (trapped) {
+               /* This goes into user space emulation */
+               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+               GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+       } else {
+               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+               GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+       }
+
+       /* If trapped == true, this goes into user space emulation */
+       wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+       /* This goes into the in-kernel emulation */
+       rdmsr(MSR_IA32_POWER_CTL);
+
+       /* Invalid MSR, should always be handled by user space exit */
+       GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+       wrmsr(0xdeadbeef, 0x1234);
+}
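
Note: the trapped-path assertions above only hold together with the userspace
handler further down: handle_rdmsr() completes every intercepted read with

	run->msr.data = run->msr.index;

so a read that returns its own MSR index proves the access was routed through
userspace, and one that does not proves KVM's in-kernel handling served it.
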
+
+static void guest_code_filter_deny(void)
+{
+       guest_msr_calls(true);
+
+       /*
+        * Disable msr filtering, so that the kernel
+        * handles everything in the next round
+        */
+       GUEST_SYNC(0);
+
+       guest_msr_calls(false);
+
+       GUEST_DONE();
+}
+
+static void guest_code_permission_bitmap(void)
+{
+       uint64_t data;
+
+       data = test_rdmsr(MSR_FS_BASE);
+       GUEST_ASSERT(data == MSR_FS_BASE);
+       data = test_rdmsr(MSR_GS_BASE);
+       GUEST_ASSERT(data != MSR_GS_BASE);
+
+       /* Let userspace know to switch the filter */
+       GUEST_SYNC(0);
+
+       data = test_rdmsr(MSR_FS_BASE);
+       GUEST_ASSERT(data != MSR_FS_BASE);
+       data = test_rdmsr(MSR_GS_BASE);
+       GUEST_ASSERT(data == MSR_GS_BASE);
+
+       GUEST_DONE();
+}
+
+static void __guest_gp_handler(struct ex_regs *regs,
+                              char *r_start, char *r_end,
+                              char *w_start, char *w_end)
+{
+       if (regs->rip == (uintptr_t)r_start) {
+               regs->rip = (uintptr_t)r_end;
+               regs->rax = 0;
+               regs->rdx = 0;
+       } else if (regs->rip == (uintptr_t)w_start) {
+               regs->rip = (uintptr_t)w_end;
+       } else {
+               GUEST_ASSERT(!"RIP is at an unknown location!");
+       }
+
+       ++guest_exception_count;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+       __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
+                          &wrmsr_start, &wrmsr_end);
+}
+
+static void guest_fep_gp_handler(struct ex_regs *regs)
+{
+       __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
+                          &em_wrmsr_start, &em_wrmsr_end);
+}
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       fep_available = 0;
+       regs->rip += KVM_FEP_LENGTH;
+}
+
+static void run_guest(struct kvm_vm *vm)
+{
+       int rc;
+
+       rc = _vcpu_run(vm, VCPU_ID);
+       TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+}
+
+static void check_for_guest_assert(struct kvm_vm *vm)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+       struct ucall uc;
+
+       if (run->exit_reason == KVM_EXIT_IO &&
+               get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) {
+                       TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+                               __FILE__, uc.args[1]);
+       }
+}
+
+static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+       check_for_guest_assert(vm);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_RDMSR,
+                   "Unexpected exit reason: %u (%s),\n",
+                   run->exit_reason,
+                   exit_reason_str(run->exit_reason));
+       TEST_ASSERT(run->msr.index == msr_index,
+                       "Unexpected msr (0x%04x), expected 0x%04x",
+                       run->msr.index, msr_index);
+
+       switch (run->msr.index) {
+       case MSR_IA32_XSS:
+               run->msr.data = 0;
+               break;
+       case MSR_IA32_FLUSH_CMD:
+               run->msr.error = 1;
+               break;
+       case MSR_NON_EXISTENT:
+               run->msr.data = msr_non_existent_data;
+               break;
+       case MSR_FS_BASE:
+               run->msr.data = MSR_FS_BASE;
+               break;
+       case MSR_GS_BASE:
+               run->msr.data = MSR_GS_BASE;
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+       }
+}
+
+static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+       check_for_guest_assert(vm);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_WRMSR,
+                   "Unexpected exit reason: %u (%s),\n",
+                   run->exit_reason,
+                   exit_reason_str(run->exit_reason));
+       TEST_ASSERT(run->msr.index == msr_index,
+                       "Unexpected msr (0x%04x), expected 0x%04x",
+                       run->msr.index, msr_index);
+
+       switch (run->msr.index) {
+       case MSR_IA32_XSS:
+               if (run->msr.data != 0)
+                       run->msr.error = 1;
+               break;
+       case MSR_IA32_FLUSH_CMD:
+               if (run->msr.data != 1)
+                       run->msr.error = 1;
+               break;
+       case MSR_NON_EXISTENT:
+               msr_non_existent_data = run->msr.data;
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+       }
+}
+
+static void process_ucall_done(struct kvm_vm *vm)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+       struct ucall uc;
+
+       check_for_guest_assert(vm);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                   "Unexpected exit reason: %u (%s)",
+                   run->exit_reason,
+                   exit_reason_str(run->exit_reason));
+
+       TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE,
+                   "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+                   uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vm *vm)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+       struct ucall uc = {};
+
+       check_for_guest_assert(vm);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                   "Unexpected exit reason: %u (%s)",
+                   run->exit_reason,
+                   exit_reason_str(run->exit_reason));
+
+       switch (get_ucall(vm, VCPU_ID, &uc)) {
+       case UCALL_SYNC:
+               break;
+       case UCALL_ABORT:
+               check_for_guest_assert(vm);
+               break;
+       case UCALL_DONE:
+               process_ucall_done(vm);
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected ucall");
+       }
+
+       return uc.cmd;
+}
+
+static void run_guest_then_process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+       run_guest(vm);
+       process_rdmsr(vm, msr_index);
+}
+
+static void run_guest_then_process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+       run_guest(vm);
+       process_wrmsr(vm, msr_index);
+}
+
+static uint64_t run_guest_then_process_ucall(struct kvm_vm *vm)
+{
+       run_guest(vm);
+       return process_ucall(vm);
+}
+
+static void run_guest_then_process_ucall_done(struct kvm_vm *vm)
+{
+       run_guest(vm);
+       process_ucall_done(vm);
+}
+
+static void test_msr_filter_allow(void)
+{
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_X86_USER_SPACE_MSR,
+               .args[0] = KVM_MSR_EXIT_REASON_FILTER,
+       };
+       struct kvm_vm *vm;
+       int rc;
+
+       /* Create VM */
+       vm = vm_create_default(VCPU_ID, 0, guest_code_filter_allow);
+       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, &cap);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+       vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+
+       /* Process guest code userspace exits. */
+       run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
+       run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+       run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+
+       run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD);
+       run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+       run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+
+       run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
+       run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
+
+       vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+       run_guest(vm);
+       vm_handle_exception(vm, UD_VECTOR, NULL);
+
+       if (process_ucall(vm) != UCALL_DONE) {
+               vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler);
+
+               /* Process emulated rdmsr and wrmsr instructions. */
+               run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
+               run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+               run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+
+               run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD);
+               run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+               run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+
+               run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
+               run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
+
+               /* Confirm the guest completed without issues. */
+               run_guest_then_process_ucall_done(vm);
+       } else {
+               printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n");
+       }
+
+       kvm_vm_free(vm);
+}
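
Note: process_rdmsr()/process_wrmsr() above are the userspace half of the
KVM_CAP_X86_USER_SPACE_MSR protocol: on KVM_EXIT_X86_RDMSR userspace fills
run->msr.data (or sets run->msr.error, which injects a #GP into the guest) and
re-enters the vCPU; KVM_EXIT_X86_WRMSR is symmetric with run->msr.data as
input. A condensed sketch of that loop, with error handling elided:

	struct kvm_run *run = vcpu_state(vm, VCPU_ID);

	for (;;) {
		vcpu_run(vm, VCPU_ID);

		if (run->exit_reason == KVM_EXIT_X86_RDMSR)
			run->msr.data = 0;			/* value rdmsr returns */
		else if (run->exit_reason == KVM_EXIT_X86_WRMSR)
			run->msr.error = run->msr.data != 0;	/* 0, or inject #GP */
		else
			break;					/* e.g. the final ucall */
	}
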
+
+static int handle_ucall(struct kvm_vm *vm)
+{
+       struct ucall uc;
+
+       switch (get_ucall(vm, VCPU_ID, &uc)) {
+       case UCALL_ABORT:
+               TEST_FAIL("Guest assertion not met");
+               break;
+       case UCALL_SYNC:
+               vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+               break;
+       case UCALL_DONE:
+               return 1;
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+       }
+
+       return 0;
+}
+
+static void handle_rdmsr(struct kvm_run *run)
+{
+       run->msr.data = run->msr.index;
+       msr_reads++;
+
+       if (run->msr.index == MSR_SYSCALL_MASK ||
+           run->msr.index == MSR_GS_BASE) {
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+                           "MSR read trap w/o access fault");
+       }
+
+       if (run->msr.index == 0xdeadbeef) {
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+                           "MSR deadbeef read trap w/o inval fault");
+       }
+}
+
+static void handle_wrmsr(struct kvm_run *run)
+{
+       /* ignore */
+       msr_writes++;
+
+       if (run->msr.index == MSR_IA32_POWER_CTL) {
+               TEST_ASSERT(run->msr.data == 0x1234,
+                           "MSR data for MSR_IA32_POWER_CTL incorrect");
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+                           "MSR_IA32_POWER_CTL trap w/o access fault");
+       }
+
+       if (run->msr.index == 0xdeadbeef) {
+               TEST_ASSERT(run->msr.data == 0x1234,
+                           "MSR data for deadbeef incorrect");
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+                           "deadbeef trap w/o inval fault");
+       }
+}
+
+static void test_msr_filter_deny(void)
+{
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_X86_USER_SPACE_MSR,
+               .args[0] = KVM_MSR_EXIT_REASON_INVAL |
+                          KVM_MSR_EXIT_REASON_UNKNOWN |
+                          KVM_MSR_EXIT_REASON_FILTER,
+       };
+       struct kvm_vm *vm;
+       struct kvm_run *run;
+       int rc;
+
+       /* Create VM */
+       vm = vm_create_default(VCPU_ID, 0, guest_code_filter_deny);
+       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+       run = vcpu_state(vm, VCPU_ID);
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, &cap);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       prepare_bitmaps();
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
+
+       while (1) {
+               rc = _vcpu_run(vm, VCPU_ID);
+
+               TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+
+               switch (run->exit_reason) {
+               case KVM_EXIT_X86_RDMSR:
+                       handle_rdmsr(run);
+                       break;
+               case KVM_EXIT_X86_WRMSR:
+                       handle_wrmsr(run);
+                       break;
+               case KVM_EXIT_IO:
+                       if (handle_ucall(vm))
+                               goto done;
+                       break;
+               }
+       }
+
+done:
+       TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+       TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+
+       kvm_vm_free(vm);
+}
+
+static void test_msr_permission_bitmap(void)
+{
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_X86_USER_SPACE_MSR,
+               .args[0] = KVM_MSR_EXIT_REASON_FILTER,
+       };
+       struct kvm_vm *vm;
+       int rc;
+
+       /* Create VM */
+       vm = vm_create_default(VCPU_ID, 0, guest_code_permission_bitmap);
+       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, &cap);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
+       run_guest_then_process_rdmsr(vm, MSR_FS_BASE);
+       TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC,
+                   "Expected ucall state to be UCALL_SYNC.");
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
+       run_guest_then_process_rdmsr(vm, MSR_GS_BASE);
+       run_guest_then_process_ucall_done(vm);
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       /* Tell stdout not to buffer its content */
+       setbuf(stdout, NULL);
+
+       test_msr_filter_allow();
+
+       test_msr_filter_deny();
+
+       test_msr_permission_bitmap();
+
+       return 0;
+}
index 1f65342..d14888b 100644 (file)
@@ -87,7 +87,6 @@ int main(int argc, char *argv[])
        nested_vmx_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        kvm_get_cpu_address_width(&paddr_width, &vaddr_width);
        high_gpa = (1ul << paddr_width) - getpagesize();
index fe40ade..2835a17 100644 (file)
@@ -57,7 +57,6 @@ int main(int argc, char *argv[])
        nested_vmx_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        /* Allocate VMX pages and shared descriptors (vmx_pages). */
        vcpu_alloc_vmx(vm, &vmx_pages_gva);
index e894a63..537de10 100644 (file)
@@ -82,7 +82,6 @@ int main(int argc, char *argv[])
 
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
        vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
        run = vcpu_state(vm, VCPU_ID);
index a7737af..a07480a 100644 (file)
@@ -169,20 +169,19 @@ int main(int argc, char *argv[])
         */
        nested_vmx_check_supported();
 
+       if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+               print_skip("KVM_CAP_NESTED_STATE not supported");
+               exit(KSFT_SKIP);
+       }
+
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        run = vcpu_state(vm, VCPU_ID);
 
        vcpu_regs_get(vm, VCPU_ID, &regs1);
 
-       if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-               vcpu_alloc_vmx(vm, &vmx_pages_gva);
-               vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
-       } else {
-               pr_info("will skip vmx preemption timer checks\n");
-               goto done;
-       }
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 
        for (stage = 1;; stage++) {
                _vcpu_run(vm, VCPU_ID);
index d59f3eb..5827b9b 100644 (file)
@@ -244,6 +244,22 @@ void test_vmx_nested_state(struct kvm_vm *vm)
        free(state);
 }
 
+void disable_vmx(struct kvm_vm *vm)
+{
+       struct kvm_cpuid2 *cpuid = kvm_get_supported_cpuid();
+       int i;
+
+       for (i = 0; i < cpuid->nent; ++i)
+               if (cpuid->entries[i].function == 1 &&
+                   cpuid->entries[i].index == 0)
+                       break;
+       TEST_ASSERT(i != cpuid->nent, "CPUID function 1 not found");
+
+       cpuid->entries[i].ecx &= ~CPUID_VMX;
+       vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+       cpuid->entries[i].ecx |= CPUID_VMX;
+}
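
Note: the bit is restored immediately after vcpu_set_cpuid() because the
selftest library appears to cache the table that kvm_get_supported_cpuid()
returns, while vcpu_set_cpuid() snapshots it into the vCPU; clearing, setting,
then undoing therefore hides VMX from this vCPU without disturbing later
callers. The same lines as above, annotated for illustration:

	cpuid->entries[i].ecx &= ~CPUID_VMX;	/* hide VMX... */
	vcpu_set_cpuid(vm, VCPU_ID, cpuid);	/* ...the vCPU copies the table */
	cpuid->entries[i].ecx |= CPUID_VMX;	/* undo: the table is shared */
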
+
 int main(int argc, char *argv[])
 {
        struct kvm_vm *vm;
@@ -264,6 +280,11 @@ int main(int argc, char *argv[])
 
        vm = vm_create_default(VCPU_ID, 0, 0);
 
+       /*
+        * First run tests with VMX disabled to check error handling.
+        */
+       disable_vmx(vm);
+
        /* Passing a NULL kvm_nested_state causes an EFAULT. */
        test_nested_state_expect_efault(vm, NULL);
 
index fbe8417..7e33a35 100644 (file)
@@ -132,7 +132,6 @@ int main(int argc, char *argv[])
        nested_vmx_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        /* Allocate VMX pages and shared descriptors (vmx_pages). */
        vcpu_alloc_vmx(vm, &vmx_pages_gva);
index fb5c55d..02b0b9e 100755 (executable)
@@ -256,6 +256,28 @@ setup_cmd_nsb()
        fi
 }
 
+setup_cmd_nsc()
+{
+       local cmd="$*"
+       local rc
+
+       run_cmd_nsc ${cmd}
+       rc=$?
+       if [ $rc -ne 0 ]; then
+               # show the user the command if it hasn't been shown already
+               if [ "$VERBOSE" = "0" ]; then
+                       echo "setup command: $cmd"
+               fi
+               echo "failed. stopping tests"
+               if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+                       echo
+                       echo "hit enter to continue"
+                       read a
+               fi
+               exit $rc
+       fi
+}
+
 # set sysctl values in NS-A
 set_sysctl()
 {
@@ -471,6 +493,36 @@ setup()
        sleep 1
 }
 
+setup_lla_only()
+{
+       # make sure we are starting with a clean slate
+       kill_procs
+       cleanup 2>/dev/null
+
+       log_debug "Configuring network namespaces"
+       set -e
+
+       create_ns ${NSA} "-" "-"
+       create_ns ${NSB} "-" "-"
+       create_ns ${NSC} "-" "-"
+       connect_ns ${NSA} ${NSA_DEV} "-" "-" \
+                  ${NSB} ${NSB_DEV} "-" "-"
+       connect_ns ${NSA} ${NSA_DEV2} "-" "-" \
+                  ${NSC} ${NSC_DEV}  "-" "-"
+
+       NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV})
+       NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV})
+       NSC_LINKIP6=$(get_linklocal ${NSC} ${NSC_DEV})
+
+       create_vrf ${NSA} ${VRF} ${VRF_TABLE} "-" "-"
+       ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF}
+       ip -netns ${NSA} link set dev ${NSA_DEV2} vrf ${VRF}
+
+       set +e
+
+       sleep 1
+}
+
 ################################################################################
 # IPv4
 
@@ -3787,10 +3839,53 @@ use_case_br()
        setup_cmd_nsb ip li del vlan100 2>/dev/null
 }
 
+# VRF only.
+# ns-A is connected to both ns-B and ns-C via devices in a single VRF, but
+# the interfaces only have link-local addresses (LLA)
+use_case_ping_lla_multi()
+{
+       setup_lla_only
+       # only want reply from ns-A
+       setup_cmd_nsb sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1
+       setup_cmd_nsc sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1
+
+       log_start
+       run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+       log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Pre cycle, ping out ns-B"
+
+       run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+       log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Pre cycle, ping out ns-C"
+
+       # cycle/flap the first ns-A interface
+       setup_cmd ip link set ${NSA_DEV} down
+       setup_cmd ip link set ${NSA_DEV} up
+       sleep 1
+
+       log_start
+       run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+       log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-B"
+       run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+       log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-C"
+
+       # cycle/flap the second ns-A interface
+       setup_cmd ip link set ${NSA_DEV2} down
+       setup_cmd ip link set ${NSA_DEV2} up
+       sleep 1
+
+       log_start
+       run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+       log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-B"
+       run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+       log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
+}
+
 use_cases()
 {
        log_section "Use cases"
+       log_subsection "Device enslaved to bridge"
        use_case_br
+       log_subsection "Ping LLA with multiple interfaces"
+       use_case_ping_lla_multi
 }
 
 ################################################################################
index db3d4a8..76a2405 100644 (file)
@@ -113,6 +113,9 @@ static void do_poll(int fd, int timeout_ms)
                                interrupted = true;
                                break;
                        }
+
+                       /* no events and more time to wait, do poll again */
+                       continue;
                }
                if (pfd.revents != POLLIN)
                        error(1, errno, "poll: 0x%x expected 0x%x\n",
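
Note: the added continue distinguishes "poll() timed out but the overall
deadline has not passed" (retry) from a genuine event or error. A hedged
sketch of the intended loop shape, with a hypothetical millisecond deadline:

	void poll_until_readable(int fd, int deadline_ms)
	{
		struct pollfd pfd = { .fd = fd, .events = POLLIN };
		int waited_ms = 0;

		while (waited_ms < deadline_ms) {
			int ret = poll(&pfd, 1, 10);	/* short slice */

			if (ret < 0)
				break;			/* real error */
			if (ret == 0) {
				waited_ms += 10;
				continue;		/* no events, time remains */
			}
			if (pfd.revents & POLLIN)
				return;			/* data ready */
		}
	}
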
index 052b5a7..b7d188f 100644 (file)
@@ -42,6 +42,11 @@ int perf_event_enable(int fd);
 int perf_event_disable(int fd);
 int perf_event_reset(int fd);
 
+struct perf_event_read {
+       __u64 nr;
+       __u64 l1d_misses;
+};
+
 #if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
 #include <unistd.h>
 #include <sys/syscall.h>
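
Note: the { nr, l1d_misses } layout matches what read(2) returns for a perf
event group with one member, which suggests the counter is opened with
PERF_FORMAT_GROUP (an assumption; perf_event_open_counter() is defined
elsewhere). Under that assumption a caller consumes it as:

	struct perf_event_read v;

	if (read(fd, &v, sizeof(v)) == sizeof(v) && v.nr == 1)
		printf("L1d misses: %llu\n", v.l1d_misses);
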
index eadbbff..f25e854 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0+
 
-TEST_GEN_PROGS := rfi_flush spectre_v2
+TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2
 top_srcdir = ../../../../..
 
 CFLAGS += -I../../../../../usr/include
@@ -11,3 +11,5 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c
 
 $(OUTPUT)/spectre_v2: CFLAGS += -m64
 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
+$(OUTPUT)/rfi_flush: flush_utils.c
+$(OUTPUT)/entry_flush: flush_utils.c
diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c
new file mode 100644 (file)
index 0000000..78cf914
--- /dev/null
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+int entry_flush_test(void)
+{
+       char *p;
+       int repetitions = 10;
+       int fd, passes = 0, iter, rc = 0;
+       struct perf_event_read v;
+       __u64 l1d_misses_total = 0;
+       unsigned long iterations = 100000, zero_size = 24 * 1024;
+       unsigned long l1d_misses_expected;
+       int rfi_flush_orig;
+       int entry_flush, entry_flush_orig;
+
+       SKIP_IF(geteuid() != 0);
+
+       // The PMU event we use only works on Power7 or later
+       SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+       if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
+               perror("Unable to read powerpc/rfi_flush debugfs file");
+               SKIP_IF(1);
+       }
+
+       if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
+               perror("Unable to read powerpc/entry_flush debugfs file");
+               SKIP_IF(1);
+       }
+
+       if (rfi_flush_orig != 0) {
+               if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) {
+                       perror("error writing to powerpc/rfi_flush debugfs file");
+                       FAIL_IF(1);
+               }
+       }
+
+       entry_flush = entry_flush_orig;
+
+       fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+       FAIL_IF(fd < 0);
+
+       /* allocate zero_size bytes, aligned to a cache line */
+       p = (char *)memalign(CACHELINE_SIZE, zero_size);
+       FAIL_IF(!p);
+
+       FAIL_IF(perf_event_enable(fd));
+
+       // disable L1 prefetching
+       set_dscr(1);
+
+       iter = repetitions;
+
+       /*
+        * We expect to see an L1d miss for each cache-line access when
+        * entry_flush is set. Allow a small variation on this.
+        */
+       l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+       FAIL_IF(perf_event_reset(fd));
+
+       syscall_loop(p, iterations, zero_size);
+
+       FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+       if (entry_flush && v.l1d_misses >= l1d_misses_expected)
+               passes++;
+       else if (!entry_flush && v.l1d_misses < (l1d_misses_expected / 2))
+               passes++;
+
+       l1d_misses_total += v.l1d_misses;
+
+       while (--iter)
+               goto again;
+
+       if (passes < repetitions) {
+               printf("FAIL (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+                      entry_flush, l1d_misses_total, entry_flush ? '<' : '>',
+                      entry_flush ? repetitions * l1d_misses_expected :
+                      repetitions * l1d_misses_expected / 2,
+                      repetitions - passes, repetitions);
+               rc = 1;
+       } else {
+               printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+                      entry_flush, l1d_misses_total, entry_flush ? '>' : '<',
+                      entry_flush ? repetitions * l1d_misses_expected :
+                      repetitions * l1d_misses_expected / 2,
+                      passes, repetitions);
+       }
+
+       if (entry_flush == entry_flush_orig) {
+               entry_flush = !entry_flush_orig;
+               if (write_debugfs_file("powerpc/entry_flush", entry_flush) < 0) {
+                       perror("error writing to powerpc/entry_flush debugfs file");
+                       return 1;
+               }
+               iter = repetitions;
+               l1d_misses_total = 0;
+               passes = 0;
+               goto again;
+       }
+
+       perf_event_disable(fd);
+       close(fd);
+
+       set_dscr(0);
+
+       if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
+               perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+               return 1;
+       }
+
+       if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
+               perror("unable to restore original value of powerpc/entry_flush debugfs file");
+               return 1;
+       }
+
+       return rc;
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness(entry_flush_test, "entry_flush_test");
+}
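
Note: the pass criterion is easy to sanity-check with the numbers plugged in:
zero_size / CACHELINE_SIZE = 24 * 1024 / 128 = 192 cache lines, so
l1d_misses_expected = 100000 * (192 - 2) = 19,000,000 misses per measurement.
With entry_flush enabled, each of the 10 repetitions must observe at least
that many misses; with it disabled, fewer than half (9,500,000).
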
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c
new file mode 100644 (file)
index 0000000..0c3c4c4
--- /dev/null
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+static inline __u64 load(void *addr)
+{
+       __u64 tmp;
+
+       asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+       return tmp;
+}
+
+void syscall_loop(char *p, unsigned long iterations,
+                 unsigned long zero_size)
+{
+       for (unsigned long i = 0; i < iterations; i++) {
+               for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+                       load(p + j);
+               getppid();
+       }
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+       static int warned;
+       ucontext_t *ctx = (ucontext_t *)unused;
+       unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+       /* mtspr 3,RS to check for move to DSCR below */
+       if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+               if (!warned++)
+                       printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+               *pc += 4;
+       } else {
+               printf("SIGILL at %p\n", pc);
+               abort();
+       }
+}
+
+void set_dscr(unsigned long val)
+{
+       static int init;
+       struct sigaction sa;
+
+       if (!init) {
+               memset(&sa, 0, sizeof(sa));
+               sa.sa_sigaction = sigill_handler;
+               sa.sa_flags = SA_SIGINFO;
+               if (sigaction(SIGILL, &sa, NULL))
+                       perror("sigill_handler");
+               init = 1;
+       }
+
+       asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h
new file mode 100644 (file)
index 0000000..07a5eb3
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#ifndef _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
+#define _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
+
+#define CACHELINE_SIZE 128
+
+void syscall_loop(char *p, unsigned long iterations,
+                 unsigned long zero_size);
+
+void set_dscr(unsigned long val);
+
+#endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */
index 93a65bd..7565fd7 100644 (file)
 #include <stdint.h>
 #include <malloc.h>
 #include <unistd.h>
-#include <signal.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include "utils.h"
+#include "flush_utils.h"
 
-#define CACHELINE_SIZE 128
-
-struct perf_event_read {
-       __u64 nr;
-       __u64 l1d_misses;
-};
-
-static inline __u64 load(void *addr)
-{
-       __u64 tmp;
-
-       asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
-
-       return tmp;
-}
-
-static void syscall_loop(char *p, unsigned long iterations,
-                        unsigned long zero_size)
-{
-       for (unsigned long i = 0; i < iterations; i++) {
-               for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
-                       load(p + j);
-               getppid();
-       }
-}
-
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
-{
-       static int warned = 0;
-       ucontext_t *ctx = (ucontext_t *)unused;
-       unsigned long *pc = &UCONTEXT_NIA(ctx);
-
-       /* mtspr 3,RS to check for move to DSCR below */
-       if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
-               if (!warned++)
-                       printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
-               *pc += 4;
-       } else {
-               printf("SIGILL at %p\n", pc);
-               abort();
-       }
-}
-
-static void set_dscr(unsigned long val)
-{
-       static int init = 0;
-       struct sigaction sa;
-
-       if (!init) {
-               memset(&sa, 0, sizeof(sa));
-               sa.sa_sigaction = sigill_handler;
-               sa.sa_flags = SA_SIGINFO;
-               if (sigaction(SIGILL, &sa, NULL))
-                       perror("sigill_handler");
-               init = 1;
-       }
-
-       asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
-}
 
 int rfi_flush_test(void)
 {
@@ -85,19 +26,33 @@ int rfi_flush_test(void)
        __u64 l1d_misses_total = 0;
        unsigned long iterations = 100000, zero_size = 24 * 1024;
        unsigned long l1d_misses_expected;
-       int rfi_flush_org, rfi_flush;
+       int rfi_flush_orig, rfi_flush;
+       int have_entry_flush, entry_flush_orig;
 
        SKIP_IF(geteuid() != 0);
 
        // The PMU event we use only works on Power7 or later
        SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
-       if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
+       if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
                perror("Unable to read powerpc/rfi_flush debugfs file");
                SKIP_IF(1);
        }
 
-       rfi_flush = rfi_flush_org;
+       if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
+               have_entry_flush = 0;
+       } else {
+               have_entry_flush = 1;
+
+               if (entry_flush_orig != 0) {
+                       if (write_debugfs_file("powerpc/entry_flush", 0) < 0) {
+                               perror("error writing to powerpc/entry_flush debugfs file");
+                               return 1;
+                       }
+               }
+       }
+
+       rfi_flush = rfi_flush_orig;
 
        fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
        FAIL_IF(fd < 0);
@@ -106,6 +61,7 @@ int rfi_flush_test(void)
 
        FAIL_IF(perf_event_enable(fd));
 
+       // disable L1 prefetching
        set_dscr(1);
 
        iter = repetitions;
@@ -147,8 +103,8 @@ again:
                       repetitions * l1d_misses_expected / 2,
                       passes, repetitions);
 
-       if (rfi_flush == rfi_flush_org) {
-               rfi_flush = !rfi_flush_org;
+       if (rfi_flush == rfi_flush_orig) {
+               rfi_flush = !rfi_flush_orig;
                if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) {
                        perror("error writing to powerpc/rfi_flush debugfs file");
                        return 1;
@@ -164,11 +120,19 @@ again:
 
        set_dscr(0);
 
-       if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) {
+       if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
                perror("unable to restore original value of powerpc/rfi_flush debugfs file");
                return 1;
        }
 
+       if (have_entry_flush) {
+               if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
+                       perror("unable to restore original value of powerpc/entry_flush "
+                              "debugfs file");
+                       return 1;
+               }
+       }
+
        return rc;
 }
 
index 4a18043..26c72f2 100644 (file)
@@ -1758,10 +1758,10 @@ TEST_F(TRACE_poke, getpid_runs_normally)
                 * and the code is stored as a positive value.  \
                 */                                             \
                if (_result < 0) {                              \
-                       SYSCALL_RET(_regs) = -result;           \
+                       SYSCALL_RET(_regs) = -_result;          \
                        (_regs).ccr |= 0x10000000;              \
                } else {                                        \
-                       SYSCALL_RET(_regs) = result;            \
+                       SYSCALL_RET(_regs) = _result;           \
                        (_regs).ccr &= ~0x10000000;             \
                }                                               \
        } while (0)
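
Note: the fix above is classic macro hygiene: inside the macro body, plain
"result" resolved to whatever identifier happened to be in scope at the
expansion site rather than the parameter "_result". A minimal illustration
(not from the patch):

	#define NEGATE_BAD(_x)	(-x)	/* typo: captures the caller's 'x' */
	#define NEGATE_OK(_x)	(-(_x))

	int x = 5;
	int bad = NEGATE_BAD(3);	/* expands to -x, i.e. -5, not -3 */
	int ok = NEGATE_OK(3);		/* -3 as intended */
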
@@ -1804,8 +1804,8 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 #define SYSCALL_RET(_regs)     (_regs).a[(_regs).windowbase * 4 + 2]
 #elif defined(__sh__)
 # define ARCH_REGS             struct pt_regs
-# define SYSCALL_NUM(_regs)    (_regs).gpr[3]
-# define SYSCALL_RET(_regs)    (_regs).gpr[0]
+# define SYSCALL_NUM(_regs)    (_regs).regs[3]
+# define SYSCALL_RET(_regs)    (_regs).regs[0]
 #else
 # error "Do not know how to find your architecture's registers and syscalls"
 #endif
index c33a7aa..b71828d 100644 (file)
@@ -59,6 +59,7 @@ CONFIG_NET_IFE_SKBPRIO=m
 CONFIG_NET_IFE_SKBTCINDEX=m
 CONFIG_NET_SCH_FIFO=y
 CONFIG_NET_SCH_ETS=m
+CONFIG_NET_SCH_RED=m
 
 #
 ## Network testing
index 30873b1..691893a 100644 (file)
@@ -60,9 +60,13 @@ ifeq ($(CAN_BUILD_X86_64),1)
 TEST_GEN_FILES += $(BINARIES_64)
 endif
 else
+
+ifneq (,$(findstring $(ARCH),powerpc))
 TEST_GEN_FILES += protection_keys
 endif
 
+endif
+
 ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64))
 TEST_GEN_FILES += va_128TBswitch
 TEST_GEN_FILES += virtual_address_range
index 9b0912a..c442559 100644 (file)
@@ -206,19 +206,19 @@ static int hugetlb_release_pages(char *rel_area)
        return ret;
 }
 
-
 static void hugetlb_allocate_area(void **alloc_area)
 {
        void *area_alias = NULL;
        char **alloc_area_alias;
+
        *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
                           (map_shared ? MAP_SHARED : MAP_PRIVATE) |
                           MAP_HUGETLB,
                           huge_fd, *alloc_area == area_src ? 0 :
                           nr_pages * page_size);
        if (*alloc_area == MAP_FAILED) {
-               fprintf(stderr, "mmap of hugetlbfs file failed\n");
-               *alloc_area = NULL;
+               perror("mmap of hugetlbfs file failed");
+               goto fail;
        }
 
        if (map_shared) {
@@ -227,14 +227,11 @@ static void hugetlb_allocate_area(void **alloc_area)
                                  huge_fd, *alloc_area == area_src ? 0 :
                                  nr_pages * page_size);
                if (area_alias == MAP_FAILED) {
-                       if (munmap(*alloc_area, nr_pages * page_size) < 0) {
-                               perror("hugetlb munmap");
-                               exit(1);
-                       }
-                       *alloc_area = NULL;
-                       return;
+                       perror("mmap of hugetlb file alias failed");
+                       goto fail_munmap;
                }
        }
+
        if (*alloc_area == area_src) {
                huge_fd_off0 = *alloc_area;
                alloc_area_alias = &area_src_alias;
@@ -243,6 +240,16 @@ static void hugetlb_allocate_area(void **alloc_area)
        }
        if (area_alias)
                *alloc_area_alias = area_alias;
+
+       return;
+
+fail_munmap:
+       if (munmap(*alloc_area, nr_pages * page_size) < 0) {
+               perror("hugetlb munmap");
+               exit(1);
+       }
+fail:
+       *alloc_area = NULL;
 }
 
 static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
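
Note: the rework above converts hugetlb_allocate_area() to the usual
goto-unwind style: each later failure jumps to a label that undoes the earlier
steps in reverse order, keeping cleanup in one place. The generic shape, as a
sketch with hypothetical step1()/step2()/undo_step1() helpers:

	int setup(void)
	{
		if (step1() < 0)
			goto fail;
		if (step2() < 0)
			goto err_undo_step1;
		return 0;

	err_undo_step1:
		undo_step1();
	fail:
		return -1;
	}
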
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
new file mode 100644 (file)
index 0000000..9d01299
--- /dev/null
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM dirty ring implementation
+ *
+ * Copyright 2019 Red Hat, Inc.
+ */
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/vmalloc.h>
+#include <linux/kvm_dirty_ring.h>
+#include <trace/events/kvm.h>
+
+int __weak kvm_cpu_dirty_log_size(void)
+{
+       return 0;
+}
+
+u32 kvm_dirty_ring_get_rsvd_entries(void)
+{
+       return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size();
+}
+
+static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
+{
+       return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
+}
+
+bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+{
+       return kvm_dirty_ring_used(ring) >= ring->soft_limit;
+}
+
+static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring)
+{
+       return kvm_dirty_ring_used(ring) >= ring->size;
+}
+
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+{
+       struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+
+       WARN_ON_ONCE(vcpu->kvm != kvm);
+
+       return &vcpu->dirty_ring;
+}
+
+static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
+{
+       struct kvm_memory_slot *memslot;
+       int as_id, id;
+
+       as_id = slot >> 16;
+       id = (u16)slot;
+
+       if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+               return;
+
+       memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
+
+       if (!memslot || (offset + __fls(mask)) >= memslot->npages)
+               return;
+
+       spin_lock(&kvm->mmu_lock);
+       kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
+       spin_unlock(&kvm->mmu_lock);
+}
+
+int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size)
+{
+       ring->dirty_gfns = vzalloc(size);
+       if (!ring->dirty_gfns)
+               return -ENOMEM;
+
+       ring->size = size / sizeof(struct kvm_dirty_gfn);
+       ring->soft_limit = ring->size - kvm_dirty_ring_get_rsvd_entries();
+       ring->dirty_index = 0;
+       ring->reset_index = 0;
+       ring->index = index;
+
+       return 0;
+}
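
Note: both the producer and the consumer below index the array as
dirty_gfns[index & (ring->size - 1)], which is a valid modulo only while
ring->size is a power of two; the indices themselves grow monotonically and
are never wrapped, so the kvm_dirty_ring_used() subtraction stays correct even
across u32 overflow. A worked illustration:

	u32 dirty = 0xffffffff, reset = 0xfffffffd;
	u32 used = dirty - reset;	/* 2 */

	dirty += 3;			/* wraps around to 0x2 */
	used = dirty - reset;		/* still correct: 5 (mod 2^32) */
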
+
+static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn)
+{
+       gfn->flags = 0;
+}
+
+static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn)
+{
+       gfn->flags = KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline bool kvm_dirty_gfn_invalid(struct kvm_dirty_gfn *gfn)
+{
+       return gfn->flags == 0;
+}
+
+static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
+{
+       return gfn->flags & KVM_DIRTY_GFN_F_RESET;
+}
+
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
+{
+       u32 cur_slot, next_slot;
+       u64 cur_offset, next_offset;
+       unsigned long mask;
+       int count = 0;
+       struct kvm_dirty_gfn *entry;
+       bool first_round = true;
+
+       /* This is only needed to make compilers happy */
+       cur_slot = cur_offset = mask = 0;
+
+       while (true) {
+               entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];
+
+               if (!kvm_dirty_gfn_harvested(entry))
+                       break;
+
+               next_slot = READ_ONCE(entry->slot);
+               next_offset = READ_ONCE(entry->offset);
+
+               /* Update the flags to reflect that this GFN is reset */
+               kvm_dirty_gfn_set_invalid(entry);
+
+               ring->reset_index++;
+               count++;
+               /*
+                * Try to coalesce the reset operations when the guest is
+                * scanning pages in the same slot.
+                */
+               if (!first_round && next_slot == cur_slot) {
+                       s64 delta = next_offset - cur_offset;
+
+                       if (delta >= 0 && delta < BITS_PER_LONG) {
+                               mask |= 1ull << delta;
+                               continue;
+                       }
+
+                       /* Backwards visit, careful about overflows!  */
+                       if (delta > -BITS_PER_LONG && delta < 0 &&
+                           (mask << -delta >> -delta) == mask) {
+                               cur_offset = next_offset;
+                               mask = (mask << -delta) | 1;
+                               continue;
+                       }
+               }
+               kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+               cur_slot = next_slot;
+               cur_offset = next_offset;
+               mask = 1;
+               first_round = false;
+       }
+
+       kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+
+       trace_kvm_dirty_ring_reset(ring);
+
+       return count;
+}
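
Note: the coalescing in kvm_dirty_ring_reset() is easiest to see with a worked
example. Suppose the harvested entries are (slot 1, offset 16), (1, 17),
(1, 18), then (1, 12): the first entry seeds cur_offset = 16, mask = 0b1;
offsets 17 and 18 have deltas 1 and 2, growing mask to 0b111; offset 12 is a
backwards visit with delta -4, and since (mask << 4 >> 4) == mask still holds,
cur_offset slides to 12 and mask becomes (0b111 << 4) | 1 = 0b1110001. All
four pages are then re-protected by a single kvm_reset_dirty_gfn() call
instead of four.
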
+
+void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
+{
+       struct kvm_dirty_gfn *entry;
+
+       /* It should never get full */
+       WARN_ON_ONCE(kvm_dirty_ring_full(ring));
+
+       entry = &ring->dirty_gfns[ring->dirty_index & (ring->size - 1)];
+
+       entry->slot = slot;
+       entry->offset = offset;
+       /*
+        * Make sure the data is filled in before we publish this to
+        * the userspace program.  There's no paired kernel-side reader.
+        */
+       smp_wmb();
+       kvm_dirty_gfn_set_dirtied(entry);
+       ring->dirty_index++;
+       trace_kvm_dirty_ring_push(ring, slot, offset);
+}
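
Note: the smp_wmb() above orders the slot/offset stores against the flags
store that publishes the entry; the pairing reader is the userspace harvester,
not the kernel. A hedged sketch of that consumer side (collect() and the index
bookkeeping are hypothetical):

	struct kvm_dirty_gfn *gfn = &ring[reset_index & (size - 1)];

	if (READ_ONCE(gfn->flags) & KVM_DIRTY_GFN_F_DIRTY) {
		smp_rmb();			/* pairs with the producer's smp_wmb() */
		collect(gfn->slot, gfn->offset);	/* hypothetical */
		WRITE_ONCE(gfn->flags, KVM_DIRTY_GFN_F_RESET);	/* hand back */
		reset_index++;
	}
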
+
+struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset)
+{
+       return vmalloc_to_page((void *)ring->dirty_gfns + offset * PAGE_SIZE);
+}
+
+void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
+{
+       vfree(ring->dirty_gfns);
+       ring->dirty_gfns = NULL;
+}
index c2323c2..e996989 100644 (file)
@@ -191,8 +191,12 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
        struct kvm *kvm = irqfd->kvm;
        unsigned seq;
        int idx;
+       int ret = 0;
 
        if (flags & EPOLLIN) {
+               u64 cnt;
+
+               eventfd_ctx_do_read(irqfd->eventfd, &cnt);
+
                idx = srcu_read_lock(&kvm->irq_srcu);
                do {
                        seq = read_seqcount_begin(&irqfd->irq_entry_sc);
@@ -204,6 +208,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
                                              false) == -EWOULDBLOCK)
                        schedule_work(&irqfd->inject);
                srcu_read_unlock(&kvm->irq_srcu, idx);
+               ret = 1;
        }
 
        if (flags & EPOLLHUP) {
@@ -227,7 +232,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
                spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
        }
 
-       return 0;
+       return ret;
 }
 
 static void
@@ -236,7 +241,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 {
        struct kvm_kernel_irqfd *irqfd =
                container_of(pt, struct kvm_kernel_irqfd, pt);
-       add_wait_queue(wqh, &irqfd->wait);
+       add_wait_queue_priority(wqh, &irqfd->wait);
 }
 
 /* Must be called under irqfds.lock */
index 2541a17..19dae28 100644 (file)
@@ -63,6 +63,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
 
+#include <linux/kvm_dirty_ring.h>
+
 /* Worst case buffer size needed for holding an integer. */
 #define ITOA_MAX_LEN 12
 
@@ -415,6 +417,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 
 void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+       kvm_dirty_ring_free(&vcpu->dirty_ring);
        kvm_arch_vcpu_destroy(vcpu);
 
        /*
@@ -482,9 +485,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
        kvm->mmu_notifier_count++;
        need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
                                             range->flags);
-       need_tlb_flush |= kvm->tlbs_dirty;
        /* we've to flush the tlb before the pages can be freed */
-       if (need_tlb_flush)
+       if (need_tlb_flush || kvm->tlbs_dirty)
                kvm_flush_remote_tlbs(kvm);
 
        spin_unlock(&kvm->mmu_lock);
@@ -1362,7 +1364,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
        /* Allocate/free page dirty bitmap as needed */
        if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
                new.dirty_bitmap = NULL;
-       else if (!new.dirty_bitmap) {
+       else if (!new.dirty_bitmap && !kvm->dirty_ring_size) {
                r = kvm_alloc_dirty_bitmap(&new);
                if (r)
                        return r;
@@ -1423,6 +1425,10 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
        unsigned long n;
        unsigned long any = 0;
 
+       /* Dirty ring tracking is exclusive to dirty log tracking */
+       if (kvm->dirty_ring_size)
+               return -ENXIO;
+
        *memslot = NULL;
        *is_dirty = 0;
 
@@ -1484,6 +1490,10 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
        unsigned long *dirty_bitmap_buffer;
        bool flush;
 
+       /* Dirty ring tracking is exclusive to dirty log tracking */
+       if (kvm->dirty_ring_size)
+               return -ENXIO;
+
        as_id = log->slot >> 16;
        id = (u16)log->slot;
        if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
@@ -1592,6 +1602,10 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
        unsigned long *dirty_bitmap_buffer;
        bool flush;
 
+       /* Dirty ring tracking is exclusive to dirty log tracking */
+       if (kvm->dirty_ring_size)
+               return -ENXIO;
+
        as_id = log->slot >> 16;
        id = (u16)log->slot;
        if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
@@ -2196,7 +2210,8 @@ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_map);
 
-static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
+static void __kvm_unmap_gfn(struct kvm *kvm,
+                       struct kvm_memory_slot *memslot,
                        struct kvm_host_map *map,
                        struct gfn_to_pfn_cache *cache,
                        bool dirty, bool atomic)
@@ -2221,7 +2236,7 @@ static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
 #endif
 
        if (dirty)
-               mark_page_dirty_in_slot(memslot, map->gfn);
+               mark_page_dirty_in_slot(kvm, memslot, map->gfn);
 
        if (cache)
                cache->dirty |= dirty;
@@ -2235,7 +2250,7 @@ static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
 int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, 
                  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
 {
-       __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map,
+       __kvm_unmap_gfn(vcpu->kvm, gfn_to_memslot(vcpu->kvm, map->gfn), map,
                        cache, dirty, atomic);
        return 0;
 }
@@ -2243,8 +2258,8 @@ EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
 
 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
 {
-       __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL,
-                       dirty, false);
+       __kvm_unmap_gfn(vcpu->kvm, kvm_vcpu_gfn_to_memslot(vcpu, map->gfn),
+                       map, NULL, dirty, false);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
 
@@ -2418,7 +2433,8 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
 
-static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
+static int __kvm_write_guest_page(struct kvm *kvm,
+                                 struct kvm_memory_slot *memslot, gfn_t gfn,
                                  const void *data, int offset, int len)
 {
        int r;
@@ -2430,7 +2446,7 @@ static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
        r = __copy_to_user((void __user *)addr + offset, data, len);
        if (r)
                return -EFAULT;
-       mark_page_dirty_in_slot(memslot, gfn);
+       mark_page_dirty_in_slot(kvm, memslot, gfn);
        return 0;
 }
 
@@ -2439,7 +2455,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
 {
        struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 
-       return __kvm_write_guest_page(slot, gfn, data, offset, len);
+       return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
 
@@ -2448,7 +2464,7 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 {
        struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-       return __kvm_write_guest_page(slot, gfn, data, offset, len);
+       return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
 
@@ -2567,7 +2583,7 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
        r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
        if (r)
                return -EFAULT;
-       mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
+       mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT);
 
        return 0;
 }
@@ -2616,23 +2632,16 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
 
-int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
-{
-       const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
-
-       return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
-}
-EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
-
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 {
+       const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
        gfn_t gfn = gpa >> PAGE_SHIFT;
        int seg;
        int offset = offset_in_page(gpa);
        int ret;
 
        while ((seg = next_segment(len, offset)) != 0) {
-               ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
+               ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, seg);
                if (ret < 0)
                        return ret;
                offset = 0;
@@ -2643,12 +2652,19 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
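For context, the per-iteration length comes from next_segment(), defined earlier in kvm_main.c, which clamps each write to what is left of the current page; that is why the loop above passes seg rather than the total len:

        static int next_segment(unsigned long len, int offset)
        {
                if (len > PAGE_SIZE - offset)
                        return PAGE_SIZE - offset;
                else
                        return len;
        }

With 4096-byte pages, clearing 6000 bytes starting at page offset 3000 therefore issues writes of 1096, 4096 and 808 bytes.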
 
-void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn)
+void mark_page_dirty_in_slot(struct kvm *kvm,
+                            struct kvm_memory_slot *memslot,
+                            gfn_t gfn)
 {
-       if (memslot && memslot->dirty_bitmap) {
+       if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
                unsigned long rel_gfn = gfn - memslot->base_gfn;
+               u32 slot = (memslot->as_id << 16) | memslot->id;
 
-               set_bit_le(rel_gfn, memslot->dirty_bitmap);
+               if (kvm->dirty_ring_size)
+                       kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
+                                           slot, rel_gfn);
+               else
+                       set_bit_le(rel_gfn, memslot->dirty_bitmap);
        }
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);
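When the ring is in use, each dirtied page is published to the vCPU's ring as a (slot, offset) pair rather than a bitmap bit. Below is the entry layout added by this series (shown for reference) and a minimal harvest loop; gfns, nents and fetch are assumed VMM state (the mmap()ed ring base, the ring size in entries, and a private cursor), and collect_dirty_page() is a hypothetical VMM callback:

        struct kvm_dirty_gfn {
                __u32 flags;            /* bit 0: DIRTY, bit 1: RESET */
                __u32 slot;             /* (as_id << 16) | slot id */
                __u64 offset;           /* page offset within the slot */
        };

        /* A real VMM must read flags with acquire semantics. */
        while (gfns[fetch % nents].flags & KVM_DIRTY_GFN_F_DIRTY) {
                struct kvm_dirty_gfn *e = &gfns[fetch % nents];

                collect_dirty_page(e->slot >> 16, e->slot & 0xffff, e->offset);
                e->flags |= KVM_DIRTY_GFN_F_RESET;      /* hand back to KVM */
                fetch++;
        }

After draining, the entries are actually recycled with KVM_RESET_DIRTY_RINGS (see below).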
@@ -2658,7 +2674,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
        struct kvm_memory_slot *memslot;
 
        memslot = gfn_to_memslot(kvm, gfn);
-       mark_page_dirty_in_slot(memslot, gfn);
+       mark_page_dirty_in_slot(kvm, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
 
@@ -2667,7 +2683,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
        struct kvm_memory_slot *memslot;
 
        memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-       mark_page_dirty_in_slot(memslot, gfn);
+       mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
 
@@ -3008,6 +3024,17 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
 
+static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff)
+{
+#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+       return (pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) &&
+           (pgoff < KVM_DIRTY_LOG_PAGE_OFFSET +
+            kvm->dirty_ring_size / PAGE_SIZE);
+#else
+       return false;
+#endif
+}
+
 static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
 {
        struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
@@ -3023,6 +3050,10 @@ static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
        else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
                page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
 #endif
+       else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff))
+               page = kvm_dirty_ring_get_page(
+                   &vcpu->dirty_ring,
+                   vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET);
        else
                return kvm_arch_vcpu_fault(vcpu, vmf);
        get_page(page);
@@ -3036,6 +3067,14 @@ static const struct vm_operations_struct kvm_vcpu_vm_ops = {
 
 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
 {
+       struct kvm_vcpu *vcpu = file->private_data;
+       unsigned long pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+       if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) ||
+            kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) &&
+           ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED)))
+               return -EINVAL;
+
        vma->vm_ops = &kvm_vcpu_vm_ops;
        return 0;
 }
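The new check forces the ring pages to be mapped shared and non-executable; anything else now fails with EINVAL. From userspace that looks roughly like this sketch, where ring_bytes is the size passed when enabling the capability and KVM_DIRTY_LOG_PAGE_OFFSET comes from the arch uapi headers:

        long page_size = sysconf(_SC_PAGESIZE);        /* <unistd.h> */
        struct kvm_dirty_gfn *gfns;

        /* MAP_SHARED without PROT_EXEC is the only accepted combination */
        gfns = mmap(NULL, ring_bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
                    vcpu_fd, KVM_DIRTY_LOG_PAGE_OFFSET * page_size);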
@@ -3129,6 +3168,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
        if (r)
                goto vcpu_free_run_page;
 
+       if (kvm->dirty_ring_size) {
+               r = kvm_dirty_ring_alloc(&vcpu->dirty_ring,
+                                        id, kvm->dirty_ring_size);
+               if (r)
+                       goto arch_vcpu_destroy;
+       }
+
        mutex_lock(&kvm->lock);
        if (kvm_get_vcpu_by_id(kvm, id)) {
                r = -EEXIST;
@@ -3162,6 +3208,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 
 unlock_vcpu_destroy:
        mutex_unlock(&kvm->lock);
+       kvm_dirty_ring_free(&vcpu->dirty_ring);
+arch_vcpu_destroy:
        kvm_arch_vcpu_destroy(vcpu);
 vcpu_free_run_page:
        free_page((unsigned long)vcpu->run);
@@ -3634,12 +3682,78 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #endif
        case KVM_CAP_NR_MEMSLOTS:
                return KVM_USER_MEM_SLOTS;
+       case KVM_CAP_DIRTY_LOG_RING:
+#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+               return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
+#else
+               return 0;
+#endif
        default:
                break;
        }
        return kvm_vm_ioctl_check_extension(kvm, arg);
 }
 
+static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size)
+{
+       int r;
+
+       if (!KVM_DIRTY_LOG_PAGE_OFFSET)
+               return -EINVAL;
+
+       /* The size must be a power of 2 */
+       if (!size || (size & (size - 1)))
+               return -EINVAL;
+
+       /* Must be large enough to hold the reserved entries, and at least one page */
+       if (size < kvm_dirty_ring_get_rsvd_entries() *
+           sizeof(struct kvm_dirty_gfn) || size < PAGE_SIZE)
+               return -EINVAL;
+
+       if (size > KVM_DIRTY_RING_MAX_ENTRIES *
+           sizeof(struct kvm_dirty_gfn))
+               return -E2BIG;
+
+       /* The ring size can only be set once */
+       if (kvm->dirty_ring_size)
+               return -EINVAL;
+
+       mutex_lock(&kvm->lock);
+
+       if (kvm->created_vcpus) {
+               /* The size cannot be changed once vCPUs have been created */
+               r = -EINVAL;
+       } else {
+               kvm->dirty_ring_size = size;
+               r = 0;
+       }
+
+       mutex_unlock(&kvm->lock);
+       return r;
+}
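Taken together, a VMM probes the maximum size and then enables the ring on the VM fd before creating the first vCPU. A sketch, with the 65536-byte size chosen arbitrarily (a power of two, at least a page, within the advertised maximum):

        int max_bytes = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_DIRTY_LOG_RING);
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_DIRTY_LOG_RING,
                .args[0] = 65536,       /* ring size in bytes */
        };

        if (max_bytes > 0 && ioctl(vm_fd, KVM_ENABLE_CAP, &cap) == 0)
                ring_bytes = cap.args[0];       /* later used for mmap() */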
+
+static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
+{
+       int i;
+       struct kvm_vcpu *vcpu;
+       int cleared = 0;
+
+       if (!kvm->dirty_ring_size)
+               return -EINVAL;
+
+       mutex_lock(&kvm->slots_lock);
+
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring);
+
+       mutex_unlock(&kvm->slots_lock);
+
+       if (cleared)
+               kvm_flush_remote_tlbs(kvm);
+
+       return cleared;
+}
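Once userspace has flagged the harvested entries with KVM_DIRTY_GFN_F_RESET, it returns them with a single VM-wide ioctl; the call takes no argument, and the return value is the number of entries recycled across all vCPU rings:

        int reset = ioctl(vm_fd, KVM_RESET_DIRTY_RINGS);

        if (reset > 0)
                printf("recycled %d dirty ring entries\n", reset);

A positive count also means the kernel flushed remote TLBs, so write protection on the reset pages is effective again before the next harvest.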
+
 int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                                                  struct kvm_enable_cap *cap)
 {
@@ -3670,6 +3784,8 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
                kvm->max_halt_poll_ns = cap->args[0];
                return 0;
        }
+       case KVM_CAP_DIRTY_LOG_RING:
+               return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
        default:
                return kvm_vm_ioctl_enable_cap(kvm, cap);
        }
@@ -3854,6 +3970,9 @@ static long kvm_vm_ioctl(struct file *filp,
        case KVM_CHECK_EXTENSION:
                r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
                break;
+       case KVM_RESET_DIRTY_RINGS:
+               r = kvm_vm_ioctl_reset_dirty_pages(kvm);
+               break;
        default:
                r = kvm_arch_vm_ioctl(filp, ioctl, arg);
        }