Merge tag 'kvm-arm64/for-3.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorPaolo Bonzini <pbonzini@redhat.com>
Mon, 11 Nov 2013 11:05:20 +0000 (12:05 +0100)
committerPaolo Bonzini <pbonzini@redhat.com>
Mon, 11 Nov 2013 11:05:20 +0000 (12:05 +0100)
A handful of fixes for KVM/arm64:

- A couple a basic fixes for running BE guests on a LE host
- A performance improvement for overcommitted VMs (same as the equivalent
  patch for ARM)

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts:
arch/arm/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_emulate.h

821 files changed:
CREDITS
Documentation/ABI/stable/sysfs-bus-usb
Documentation/ABI/testing/sysfs-devices-power
Documentation/ABI/testing/sysfs-power
Documentation/acpi/dsdt-override.txt
Documentation/block/00-INDEX
Documentation/block/cmdline-partition.txt
Documentation/devicetree/bindings/memory.txt [deleted file]
Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt [moved from Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt with 93% similarity]
Documentation/devicetree/bindings/mmc/tmio_mmc.txt
Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
Documentation/devicetree/bindings/pci/designware-pcie.txt
Documentation/devicetree/bindings/serial/qca,ar9330-uart.txt [moved from Documentation/devicetree/bindings/tty/serial/qca,ar9330-uart.txt with 100% similarity]
Documentation/kernel-parameters.txt
Documentation/sound/alsa/HD-Audio-Models.txt
Documentation/virtual/kvm/00-INDEX [new file with mode: 0644]
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/cpuid.txt
Documentation/virtual/kvm/devices/vfio.txt [new file with mode: 0644]
Documentation/virtual/kvm/locking.txt
MAINTAINERS
Makefile
arch/Kconfig
arch/arc/include/asm/spinlock.h
arch/arc/include/asm/uaccess.h
arch/arc/kernel/ptrace.c
arch/arc/kernel/signal.c
arch/arc/kernel/time.c
arch/arc/kernel/unaligned.c
arch/arm/Kconfig
arch/arm/Makefile
arch/arm/boot/Makefile
arch/arm/boot/dts/Makefile
arch/arm/boot/dts/armada-370-netgear-rn102.dts
arch/arm/boot/dts/armada-xp.dtsi
arch/arm/boot/dts/at91sam9x5.dtsi
arch/arm/boot/dts/atlas6.dtsi
arch/arm/boot/dts/exynos5250.dtsi
arch/arm/boot/dts/kirkwood.dtsi
arch/arm/boot/dts/omap3-beagle-xm.dts
arch/arm/boot/dts/omap3.dtsi
arch/arm/boot/dts/prima2.dtsi
arch/arm/boot/dts/r8a73a4.dtsi
arch/arm/boot/dts/r8a7778.dtsi
arch/arm/boot/dts/r8a7779.dtsi
arch/arm/boot/dts/r8a7790.dtsi
arch/arm/boot/dts/sh73a0.dtsi
arch/arm/boot/install.sh
arch/arm/common/edma.c
arch/arm/common/mcpm_entry.c
arch/arm/common/sharpsl_param.c
arch/arm/configs/multi_v7_defconfig
arch/arm/crypto/aes-armv4.S
arch/arm/include/asm/Kbuild
arch/arm/include/asm/jump_label.h
arch/arm/include/asm/kvm_arm.h
arch/arm/include/asm/kvm_asm.h
arch/arm/include/asm/kvm_emulate.h
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_mmu.h
arch/arm/include/asm/mcpm.h
arch/arm/include/asm/pgtable-3level.h
arch/arm/include/asm/syscall.h
arch/arm/include/asm/uaccess.h
arch/arm/include/uapi/asm/kvm.h
arch/arm/kernel/entry-common.S
arch/arm/kernel/entry-header.S
arch/arm/kernel/head.S
arch/arm/kvm/Kconfig
arch/arm/kvm/Makefile
arch/arm/kvm/arm.c
arch/arm/kvm/coproc.c
arch/arm/kvm/coproc_a15.c
arch/arm/kvm/coproc_a7.c [new file with mode: 0644]
arch/arm/kvm/emulate.c
arch/arm/kvm/guest.c
arch/arm/kvm/handle_exit.c
arch/arm/kvm/mmu.c
arch/arm/kvm/psci.c
arch/arm/kvm/reset.c
arch/arm/mach-at91/at91rm9200_time.c
arch/arm/mach-at91/at91sam926x_time.c
arch/arm/mach-at91/at91sam9g45_reset.S
arch/arm/mach-at91/at91x40_time.c
arch/arm/mach-davinci/board-dm365-evm.c
arch/arm/mach-davinci/include/mach/serial.h
arch/arm/mach-integrator/pci_v3.h
arch/arm/mach-mvebu/coherency.c
arch/arm/mach-mvebu/pmsu.c
arch/arm/mach-mvebu/system-controller.c
arch/arm/mach-omap2/board-generic.c
arch/arm/mach-omap2/board-rx51-peripherals.c
arch/arm/mach-omap2/gpmc-onenand.c
arch/arm/mach-omap2/mux.h
arch/arm/mach-omap2/timer.c
arch/arm/mach-shmobile/board-armadillo800eva.c
arch/arm/mach-shmobile/board-lager.c
arch/arm/mach-vexpress/tc2_pm.c
arch/arm/mm/dma-mapping.c
arch/arm/mm/init.c
arch/arm64/Kconfig.debug
arch/arm64/configs/defconfig
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/uaccess.h
arch/arm64/kernel/fpsimd.c
arch/arm64/kvm/guest.c
arch/arm64/mm/tlb.S
arch/avr32/include/asm/Kbuild
arch/avr32/include/asm/cputime.h [deleted file]
arch/avr32/include/asm/delay.h [deleted file]
arch/avr32/include/asm/device.h [deleted file]
arch/avr32/include/asm/div64.h [deleted file]
arch/avr32/include/asm/emergency-restart.h [deleted file]
arch/avr32/include/asm/futex.h [deleted file]
arch/avr32/include/asm/irq_regs.h [deleted file]
arch/avr32/include/asm/local.h [deleted file]
arch/avr32/include/asm/local64.h [deleted file]
arch/avr32/include/asm/percpu.h [deleted file]
arch/avr32/include/asm/scatterlist.h [deleted file]
arch/avr32/include/asm/sections.h [deleted file]
arch/avr32/include/asm/topology.h [deleted file]
arch/avr32/include/asm/xor.h [deleted file]
arch/avr32/kernel/process.c
arch/avr32/kernel/time.c
arch/ia64/include/asm/kvm_host.h
arch/ia64/kvm/kvm-ia64.c
arch/mips/alchemy/board-mtx1.c
arch/mips/include/asm/cpu-features.h
arch/mips/include/asm/jump_label.h
arch/mips/include/asm/kvm_host.h
arch/mips/kernel/octeon_switch.S
arch/mips/kernel/r2300_switch.S
arch/mips/kernel/r4k_switch.S
arch/mips/kvm/kvm_mips.c
arch/mips/mm/c-r4k.c
arch/mips/mm/dma-default.c
arch/openrisc/include/asm/prom.h
arch/parisc/include/asm/traps.h
arch/parisc/kernel/cache.c
arch/parisc/kernel/smp.c
arch/parisc/kernel/traps.c
arch/parisc/lib/memcpy.c
arch/parisc/mm/fault.c
arch/powerpc/boot/Makefile
arch/powerpc/boot/epapr-wrapper.c [new file with mode: 0644]
arch/powerpc/boot/epapr.c
arch/powerpc/boot/of.c
arch/powerpc/boot/wrapper
arch/powerpc/include/asm/disassemble.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/irq.h
arch/powerpc/include/asm/jump_label.h
arch/powerpc/include/asm/kvm_asm.h
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_book3s_32.h
arch/powerpc/include/asm/kvm_book3s_64.h
arch/powerpc/include/asm/kvm_book3s_asm.h
arch/powerpc/include/asm/kvm_booke.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/pte-book3e.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/reg_booke.h
arch/powerpc/include/asm/switch_to.h
arch/powerpc/include/uapi/asm/kvm.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/idle_power7.S
arch/powerpc/kernel/iommu.c
arch/powerpc/kernel/irq.c
arch/powerpc/kernel/misc_32.S
arch/powerpc/kernel/misc_64.S
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/ptrace.c
arch/powerpc/kernel/ptrace32.c
arch/powerpc/kernel/signal_32.c
arch/powerpc/kernel/sysfs.c
arch/powerpc/kernel/tm.S
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/vio.c
arch/powerpc/kvm/44x.c
arch/powerpc/kvm/44x_emulate.c
arch/powerpc/kvm/44x_tlb.c
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/Makefile
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s.h [new file with mode: 0644]
arch/powerpc/kvm/book3s_32_mmu.c
arch/powerpc/kvm/book3s_32_mmu_host.c
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_64_mmu_host.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_64_vio_hv.c
arch/powerpc/kvm/book3s_emulate.c
arch/powerpc/kvm/book3s_exports.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_interrupts.S
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_interrupts.S
arch/powerpc/kvm/book3s_mmu_hpte.c
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/book3s_pr_papr.c
arch/powerpc/kvm/book3s_rmhandlers.S
arch/powerpc/kvm/book3s_rtas.c
arch/powerpc/kvm/book3s_segment.S
arch/powerpc/kvm/book3s_xics.c
arch/powerpc/kvm/booke.c
arch/powerpc/kvm/booke.h
arch/powerpc/kvm/e500.c
arch/powerpc/kvm/e500.h
arch/powerpc/kvm/e500_emulate.c
arch/powerpc/kvm/e500_mmu.c
arch/powerpc/kvm/e500_mmu_host.c
arch/powerpc/kvm/e500mc.c
arch/powerpc/kvm/emulate.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/kvm/trace.h
arch/powerpc/kvm/trace_booke.h [new file with mode: 0644]
arch/powerpc/kvm/trace_pr.h [new file with mode: 0644]
arch/powerpc/lib/checksum_64.S
arch/powerpc/lib/sstep.c
arch/powerpc/mm/init_64.c
arch/powerpc/mm/mem.c
arch/powerpc/perf/power8-pmu.c
arch/powerpc/platforms/pseries/smp.c
arch/s390/Kconfig
arch/s390/include/asm/jump_label.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/mutex.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/spinlock.h
arch/s390/kernel/crash_dump.c
arch/s390/kernel/entry.S
arch/s390/kernel/entry64.S
arch/s390/kernel/kprobes.c
arch/s390/kvm/diag.c
arch/s390/kvm/gaccess.h
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/score/Kconfig
arch/score/Makefile
arch/score/include/asm/checksum.h
arch/score/include/asm/io.h
arch/score/include/asm/pgalloc.h
arch/score/kernel/entry.S
arch/score/kernel/process.c
arch/sparc/Kconfig
arch/sparc/include/asm/floppy_64.h
arch/sparc/include/asm/jump_label.h
arch/sparc/kernel/Makefile
arch/sparc/kernel/ds.c
arch/sparc/kernel/ldc.c
arch/tile/include/asm/atomic.h
arch/tile/include/asm/atomic_32.h
arch/tile/include/asm/cmpxchg.h
arch/tile/include/asm/percpu.h
arch/tile/kernel/hardwall.c
arch/tile/kernel/intvec_32.S
arch/tile/kernel/intvec_64.S
arch/tile/kernel/stack.c
arch/tile/lib/atomic_32.c
arch/x86/Kconfig
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/jump_label.h
arch/x86/include/asm/kvm_emulate.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/mutex_64.h
arch/x86/include/asm/pvclock.h
arch/x86/include/asm/xen/page.h
arch/x86/include/uapi/asm/kvm.h
arch/x86/include/uapi/asm/msr-index.h
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_uncore.c
arch/x86/kernel/kvm.c
arch/x86/kernel/kvmclock.c
arch/x86/kernel/microcode_amd.c
arch/x86/kernel/pvclock.c
arch/x86/kernel/reboot.c
arch/x86/kernel/sysfb_simplefb.c
arch/x86/kvm/Kconfig
arch/x86/kvm/Makefile
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/emulate.c
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/pci/mmconfig-shared.c
arch/x86/platform/efi/efi.c
arch/x86/xen/p2m.c
arch/x86/xen/smp.c
arch/x86/xen/spinlock.c
block/Kconfig
block/Makefile
block/partitions/Kconfig
block/partitions/cmdline.c
drivers/acpi/Kconfig
drivers/acpi/acpi_ipmi.c
drivers/acpi/scan.c
drivers/ata/sata_promise.c
drivers/base/core.c
drivers/bcma/driver_pci.c
drivers/block/cciss.c
drivers/block/cpqarray.c
drivers/bluetooth/ath3k.c
drivers/bluetooth/btusb.c
drivers/bus/mvebu-mbus.c
drivers/char/random.c
drivers/char/tpm/xen-tpmfront.c
drivers/clocksource/Kconfig
drivers/clocksource/clksrc-of.c
drivers/clocksource/em_sti.c
drivers/clocksource/exynos_mct.c
drivers/cpufreq/acpi-cpufreq.c
drivers/cpufreq/cpufreq-cpu0.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/exynos5440-cpufreq.c
drivers/cpufreq/intel_pstate.c
drivers/cpufreq/spear-cpufreq.c
drivers/dma/Kconfig
drivers/dma/edma.c
drivers/dma/imx-dma.c
drivers/dma/sh/rcar-hpbdma.c
drivers/gpio/gpio-omap.c
drivers/gpio/gpio-rcar.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_fb_helper.c
drivers/gpu/drm/gma500/gtt.c
drivers/gpu/drm/i2c/tda998x_drv.c
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_tv.c
drivers/gpu/drm/msm/mdp4/mdp4_kms.c
drivers/gpu/drm/msm/msm_drv.c
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/nouveau/core/subdev/mc/base.c
drivers/gpu/drm/radeon/btc_dpm.c
drivers/gpu/drm/radeon/btc_dpm.h
drivers/gpu/drm/radeon/ci_dpm.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/evergreen_hdmi.c
drivers/gpu/drm/radeon/evergreend.h
drivers/gpu/drm/radeon/ni_dpm.c
drivers/gpu/drm/radeon/r100.c
drivers/gpu/drm/radeon/r600_dpm.c
drivers/gpu/drm/radeon/r600_hdmi.c
drivers/gpu/drm/radeon/r600d.h
drivers/gpu/drm/radeon/radeon_asic.c
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/radeon/radeon_cs.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_pm.c
drivers/gpu/drm/radeon/radeon_ring.c
drivers/gpu/drm/radeon/radeon_test.c
drivers/gpu/drm/radeon/radeon_uvd.c
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/radeon/si_dpm.c
drivers/gpu/drm/radeon/sid.h
drivers/gpu/drm/radeon/trinity_dpm.c
drivers/gpu/drm/radeon/uvd_v1_0.c
drivers/hid/Kconfig
drivers/hid/hid-core.c
drivers/hid/hid-holtek-mouse.c
drivers/hid/hid-ids.h
drivers/hid/hid-roccat-kone.c
drivers/hid/hid-roccat-koneplus.c
drivers/hid/hid-roccat-kovaplus.c
drivers/hid/hid-roccat-pyra.c
drivers/hid/hid-wiimote-modules.c
drivers/hid/hid-wiimote.h
drivers/hid/hidraw.c
drivers/hid/uhid.c
drivers/hv/connection.c
drivers/hv/hv_kvp.c
drivers/hv/hv_snapshot.c
drivers/hv/hv_util.c
drivers/hwmon/applesmc.c
drivers/i2c/busses/i2c-designware-core.c
drivers/i2c/busses/i2c-designware-platdrv.c
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-ismt.c
drivers/i2c/busses/i2c-mv64xxx.c
drivers/i2c/busses/i2c-mxs.c
drivers/i2c/busses/i2c-omap.c
drivers/i2c/busses/i2c-s3c2410.c
drivers/i2c/busses/i2c-stu300.c
drivers/i2c/i2c-core.c
drivers/i2c/muxes/i2c-arb-gpio-challenge.c
drivers/i2c/muxes/i2c-mux-gpio.c
drivers/i2c/muxes/i2c-mux-pinctrl.c
drivers/iio/amplifiers/ad8366.c
drivers/iio/industrialio-core.c
drivers/iio/magnetometer/st_magn_core.c
drivers/infiniband/hw/amso1100/c2_ae.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/mthca/mthca_eq.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/iommu/Kconfig
drivers/iommu/arm-smmu.c
drivers/md/bcache/bcache.h
drivers/md/bcache/bset.c
drivers/md/bcache/btree.c
drivers/md/bcache/journal.c
drivers/md/bcache/request.c
drivers/md/bcache/sysfs.c
drivers/md/bcache/util.c
drivers/md/bcache/util.h
drivers/md/bcache/writeback.c
drivers/md/dm-io.c
drivers/md/dm-mpath.c
drivers/md/dm-snap-persistent.c
drivers/md/dm-snap.c
drivers/md/dm-stats.c
drivers/md/dm-thin.c
drivers/md/dm.c
drivers/md/dm.h
drivers/misc/mei/amthif.c
drivers/misc/mei/bus.c
drivers/misc/mei/client.h
drivers/misc/mei/hbm.c
drivers/misc/mei/init.c
drivers/misc/mei/main.c
drivers/misc/mei/mei_dev.h
drivers/mmc/host/sh_mobile_sdhi.c
drivers/mtd/devices/m25p80.c
drivers/mtd/nand/nand_base.c
drivers/net/bonding/bond_main.c
drivers/net/can/flexcan.c
drivers/net/can/slcan.c
drivers/net/can/usb/peak_usb/pcan_usb_core.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
drivers/net/ethernet/emulex/benet/be.h
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_cmds.h
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/freescale/gianfar_ptp.c
drivers/net/ethernet/intel/i40e/i40e_adminq.c
drivers/net/ethernet/intel/i40e/i40e_common.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/igb/igb_ethtool.c
drivers/net/ethernet/marvell/skge.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
drivers/net/ethernet/moxa/moxart_ether.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
drivers/net/ethernet/qlogic/qlge/qlge_mpi.c
drivers/net/ethernet/sfc/mcdi.c
drivers/net/ethernet/via/via-rhine.c
drivers/net/ethernet/xilinx/ll_temac_main.c
drivers/net/slip/slip.c
drivers/net/usb/dm9601.c
drivers/net/usb/qmi_wwan.c
drivers/net/usb/usbnet.c
drivers/net/vxlan.c
drivers/net/wireless/ath/ath9k/recv.c
drivers/net/wireless/ath/ath9k/xmit.c
drivers/net/wireless/brcm80211/brcmfmac/bcmsdh_sdmmc.c
drivers/net/wireless/brcm80211/brcmfmac/dhd_bus.h
drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c
drivers/net/wireless/brcm80211/brcmfmac/usb.c
drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
drivers/net/wireless/cw1200/cw1200_spi.c
drivers/net/wireless/cw1200/fwio.c
drivers/net/wireless/cw1200/hwbus.h
drivers/net/wireless/cw1200/hwio.c
drivers/net/wireless/mwifiex/11n_aggr.c
drivers/net/wireless/mwifiex/11n_aggr.h
drivers/net/wireless/mwifiex/cmdevt.c
drivers/net/wireless/mwifiex/usb.c
drivers/net/wireless/mwifiex/wmm.c
drivers/net/wireless/p54/p54usb.c
drivers/net/wireless/rtlwifi/wifi.h
drivers/net/xen-netback/xenbus.c
drivers/of/Kconfig
drivers/of/Makefile
drivers/of/base.c
drivers/of/fdt.c
drivers/of/of_reserved_mem.c [deleted file]
drivers/of/platform.c
drivers/pci/hotplug/acpiphp_glue.c
drivers/pci/pci.c
drivers/pinctrl/pinconf.c
drivers/pinctrl/pinctrl-exynos.c
drivers/pinctrl/pinctrl-palmas.c
drivers/pinctrl/pinctrl-tegra114.c
drivers/regulator/da9063-regulator.c
drivers/regulator/palmas-regulator.c
drivers/regulator/ti-abb-regulator.c
drivers/regulator/wm831x-ldo.c
drivers/regulator/wm8350-regulator.c
drivers/s390/char/sclp_cmd.c
drivers/s390/char/tty3270.c
drivers/spi/spi-atmel.c
drivers/spi/spi-clps711x.c
drivers/spi/spi-fsl-dspi.c
drivers/spi/spi-mpc512x-psc.c
drivers/spi/spi-pxa2xx.c
drivers/spi/spi-s3c64xx.c
drivers/spi/spi-sh-hspi.c
drivers/staging/comedi/drivers/ni_65xx.c
drivers/staging/imx-drm/imx-drm-core.c
drivers/staging/lustre/lustre/obdecho/echo_client.c
drivers/staging/octeon-usb/cvmx-usb.c
drivers/staging/rtl8188eu/core/rtw_mp.c
drivers/staging/rtl8188eu/hal/rtl8188e_dm.c
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
drivers/staging/rtl8188eu/os_dep/usb_intf.c
drivers/staging/rtl8192u/r819xU_cmdpkt.c
drivers/staging/vt6656/iwctl.c
drivers/staging/vt6656/main_usb.c
drivers/staging/vt6656/rxtx.c
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_nego.c
drivers/target/iscsi/iscsi_target_util.c
drivers/target/target_core_sbc.c
drivers/target/target_core_transport.c
drivers/target/target_core_xcopy.c
drivers/tty/hvc/hvc_xen.c
drivers/tty/n_tty.c
drivers/tty/serial/pch_uart.c
drivers/tty/serial/serial-tegra.c
drivers/tty/tty_ioctl.c
drivers/usb/chipidea/Kconfig
drivers/usb/chipidea/ci_hdrc_imx.c
drivers/usb/chipidea/ci_hdrc_pci.c
drivers/usb/chipidea/core.c
drivers/usb/chipidea/udc.c
drivers/usb/core/devio.c
drivers/usb/core/hub.c
drivers/usb/dwc3/dwc3-pci.c
drivers/usb/gadget/f_fs.c
drivers/usb/gadget/pxa25x_udc.c
drivers/usb/gadget/s3c-hsotg.c
drivers/usb/host/ehci-fsl.c
drivers/usb/host/ehci-pci.c
drivers/usb/host/imx21-hcd.c
drivers/usb/host/ohci-hcd.c
drivers/usb/host/ohci-q.c
drivers/usb/host/uhci-pci.c
drivers/usb/host/uhci-q.c
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-mem.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.c
drivers/usb/host/xhci.h
drivers/usb/musb/musb_dsps.c
drivers/usb/musb/musb_gadget.c
drivers/usb/phy/phy-gpio-vbus-usb.c
drivers/usb/serial/option.c
drivers/vfio/vfio_iommu_type1.c
drivers/vhost/scsi.c
drivers/video/mmp/hw/mmp_ctrl.c
drivers/video/mxsfb.c
drivers/video/neofb.c
drivers/video/of_display_timing.c
drivers/video/omap2/displays-new/Kconfig
drivers/video/omap2/displays-new/connector-analog-tv.c
drivers/video/omap2/displays-new/connector-dvi.c
drivers/video/omap2/displays-new/connector-hdmi.c
drivers/video/omap2/dss/dispc.c
drivers/video/s3fb.c
drivers/watchdog/hpwdt.c
drivers/watchdog/kempld_wdt.c
drivers/watchdog/sunxi_wdt.c
drivers/watchdog/ts72xx_wdt.c
drivers/xen/balloon.c
fs/afs/dir.c
fs/aio.c
fs/binfmt_elf.c
fs/bio.c
fs/btrfs/async-thread.c
fs/btrfs/async-thread.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/transaction.c
fs/btrfs/volumes.c
fs/cifs/cifsfs.h
fs/cifs/cifsglob.h
fs/cifs/cifspdu.h
fs/cifs/cifssmb.c
fs/cifs/file.c
fs/cifs/fscache.c
fs/cifs/fscache.h
fs/cifs/inode.c
fs/cifs/readdir.c
fs/cifs/sess.c
fs/ext4/inode.c
fs/ext4/xattr.c
fs/fuse/dir.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/nfs/dir.c
fs/nfs/nfs4file.c
fs/nfs/nfs4filelayoutdev.c
fs/nfs/nfs4proc.c
fs/nilfs2/page.c
fs/nilfs2/segment.c
fs/ocfs2/dcache.c
fs/ocfs2/super.c
fs/reiserfs/journal.c
fs/statfs.c
fs/super.c
fs/sysv/super.c
fs/udf/ialloc.c
fs/udf/super.c
fs/udf/udf_sb.h
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_da_btree.c
fs/xfs/xfs_dir2_block.c
fs/xfs/xfs_dir2_format.h
fs/xfs/xfs_dir2_readdir.c
fs/xfs/xfs_dir2_sf.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_fs.h
fs/xfs/xfs_icache.c
fs/xfs/xfs_log_recover.c
include/asm-generic/hugetlb.h
include/asm-generic/vtime.h
include/dt-bindings/pinctrl/omap.h
include/linux/balloon_compaction.h
include/linux/bcma/bcma_driver_pci.h
include/linux/compiler-gcc4.h
include/linux/device-mapper.h
include/linux/hyperv.h
include/linux/intel-iommu.h
include/linux/kernel.h
include/linux/kvm_host.h
include/linux/memcontrol.h
include/linux/miscdevice.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mutex.h
include/linux/nfs_xdr.h
include/linux/of_irq.h
include/linux/of_reserved_mem.h [deleted file]
include/linux/perf_event.h
include/linux/random.h
include/linux/regulator/driver.h
include/linux/sched.h
include/linux/skbuff.h
include/linux/smp.h
include/linux/srcu.h
include/linux/timex.h
include/linux/usb/usbnet.h
include/linux/vgaarb.h
include/net/addrconf.h
include/net/bluetooth/hci.h
include/net/ip_vs.h
include/net/mrp.h
include/net/net_namespace.h
include/net/netfilter/nf_conntrack_synproxy.h
include/net/secure_seq.h
include/net/sock.h
include/trace/events/kvm.h
include/uapi/drm/radeon_drm.h
include/uapi/linux/kvm.h
include/uapi/linux/perf_event.h
init/main.c
ipc/msg.c
ipc/sem.c
ipc/shm.c
ipc/util.c
ipc/util.h
kernel/audit.c
kernel/context_tracking.c
kernel/events/core.c
kernel/hung_task.c
kernel/kmod.c
kernel/params.c
kernel/pid.c
kernel/power/snapshot.c
kernel/power/user.c
kernel/reboot.c
kernel/sched/fair.c
kernel/softirq.c
kernel/watchdog.c
lib/hexdump.c
lib/kobject.c
lib/lockref.c
mm/Kconfig
mm/bounce.c
mm/compaction.c
mm/hwpoison-inject.c
mm/madvise.c
mm/memcontrol.c
mm/memory-failure.c
mm/migrate.c
mm/mlock.c
mm/page_alloc.c
mm/slab_common.c
mm/vmscan.c
net/802/mrp.c
net/bluetooth/hci_core.c
net/bluetooth/hci_event.c
net/bluetooth/l2cap_core.c
net/bluetooth/rfcomm/tty.c
net/core/dev.c
net/core/flow_dissector.c
net/core/secure_seq.c
net/ipv4/af_inet.c
net/ipv4/igmp.c
net/ipv4/ip_tunnel.c
net/ipv4/ip_tunnel_core.c
net/ipv4/netfilter/ipt_SYNPROXY.c
net/ipv4/raw.c
net/ipv4/tcp_output.c
net/ipv4/udp.c
net/ipv6/addrconf.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/mcast.c
net/ipv6/netfilter/ip6t_SYNPROXY.c
net/ipv6/raw.c
net/ipv6/sit.c
net/ipv6/udp.c
net/lapb/lapb_timer.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_est.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/ipvs/ip_vs_nq.c
net/netfilter/ipvs/ip_vs_sed.c
net/netfilter/ipvs/ip_vs_wlc.c
net/netfilter/nf_synproxy_core.c
net/sched/sch_fq.c
net/sysctl_net.c
scripts/checkpatch.pl
security/apparmor/crypto.c
security/apparmor/include/policy.h
security/apparmor/policy.c
security/selinux/avc.c
security/selinux/hooks.c
security/selinux/include/avc.h
sound/core/compress_offload.c
sound/pci/ac97/ac97_codec.c
sound/pci/hda/patch_cirrus.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/blackfin/bf6xx-i2s.c
sound/soc/codecs/88pm860x-codec.c
sound/soc/codecs/ab8500-codec.c
sound/soc/codecs/max98095.c
sound/soc/fsl/imx-sgtl5000.c
sound/soc/soc-core.c
sound/usb/usx2y/usbusx2yaudio.c
sound/usb/usx2y/usx2yhwdeppcm.c
tools/lib/lk/debugfs.c
tools/perf/Makefile
tools/perf/arch/x86/util/tsc.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-report.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-trace.c
tools/perf/config/Makefile
tools/perf/config/feature-tests.mak
tools/perf/util/annotate.c
tools/perf/util/dwarf-aux.c
tools/perf/util/dwarf-aux.h
tools/perf/util/header.c
tools/perf/util/hist.c
tools/perf/util/machine.c
tools/perf/util/probe-finder.c
tools/perf/util/probe-finder.h
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/symbol-elf.c
tools/perf/util/trace-event-parse.c
virt/kvm/Kconfig
virt/kvm/async_pf.c
virt/kvm/iommu.c
virt/kvm/kvm_main.c
virt/kvm/vfio.c [new file with mode: 0644]

diff --git a/CREDITS b/CREDITS
index 9416a9a..0640e16 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -2808,8 +2808,7 @@ S: Ottawa, Ontario
 S: Canada K2P 0X8
 
 N: Mikael Pettersson
-E: mikpe@it.uu.se
-W: http://user.it.uu.se/~mikpe/linux/
+E: mikpelinux@gmail.com
 D: Miscellaneous fixes
 
 N: Reed H. Petty
index 2be603c..a6b6857 100644 (file)
@@ -37,8 +37,8 @@ Description:
                that the USB device has been connected to the machine.  This
                file is read-only.
 Users:
-               PowerTOP <power@bughost.org>
-               http://www.lesswatts.org/projects/powertop/
+               PowerTOP <powertop@lists.01.org>
+               https://01.org/powertop/
 
 What:          /sys/bus/usb/device/.../power/active_duration
 Date:          January 2008
@@ -57,8 +57,8 @@ Description:
                will give an integer percentage.  Note that this does not
                account for counter wrap.
 Users:
-               PowerTOP <power@bughost.org>
-               http://www.lesswatts.org/projects/powertop/
+               PowerTOP <powertop@lists.01.org>
+               https://01.org/powertop/
 
 What:          /sys/bus/usb/devices/<busnum>-<port[.port]>...:<config num>-<interface num>/supports_autosuspend
 Date:          January 2008
index 9d43e76..efe449b 100644 (file)
@@ -1,6 +1,6 @@
 What:          /sys/devices/.../power/
 Date:          January 2009
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../power directory contains attributes
                allowing the user space to check and modify some power
@@ -8,7 +8,7 @@ Description:
 
 What:          /sys/devices/.../power/wakeup
 Date:          January 2009
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../power/wakeup attribute allows the user
                space to check if the device is enabled to wake up the system
@@ -34,7 +34,7 @@ Description:
 
 What:          /sys/devices/.../power/control
 Date:          January 2009
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../power/control attribute allows the user
                space to control the run-time power management of the device.
@@ -53,7 +53,7 @@ Description:
 
 What:          /sys/devices/.../power/async
 Date:          January 2009
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../async attribute allows the user space to
                enable or diasble the device's suspend and resume callbacks to
@@ -79,7 +79,7 @@ Description:
 
 What:          /sys/devices/.../power/wakeup_count
 Date:          September 2010
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../wakeup_count attribute contains the number
                of signaled wakeup events associated with the device.  This
@@ -88,7 +88,7 @@ Description:
 
 What:          /sys/devices/.../power/wakeup_active_count
 Date:          September 2010
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../wakeup_active_count attribute contains the
                number of times the processing of wakeup events associated with
@@ -98,7 +98,7 @@ Description:
 
 What:          /sys/devices/.../power/wakeup_abort_count
 Date:          February 2012
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../wakeup_abort_count attribute contains the
                number of times the processing of a wakeup event associated with
@@ -109,7 +109,7 @@ Description:
 
 What:          /sys/devices/.../power/wakeup_expire_count
 Date:          February 2012
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../wakeup_expire_count attribute contains the
                number of times a wakeup event associated with the device has
@@ -119,7 +119,7 @@ Description:
 
 What:          /sys/devices/.../power/wakeup_active
 Date:          September 2010
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../wakeup_active attribute contains either 1,
                or 0, depending on whether or not a wakeup event associated with
@@ -129,7 +129,7 @@ Description:
 
 What:          /sys/devices/.../power/wakeup_total_time_ms
 Date:          September 2010
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../wakeup_total_time_ms attribute contains
                the total time of processing wakeup events associated with the
@@ -139,7 +139,7 @@ Description:
 
 What:          /sys/devices/.../power/wakeup_max_time_ms
 Date:          September 2010
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../wakeup_max_time_ms attribute contains
                the maximum time of processing a single wakeup event associated
@@ -149,7 +149,7 @@ Description:
 
 What:          /sys/devices/.../power/wakeup_last_time_ms
 Date:          September 2010
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../wakeup_last_time_ms attribute contains
                the value of the monotonic clock corresponding to the time of
@@ -160,7 +160,7 @@ Description:
 
 What:          /sys/devices/.../power/wakeup_prevent_sleep_time_ms
 Date:          February 2012
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute
                contains the total time the device has been preventing
@@ -189,7 +189,7 @@ Description:
 
 What:          /sys/devices/.../power/pm_qos_latency_us
 Date:          March 2012
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../power/pm_qos_resume_latency_us attribute
                contains the PM QoS resume latency limit for the given device,
@@ -207,7 +207,7 @@ Description:
 
 What:          /sys/devices/.../power/pm_qos_no_power_off
 Date:          September 2012
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../power/pm_qos_no_power_off attribute
                is used for manipulating the PM QoS "no power off" flag.  If
@@ -222,7 +222,7 @@ Description:
 
 What:          /sys/devices/.../power/pm_qos_remote_wakeup
 Date:          September 2012
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/devices/.../power/pm_qos_remote_wakeup attribute
                is used for manipulating the PM QoS "remote wakeup required"
index 2177726..205a738 100644 (file)
@@ -1,6 +1,6 @@
 What:          /sys/power/
 Date:          August 2006
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power directory will contain files that will
                provide a unified interface to the power management
@@ -8,7 +8,7 @@ Description:
 
 What:          /sys/power/state
 Date:          August 2006
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power/state file controls the system power state.
                Reading from this file returns what states are supported,
@@ -22,7 +22,7 @@ Description:
 
 What:          /sys/power/disk
 Date:          September 2006
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power/disk file controls the operating mode of the
                suspend-to-disk mechanism.  Reading from this file returns
@@ -67,7 +67,7 @@ Description:
 
 What:          /sys/power/image_size
 Date:          August 2006
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power/image_size file controls the size of the image
                created by the suspend-to-disk mechanism.  It can be written a
@@ -84,7 +84,7 @@ Description:
 
 What:          /sys/power/pm_trace
 Date:          August 2006
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power/pm_trace file controls the code which saves the
                last PM event point in the RTC across reboots, so that you can
@@ -133,7 +133,7 @@ Description:
 
 What:          /sys/power/pm_async
 Date:          January 2009
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power/pm_async file controls the switch allowing the
                user space to enable or disable asynchronous suspend and resume
@@ -146,7 +146,7 @@ Description:
 
 What:          /sys/power/wakeup_count
 Date:          July 2010
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power/wakeup_count file allows user space to put the
                system into a sleep state while taking into account the
@@ -161,7 +161,7 @@ Description:
 
 What:          /sys/power/reserved_size
 Date:          May 2011
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power/reserved_size file allows user space to control
                the amount of memory reserved for allocations made by device
@@ -175,7 +175,7 @@ Description:
 
 What:          /sys/power/autosleep
 Date:          April 2012
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power/autosleep file can be written one of the strings
                returned by reads from /sys/power/state.  If that happens, a
@@ -192,7 +192,7 @@ Description:
 
 What:          /sys/power/wake_lock
 Date:          February 2012
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power/wake_lock file allows user space to create
                wakeup source objects and activate them on demand (if one of
@@ -219,7 +219,7 @@ Description:
 
 What:          /sys/power/wake_unlock
 Date:          February 2012
-Contact:       Rafael J. Wysocki <rjw@sisk.pl>
+Contact:       Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
                The /sys/power/wake_unlock file allows user space to deactivate
                wakeup sources created with the help of /sys/power/wake_lock.
index febbb1b..784841c 100644 (file)
@@ -4,4 +4,4 @@ CONFIG_ACPI_CUSTOM_DSDT builds the image into the kernel.
 
 When to use this method is described in detail on the
 Linux/ACPI home page:
-http://www.lesswatts.org/projects/acpi/overridingDSDT.php
+https://01.org/linux-acpi/documentation/overriding-dsdt
index d18ecd8..929d990 100644 (file)
@@ -6,6 +6,8 @@ capability.txt
        - Generic Block Device Capability (/sys/block/<device>/capability)
 cfq-iosched.txt
        - CFQ IO scheduler tunables
+cmdline-partition.txt
+       - how to specify block device partitions on kernel command line
 data-integrity.txt
        - Block data integrity
 deadline-iosched.txt
index 2bbf4cc..525b9f6 100644 (file)
@@ -1,9 +1,9 @@
-Embedded device command line partition
+Embedded device command line partition parsing
 =====================================================================
 
-Read block device partition table from command line.
-The partition used for fixed block device (eMMC) embedded device.
-It is no MBR, save storage space. Bootloader can be easily accessed
+Support for reading the block device partition table from the command line.
+It is typically used for fixed block (eMMC) embedded devices.
+It has no MBR, so saves storage space. Bootloader can be easily accessed
 by absolute address of data on the block device.
 Users can easily change the partition.
 
diff --git a/Documentation/devicetree/bindings/memory.txt b/Documentation/devicetree/bindings/memory.txt
deleted file mode 100644 (file)
index eb24693..0000000
+++ /dev/null
@@ -1,168 +0,0 @@
-*** Memory binding ***
-
-The /memory node provides basic information about the address and size
-of the physical memory. This node is usually filled or updated by the
-bootloader, depending on the actual memory configuration of the given
-hardware.
-
-The memory layout is described by the following node:
-
-/ {
-       #address-cells = <(n)>;
-       #size-cells = <(m)>;
-       memory {
-               device_type = "memory";
-               reg =  <(baseaddr1) (size1)
-                       (baseaddr2) (size2)
-                       ...
-                       (baseaddrN) (sizeN)>;
-       };
-       ...
-};
-
-A memory node follows the typical device tree rules for "reg" property:
-n:             number of cells used to store base address value
-m:             number of cells used to store size value
-baseaddrX:     defines a base address of the defined memory bank
-sizeX:         the size of the defined memory bank
-
-
-More than one memory bank can be defined.
-
-
-*** Reserved memory regions ***
-
-In /memory/reserved-memory node one can create child nodes describing
-particular reserved (excluded from normal use) memory regions. Such
-memory regions are usually designed for the special usage by various
-device drivers. A good example are contiguous memory allocations or
-memory sharing with other operating system on the same hardware board.
-Those special memory regions might depend on the board configuration and
-devices used on the target system.
-
-Parameters for each memory region can be encoded into the device tree
-with the following convention:
-
-[(label):] (name) {
-       compatible = "linux,contiguous-memory-region", "reserved-memory-region";
-       reg = <(address) (size)>;
-       (linux,default-contiguous-region);
-};
-
-compatible:    one or more of:
-       - "linux,contiguous-memory-region" - enables binding of this
-         region to Contiguous Memory Allocator (special region for
-         contiguous memory allocations, shared with movable system
-         memory, Linux kernel-specific).
-       - "reserved-memory-region" - compatibility is defined, given
-         region is assigned for exclusive usage for by the respective
-         devices.
-
-reg:   standard property defining the base address and size of
-       the memory region
-
-linux,default-contiguous-region: property indicating that the region
-       is the default region for all contiguous memory
-       allocations, Linux specific (optional)
-
-It is optional to specify the base address, so if one wants to use
-autoconfiguration of the base address, '0' can be specified as a base
-address in the 'reg' property.
-
-The /memory/reserved-memory node must contain the same #address-cells
-and #size-cells value as the root node.
-
-
-*** Device node's properties ***
-
-Once regions in the /memory/reserved-memory node have been defined, they
-may be referenced by other device nodes. Bindings that wish to reference
-memory regions should explicitly document their use of the following
-property:
-
-memory-region = <&phandle_to_defined_region>;
-
-This property indicates that the device driver should use the memory
-region pointed by the given phandle.
-
-
-*** Example ***
-
-This example defines a memory consisting of 4 memory banks. 3 contiguous
-regions are defined for Linux kernel, one default of all device drivers
-(named contig_mem, placed at 0x72000000, 64MiB), one dedicated to the
-framebuffer device (labelled display_mem, placed at 0x78000000, 8MiB)
-and one for multimedia processing (labelled multimedia_mem, placed at
-0x77000000, 64MiB). 'display_mem' region is then assigned to fb@12300000
-device for DMA memory allocations (Linux kernel drivers will use CMA is
-available or dma-exclusive usage otherwise). 'multimedia_mem' is
-assigned to scaler@12500000 and codec@12600000 devices for contiguous
-memory allocations when CMA driver is enabled.
-
-The reason for creating a separate region for framebuffer device is to
-match the framebuffer base address to the one configured by bootloader,
-so once Linux kernel drivers starts no glitches on the displayed boot
-logo appears. Scaller and codec drivers should share the memory
-allocations.
-
-/ {
-       #address-cells = <1>;
-       #size-cells = <1>;
-
-       /* ... */
-
-       memory {
-               reg =  <0x40000000 0x10000000
-                       0x50000000 0x10000000
-                       0x60000000 0x10000000
-                       0x70000000 0x10000000>;
-
-               reserved-memory {
-                       #address-cells = <1>;
-                       #size-cells = <1>;
-
-                       /*
-                        * global autoconfigured region for contiguous allocations
-                        * (used only with Contiguous Memory Allocator)
-                        */
-                       contig_region@0 {
-                               compatible = "linux,contiguous-memory-region";
-                               reg = <0x0 0x4000000>;
-                               linux,default-contiguous-region;
-                       };
-
-                       /*
-                        * special region for framebuffer
-                        */
-                       display_region: region@78000000 {
-                               compatible = "linux,contiguous-memory-region", "reserved-memory-region";
-                               reg = <0x78000000 0x800000>;
-                       };
-
-                       /*
-                        * special region for multimedia processing devices
-                        */
-                       multimedia_region: region@77000000 {
-                               compatible = "linux,contiguous-memory-region";
-                               reg = <0x77000000 0x4000000>;
-                       };
-               };
-       };
-
-       /* ... */
-
-       fb0: fb@12300000 {
-               status = "okay";
-               memory-region = <&display_region>;
-       };
-
-       scaler: scaler@12500000 {
-               status = "okay";
-               memory-region = <&multimedia_region>;
-       };
-
-       codec: codec@12600000 {
-               status = "okay";
-               memory-region = <&multimedia_region>;
-       };
-};
index 6d1c098..c67b975 100644 (file)
@@ -1,11 +1,11 @@
-* Samsung Exynos specific extensions to the Synopsis Designware Mobile
+* Samsung Exynos specific extensions to the Synopsys Designware Mobile
   Storage Host Controller
 
-The Synopsis designware mobile storage host controller is used to interface
+The Synopsys designware mobile storage host controller is used to interface
 a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
-differences between the core Synopsis dw mshc controller properties described
-by synopsis-dw-mshc.txt and the properties used by the Samsung Exynos specific
-extensions to the Synopsis Designware Mobile Storage Host Controller.
+differences between the core Synopsys dw mshc controller properties described
+by synopsys-dw-mshc.txt and the properties used by the Samsung Exynos specific
+extensions to the Synopsys Designware Mobile Storage Host Controller.
 
 Required Properties:
 
index 8a3d91d..c559f3f 100644 (file)
@@ -1,11 +1,11 @@
-* Rockchip specific extensions to the Synopsis Designware Mobile
+* Rockchip specific extensions to the Synopsys Designware Mobile
   Storage Host Controller
 
-The Synopsis designware mobile storage host controller is used to interface
+The Synopsys designware mobile storage host controller is used to interface
 a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
-differences between the core Synopsis dw mshc controller properties described
-by synopsis-dw-mshc.txt and the properties used by the Rockchip specific
-extensions to the Synopsis Designware Mobile Storage Host Controller.
+differences between the core Synopsys dw mshc controller properties described
+by synopsys-dw-mshc.txt and the properties used by the Rockchip specific
+extensions to the Synopsys Designware Mobile Storage Host Controller.
 
 Required Properties:
 
@@ -1,14 +1,14 @@
-* Synopsis Designware Mobile Storage Host Controller
+* Synopsys Designware Mobile Storage Host Controller
 
-The Synopsis designware mobile storage host controller is used to interface
+The Synopsys designware mobile storage host controller is used to interface
 a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
 differences between the core mmc properties described by mmc.txt and the
-properties used by the Synopsis Designware Mobile Storage Host Controller.
+properties used by the Synopsys Designware Mobile Storage Host Controller.
 
 Required Properties:
 
 * compatible: should be
-       - snps,dw-mshc: for controllers compliant with synopsis dw-mshc.
+       - snps,dw-mshc: for controllers compliant with synopsys dw-mshc.
 * #address-cells: should be 1.
 * #size-cells: should be 0.
 
index df204e1..6a2a116 100644 (file)
@@ -9,12 +9,15 @@ compulsory and any optional properties, common to all SD/MMC drivers, as
 described in mmc.txt, can be used. Additionally the following tmio_mmc-specific
 optional bindings can be used.
 
+Required properties:
+- compatible:  "renesas,sdhi-shmobile" - a generic sh-mobile SDHI unit
+               "renesas,sdhi-sh7372" - SDHI IP on SH7372 SoC
+               "renesas,sdhi-sh73a0" - SDHI IP on SH73A0 SoC
+               "renesas,sdhi-r8a73a4" - SDHI IP on R8A73A4 SoC
+               "renesas,sdhi-r8a7740" - SDHI IP on R8A7740 SoC
+               "renesas,sdhi-r8a7778" - SDHI IP on R8A7778 SoC
+               "renesas,sdhi-r8a7779" - SDHI IP on R8A7779 SoC
+               "renesas,sdhi-r8a7790" - SDHI IP on R8A7790 SoC
+
 Optional properties:
 - toshiba,mmc-wrprotect-disable: write-protect detection is unavailable
-
-When used with Renesas SDHI hardware, the following compatibility strings
-configure various model-specific properties:
-
-"renesas,sh7372-sdhi": (default) compatible with SH7372
-"renesas,r8a7740-sdhi":        compatible with R8A7740: certain MMC/SD commands have to
-                       wait for the interface to become idle.
index 2c6be03..d2ea460 100644 (file)
@@ -86,6 +86,7 @@ General Properties:
 
 Clock Properties:
 
+  - fsl,cksel        Timer reference clock source.
   - fsl,tclk-period  Timer reference clock period in nanoseconds.
   - fsl,tmr-prsc     Prescaler, divides the output clock.
   - fsl,tmr-add      Frequency compensation value.
@@ -97,7 +98,7 @@ Clock Properties:
   clock. You must choose these carefully for the clock to work right.
   Here is how to figure good values:
 
-  TimerOsc     = system clock               MHz
+  TimerOsc     = selected reference clock   MHz
   tclk_period  = desired clock period       nanoseconds
   NominalFreq  = 1000 / tclk_period         MHz
   FreqDivRatio = TimerOsc / NominalFreq     (must be greater that 1.0)
@@ -114,6 +115,20 @@ Clock Properties:
   Pulse Per Second (PPS) signal, since this will be offered to the PPS
   subsystem to synchronize the Linux clock.
 
+  Reference clock source is determined by the value, which is holded
+  in CKSEL bits in TMR_CTRL register. "fsl,cksel" property keeps the
+  value, which will be directly written in those bits, that is why,
+  according to reference manual, the next clock sources can be used:
+
+  <0> - external high precision timer reference clock (TSEC_TMR_CLK
+        input is used for this purpose);
+  <1> - eTSEC system clock;
+  <2> - eTSEC1 transmit clock;
+  <3> - RTC clock input.
+
+  When this attribute is not used, eTSEC system clock will serve as
+  IEEE 1588 timer reference clock.
+
 Example:
 
        ptp_clock@24E00 {
@@ -121,6 +136,7 @@ Example:
                reg = <0x24E00 0xB0>;
                interrupts = <12 0x8 13 0x8>;
                interrupt-parent = < &ipic >;
+               fsl,cksel       = <1>;
                fsl,tclk-period = <10>;
                fsl,tmr-prsc    = <100>;
                fsl,tmr-add     = <0x999999A4>;
index eabcb4b..e216af3 100644 (file)
@@ -1,4 +1,4 @@
-* Synopsis Designware PCIe interface
+* Synopsys Designware PCIe interface
 
 Required properties:
 - compatible: should contain "snps,dw-pcie" to identify the
index 1a036cd..fcbb736 100644 (file)
@@ -480,6 +480,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        Format: <io>,<irq>,<mode>
                        See header of drivers/net/hamradio/baycom_ser_hdx.c.
 
+       blkdevparts=    Manual partition parsing of block device(s) for
+                       embedded devices based on command line input.
+                       See Documentation/block/cmdline-partition.txt
+
        boot_delay=     Milliseconds to delay each printk during boot.
                        Values larger than 10 seconds (10000) are changed to
                        no delay (0).
@@ -1357,7 +1361,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        pages. In the event, a node is too small to have both
                        kernelcore and Movable pages, kernelcore pages will
                        take priority and other nodes will have a larger number
-                       of kernelcore pages.  The Movable zone is used for the
+                       of Movable pages.  The Movable zone is used for the
                        allocation of pages that may be reclaimed or moved
                        by the page migration subsystem.  This means that
                        HugeTLB pages may not be allocated from this zone.
@@ -3485,6 +3489,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                                the unplug protocol
                        never -- do not unplug even if version check succeeds
 
+       xen_nopvspin    [X86,XEN]
+                       Disables the ticketlock slowpath using Xen PV
+                       optimizations.
+
        xirc2ps_cs=     [NET,PCMCIA]
                        Format:
                        <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
index a46ddb8..85c362d 100644 (file)
@@ -28,6 +28,7 @@ ALC269/270/275/276/28x/29x
   alc269-dmic          Enable ALC269(VA) digital mic workaround
   alc271-dmic          Enable ALC271X digital mic workaround
   inv-dmic             Inverted internal mic workaround
+  headset-mic          Indicates a combined headset (headphone+mic) jack
   lenovo-dock          Enables docking station I/O for some Lenovos
   dell-headset-multi   Headset jack, which can also be used as mic-in
   dell-headset-dock    Headset jack (without mic-in), and also dock I/O
@@ -296,6 +297,12 @@ Cirrus Logic CS4206/4207
   imac27       IMac 27 Inch
   auto         BIOS setup (default)
 
+Cirrus Logic CS4208
+===================
+  mba6         MacBook Air 6,1 and 6,2
+  gpio0                Enable GPIO 0 amp
+  auto         BIOS setup (default)
+
 VIA VT17xx/VT18xx/VT20xx
 ========================
   auto         BIOS setup (default)
diff --git a/Documentation/virtual/kvm/00-INDEX b/Documentation/virtual/kvm/00-INDEX
new file mode 100644 (file)
index 0000000..641ec92
--- /dev/null
@@ -0,0 +1,24 @@
+00-INDEX
+       - this file.
+api.txt
+       - KVM userspace API.
+cpuid.txt
+       - KVM-specific cpuid leaves (x86).
+devices/
+       - KVM_CAP_DEVICE_CTRL userspace API.
+hypercalls.txt
+       - KVM hypercalls.
+locking.txt
+       - notes on KVM locks.
+mmu.txt
+       - the x86 kvm shadow mmu.
+msr.txt
+       - KVM-specific MSRs (x86).
+nested-vmx.txt
+       - notes on nested virtualization for Intel x86 processors.
+ppc-pv.txt
+       - the paravirtualization interface on PowerPC.
+review-checklist.txt
+       - review checklist for KVM patches.
+timekeeping.txt
+       - timekeeping virtualization for x86-based architectures.
index 858aecf..a30035d 100644 (file)
@@ -1122,9 +1122,9 @@ struct kvm_cpuid2 {
        struct kvm_cpuid_entry2 entries[0];
 };
 
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC    2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT  4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX                BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC           BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT         BIT(2)
 
 struct kvm_cpuid_entry2 {
        __u32 function;
@@ -1810,6 +1810,50 @@ registers, find a list below:
   PPC   | KVM_REG_PPC_TLB3PS   | 32
   PPC   | KVM_REG_PPC_EPTCFG   | 32
   PPC   | KVM_REG_PPC_ICP_STATE | 64
+  PPC   | KVM_REG_PPC_TB_OFFSET        | 64
+  PPC   | KVM_REG_PPC_SPMC1    | 32
+  PPC   | KVM_REG_PPC_SPMC2    | 32
+  PPC   | KVM_REG_PPC_IAMR     | 64
+  PPC   | KVM_REG_PPC_TFHAR    | 64
+  PPC   | KVM_REG_PPC_TFIAR    | 64
+  PPC   | KVM_REG_PPC_TEXASR   | 64
+  PPC   | KVM_REG_PPC_FSCR     | 64
+  PPC   | KVM_REG_PPC_PSPB     | 32
+  PPC   | KVM_REG_PPC_EBBHR    | 64
+  PPC   | KVM_REG_PPC_EBBRR    | 64
+  PPC   | KVM_REG_PPC_BESCR    | 64
+  PPC   | KVM_REG_PPC_TAR      | 64
+  PPC   | KVM_REG_PPC_DPDES    | 64
+  PPC   | KVM_REG_PPC_DAWR     | 64
+  PPC   | KVM_REG_PPC_DAWRX    | 64
+  PPC   | KVM_REG_PPC_CIABR    | 64
+  PPC   | KVM_REG_PPC_IC       | 64
+  PPC   | KVM_REG_PPC_VTB      | 64
+  PPC   | KVM_REG_PPC_CSIGR    | 64
+  PPC   | KVM_REG_PPC_TACR     | 64
+  PPC   | KVM_REG_PPC_TCSCR    | 64
+  PPC   | KVM_REG_PPC_PID      | 64
+  PPC   | KVM_REG_PPC_ACOP     | 64
+  PPC   | KVM_REG_PPC_VRSAVE   | 32
+  PPC   | KVM_REG_PPC_LPCR     | 64
+  PPC   | KVM_REG_PPC_PPR      | 64
+  PPC   | KVM_REG_PPC_ARCH_COMPAT 32
+  PPC   | KVM_REG_PPC_TM_GPR0  | 64
+          ...
+  PPC   | KVM_REG_PPC_TM_GPR31 | 64
+  PPC   | KVM_REG_PPC_TM_VSR0  | 128
+          ...
+  PPC   | KVM_REG_PPC_TM_VSR63 | 128
+  PPC   | KVM_REG_PPC_TM_CR    | 64
+  PPC   | KVM_REG_PPC_TM_LR    | 64
+  PPC   | KVM_REG_PPC_TM_CTR   | 64
+  PPC   | KVM_REG_PPC_TM_FPSCR | 64
+  PPC   | KVM_REG_PPC_TM_AMR   | 64
+  PPC   | KVM_REG_PPC_TM_PPR   | 64
+  PPC   | KVM_REG_PPC_TM_VRSAVE        | 64
+  PPC   | KVM_REG_PPC_TM_VSCR  | 32
+  PPC   | KVM_REG_PPC_TM_DSCR  | 64
+  PPC   | KVM_REG_PPC_TM_TAR   | 64
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
@@ -2304,7 +2348,31 @@ Possible features:
          Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
 
 
-4.83 KVM_GET_REG_LIST
+4.83 KVM_ARM_PREFERRED_TARGET
+
+Capability: basic
+Architectures: arm, arm64
+Type: vm ioctl
+Parameters: struct struct kvm_vcpu_init (out)
+Returns: 0 on success; -1 on error
+Errors:
+  ENODEV:    no preferred target available for the host
+
+This queries KVM for preferred CPU target type which can be emulated
+by KVM on underlying host.
+
+The ioctl returns struct kvm_vcpu_init instance containing information
+about preferred CPU target type and recommended features for it.  The
+kvm_vcpu_init->features bitmap returned will have feature bits set if
+the preferred target recommends setting these features, but this is
+not mandatory.
+
+The information returned by this ioctl can be used to prepare an instance
+of struct kvm_vcpu_init for KVM_ARM_VCPU_INIT ioctl which will result in
+in VCPU matching underlying host.
+
+
+4.84 KVM_GET_REG_LIST
 
 Capability: basic
 Architectures: arm, arm64
@@ -2323,8 +2391,7 @@ struct kvm_reg_list {
 This ioctl returns the guest registers that are supported for the
 KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
 
-
-4.84 KVM_ARM_SET_DEVICE_ADDR
+4.85 KVM_ARM_SET_DEVICE_ADDR
 
 Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
 Architectures: arm, arm64
@@ -2362,7 +2429,7 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling
 KVM_RUN on any of the VCPUs.  Calling this ioctl twice for any of the
 base addresses will return -EEXIST.
 
-4.85 KVM_PPC_RTAS_DEFINE_TOKEN
+4.86 KVM_PPC_RTAS_DEFINE_TOKEN
 
 Capability: KVM_CAP_PPC_RTAS
 Architectures: ppc
@@ -2661,6 +2728,77 @@ and usually define the validity of a groups of registers. (e.g. one bit
 };
 
 
+4.81 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+       __u32 nent;
+       __u32 flags;
+       struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX                BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC           BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT         BIT(2)
+
+struct kvm_cpuid_entry2 {
+       __u32 function;
+       __u32 index;
+       __u32 flags;
+       __u32 eax;
+       __u32 ebx;
+       __u32 ecx;
+       __u32 edx;
+       __u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm.Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently and thus not included here.
+
+The fields in each entry are defined as follows:
+
+  function: the eax value used to obtain the entry
+  index: the ecx value used to obtain the entry (for entries that are
+         affected by ecx)
+  flags: an OR of zero or more of the following:
+        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+           if the index field is valid
+        KVM_CPUID_FLAG_STATEFUL_FUNC:
+           if cpuid for this function returns different values for successive
+           invocations; there will be several entries with the same function,
+           all with this flag set
+        KVM_CPUID_FLAG_STATE_READ_NEXT:
+           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+           the first entry to be read by a cpu
+   eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+         this function/index combination
+
+
 6. Capabilities that can be enabled
 -----------------------------------
 
index 22ff659..3c65feb 100644 (file)
@@ -43,6 +43,13 @@ KVM_FEATURE_CLOCKSOURCE2           ||     3 || kvmclock available at msrs
 KVM_FEATURE_ASYNC_PF               ||     4 || async pf can be enabled by
                                    ||       || writing to msr 0x4b564d02
 ------------------------------------------------------------------------------
+KVM_FEATURE_STEAL_TIME             ||     5 || steal time can be enabled by
+                                   ||       || writing to msr 0x4b564d03.
+------------------------------------------------------------------------------
+KVM_FEATURE_PV_EOI                 ||     6 || paravirtualized end of interrupt
+                                   ||       || handler can be enabled by writing
+                                   ||       || to msr 0x4b564d04.
+------------------------------------------------------------------------------
 KVM_FEATURE_PV_UNHALT              ||     7 || guest checks this feature bit
                                    ||       || before enabling paravirtualized
                                    ||       || spinlock support.
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
new file mode 100644 (file)
index 0000000..ef51740
--- /dev/null
@@ -0,0 +1,22 @@
+VFIO virtual device
+===================
+
+Device types supported:
+  KVM_DEV_TYPE_VFIO
+
+Only one VFIO instance may be created per VM.  The created device
+tracks VFIO groups in use by the VM and features of those groups
+important to the correctness and acceleration of the VM.  As groups
+are enabled and disabled for use by the VM, KVM should be updated
+about their presence.  When registered with KVM, a reference to the
+VFIO-group is held by KVM.
+
+Groups:
+  KVM_DEV_VFIO_GROUP
+
+KVM_DEV_VFIO_GROUP attributes:
+  KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+  KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+
+For each, kvm_device_attr.addr points to an int32_t file descriptor
+for the VFIO group.
index 41b7ac9..f886941 100644 (file)
@@ -132,10 +132,14 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
 ------------
 
 Name:          kvm_lock
-Type:          raw_spinlock
+Type:          spinlock_t
 Arch:          any
 Protects:      - vm_list
-               - hardware virtualization enable/disable
+
+Name:          kvm_count_lock
+Type:          raw_spinlock_t
+Arch:          any
+Protects:      - hardware virtualization enable/disable
 Comment:       'raw' because hardware enabling/disabling must be atomic /wrt
                migration.
 
@@ -151,3 +155,14 @@ Type:              spinlock_t
 Arch:          any
 Protects:      -shadow page/shadow tlb entry
 Comment:       it is a spinlock since it is used in mmu notifier.
+
+Name:          kvm->srcu
+Type:          srcu lock
+Arch:          any
+Protects:      - kvm->memslots
+               - kvm->buses
+Comment:       The srcu read lock must be held while accessing memslots (e.g.
+               when using gfn_to_* functions) and while accessing in-kernel
+               MMIO/PIO address->device structure mapping (kvm->buses).
+               The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
+               if it is needed by multiple functions.
index e61c2e8..366f90c 100644 (file)
@@ -237,11 +237,11 @@ F:        drivers/platform/x86/acer-wmi.c
 
 ACPI
 M:     Len Brown <lenb@kernel.org>
-M:     Rafael J. Wysocki <rjw@sisk.pl>
+M:     Rafael J. Wysocki <rjw@rjwysocki.net>
 L:     linux-acpi@vger.kernel.org
-W:     http://www.lesswatts.org/projects/acpi/
-Q:     http://patchwork.kernel.org/project/linux-acpi/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
+W:     https://01.org/linux-acpi
+Q:     https://patchwork.kernel.org/project/linux-acpi/list/
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
 S:     Supported
 F:     drivers/acpi/
 F:     drivers/pnp/pnpacpi/
@@ -256,21 +256,21 @@ F:        drivers/pci/*/*/*acpi*
 ACPI FAN DRIVER
 M:     Zhang Rui <rui.zhang@intel.com>
 L:     linux-acpi@vger.kernel.org
-W:     http://www.lesswatts.org/projects/acpi/
+W:     https://01.org/linux-acpi
 S:     Supported
 F:     drivers/acpi/fan.c
 
 ACPI THERMAL DRIVER
 M:     Zhang Rui <rui.zhang@intel.com>
 L:     linux-acpi@vger.kernel.org
-W:     http://www.lesswatts.org/projects/acpi/
+W:     https://01.org/linux-acpi
 S:     Supported
 F:     drivers/acpi/*thermal*
 
 ACPI VIDEO DRIVER
 M:     Zhang Rui <rui.zhang@intel.com>
 L:     linux-acpi@vger.kernel.org
-W:     http://www.lesswatts.org/projects/acpi/
+W:     https://01.org/linux-acpi
 S:     Supported
 F:     drivers/acpi/video.c
 
@@ -824,15 +824,21 @@ S:        Maintained
 F:     arch/arm/mach-gemini/
 
 ARM/CSR SIRFPRIMA2 MACHINE SUPPORT
-M:     Barry Song <baohua.song@csr.com>
+M:     Barry Song <baohua@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git
 S:     Maintained
 F:     arch/arm/mach-prima2/
+F:     drivers/clk/clk-prima2.c
+F:     drivers/clocksource/timer-prima2.c
+F:     drivers/clocksource/timer-marco.c
 F:     drivers/dma/sirf-dma.c
 F:     drivers/i2c/busses/i2c-sirf.c
+F:     drivers/input/misc/sirfsoc-onkey.c
+F:     drivers/irqchip/irq-sirfsoc.c
 F:     drivers/mmc/host/sdhci-sirf.c
 F:     drivers/pinctrl/sirf/
+F:     drivers/rtc/rtc-sirfsoc.c
 F:     drivers/spi/spi-sirf.c
 
 ARM/EBSA110 MACHINE SUPPORT
@@ -1812,7 +1818,8 @@ S:        Supported
 F:     drivers/net/ethernet/broadcom/bnx2x/
 
 BROADCOM BCM281XX/BCM11XXX ARM ARCHITECTURE
-M:     Christian Daudt <csd@broadcom.com>
+M:     Christian Daudt <bcm@fixthebug.org>
+L:     bcm-kernel-feedback-list@broadcom.com
 T:     git git://git.github.com/broadcom/bcm11351
 S:     Maintained
 F:     arch/arm/mach-bcm/
@@ -2293,7 +2300,7 @@ S:        Maintained
 F:     drivers/net/ethernet/ti/cpmac.c
 
 CPU FREQUENCY DRIVERS
-M:     Rafael J. Wysocki <rjw@sisk.pl>
+M:     Rafael J. Wysocki <rjw@rjwysocki.net>
 M:     Viresh Kumar <viresh.kumar@linaro.org>
 L:     cpufreq@vger.kernel.org
 L:     linux-pm@vger.kernel.org
@@ -2324,7 +2331,7 @@ S:      Maintained
 F:      drivers/cpuidle/cpuidle-big_little.c
 
 CPUIDLE DRIVERS
-M:     Rafael J. Wysocki <rjw@sisk.pl>
+M:     Rafael J. Wysocki <rjw@rjwysocki.net>
 M:     Daniel Lezcano <daniel.lezcano@linaro.org>
 L:     linux-pm@vger.kernel.org
 S:     Maintained
@@ -2639,6 +2646,18 @@ F:       include/linux/device-mapper.h
 F:     include/linux/dm-*.h
 F:     include/uapi/linux/dm-*.h
 
+DIGI NEO AND CLASSIC PCI PRODUCTS
+M:     Lidza Louina <lidza.louina@gmail.com>
+L:     driverdev-devel@linuxdriverproject.org
+S:     Maintained
+F:     drivers/staging/dgnc/
+
+DIGI EPCA PCI PRODUCTS
+M:     Lidza Louina <lidza.louina@gmail.com>
+L:     driverdev-devel@linuxdriverproject.org
+S:     Maintained
+F:     drivers/staging/dgap/
+
 DIOLAN U2C-12 I2C DRIVER
 M:     Guenter Roeck <linux@roeck-us.net>
 L:     linux-i2c@vger.kernel.org
@@ -3534,7 +3553,7 @@ F:        fs/freevxfs/
 
 FREEZER
 M:     Pavel Machek <pavel@ucw.cz>
-M:     "Rafael J. Wysocki" <rjw@sisk.pl>
+M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
 L:     linux-pm@vger.kernel.org
 S:     Supported
 F:     Documentation/power/freezing-of-tasks.txt
@@ -3870,7 +3889,7 @@ F:        drivers/video/hgafb.c
 
 HIBERNATION (aka Software Suspend, aka swsusp)
 M:     Pavel Machek <pavel@ucw.cz>
-M:     "Rafael J. Wysocki" <rjw@sisk.pl>
+M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
 L:     linux-pm@vger.kernel.org
 S:     Supported
 F:     arch/x86/power/
@@ -4320,7 +4339,7 @@ F:        drivers/video/i810/
 INTEL MENLOW THERMAL DRIVER
 M:     Sujith Thomas <sujith.thomas@intel.com>
 L:     platform-driver-x86@vger.kernel.org
-W:     http://www.lesswatts.org/projects/acpi/
+W:     https://01.org/linux-acpi
 S:     Supported
 F:     drivers/platform/x86/intel_menlow.c
 
@@ -4457,6 +4476,13 @@ L:       linux-serial@vger.kernel.org
 S:     Maintained
 F:     drivers/tty/serial/ioc3_serial.c
 
+IOMMU DRIVERS
+M:     Joerg Roedel <joro@8bytes.org>
+L:     iommu@lists.linux-foundation.org
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+S:     Maintained
+F:     drivers/iommu/
+
 IP MASQUERADING
 M:     Juanjo Ciarlante <jjciarla@raiz.uncu.edu.ar>
 S:     Maintained
@@ -4788,7 +4814,8 @@ KERNEL VIRTUAL MACHINE (KVM)
 M:     Gleb Natapov <gleb@redhat.com>
 M:     Paolo Bonzini <pbonzini@redhat.com>
 L:     kvm@vger.kernel.org
-W:     http://linux-kvm.org
+W:     http://www.linux-kvm.org
+T:     git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
 S:     Supported
 F:     Documentation/*/kvm*.txt
 F:     Documentation/virtual/kvm/
@@ -6595,7 +6622,7 @@ S:        Obsolete
 F:     drivers/net/wireless/prism54/
 
 PROMISE SATA TX2/TX4 CONTROLLER LIBATA DRIVER
-M:     Mikael Pettersson <mikpe@it.uu.se>
+M:     Mikael Pettersson <mikpelinux@gmail.com>
 L:     linux-ide@vger.kernel.org
 S:     Maintained
 F:     drivers/ata/sata_promise.*
@@ -7258,9 +7285,9 @@ F:        include/linux/sched.h
 F:     include/uapi/linux/sched.h
 
 SCORE ARCHITECTURE
-M:     Chen Liqin <liqin.chen@sunplusct.com>
+M:     Chen Liqin <liqin.linux@gmail.com>
 M:     Lennox Wu <lennox.wu@gmail.com>
-W:     http://www.sunplusct.com
+W:     http://www.sunplus.com
 S:     Supported
 F:     arch/score/
 
@@ -8069,7 +8096,7 @@ F:        drivers/sh/
 SUSPEND TO RAM
 M:     Len Brown <len.brown@intel.com>
 M:     Pavel Machek <pavel@ucw.cz>
-M:     "Rafael J. Wysocki" <rjw@sisk.pl>
+M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
 L:     linux-pm@vger.kernel.org
 S:     Supported
 F:     Documentation/power/
@@ -8724,9 +8751,8 @@ F:        Documentation/hid/hiddev.txt
 F:     drivers/hid/usbhid/
 
 USB/IP DRIVERS
-M:     Matt Mooney <mfm@muteddisk.com>
 L:     linux-usb@vger.kernel.org
-S:     Maintained
+S:     Orphan
 F:     drivers/staging/usbip/
 
 USB ISP116X DRIVER
@@ -9366,6 +9392,7 @@ F:        arch/arm64/include/asm/xen/
 
 XEN NETWORK BACKEND DRIVER
 M:     Ian Campbell <ian.campbell@citrix.com>
+M:     Wei Liu <wei.liu2@citrix.com>
 L:     xen-devel@lists.xenproject.org (moderated for non-subscribers)
 L:     netdev@vger.kernel.org
 S:     Supported
index 8d0668f..deec08b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc5
 NAME = One Giant Leap for Frogkind
 
 # *DOCUMENTATION*
index 1feb169..af2cc6e 100644 (file)
@@ -286,9 +286,6 @@ config HAVE_PERF_USER_STACK_DUMP
 config HAVE_ARCH_JUMP_LABEL
        bool
 
-config HAVE_ARCH_MUTEX_CPU_RELAX
-       bool
-
 config HAVE_RCU_TABLE_FREE
        bool
 
index f158197..b6a8c2d 100644 (file)
@@ -45,7 +45,14 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
-       lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+       unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+       __asm__ __volatile__(
+       "       ex  %0, [%1]            \n"
+       : "+r" (tmp)
+       : "r"(&(lock->slock))
+       : "memory");
+
        smp_mb();
 }
 
index 3242082..30c9baf 100644 (file)
@@ -43,7 +43,7 @@
  * Because it essentially checks if buffer end is within limit and @len is
  * non-ngeative, which implies that buffer start will be within limit too.
  *
- * The reason for rewriting being, for majorit yof cases, @len is generally
+ * The reason for rewriting being, for majoritof cases, @len is generally
  * compile time constant, causing first sub-expression to be compile time
  * subsumed.
  *
@@ -53,7 +53,7 @@
  *
  */
 #define __user_ok(addr, sz)    (((sz) <= TASK_SIZE) && \
-                                (((addr)+(sz)) <= get_fs()))
+                                ((addr) <= (get_fs() - (sz))))
 #define __access_ok(addr, sz)  (unlikely(__kernel_ok) || \
                                 likely(__user_ok((addr), (sz))))
 
index 3332385..5d76706 100644 (file)
@@ -102,7 +102,7 @@ static int genregs_set(struct task_struct *target,
        REG_IGNORE_ONE(pad2);
        REG_IN_CHUNK(callee, efa, cregs);       /* callee_regs[r25..r13] */
        REG_IGNORE_ONE(efa);                    /* efa update invalid */
-       REG_IN_ONE(stop_pc, &ptregs->ret);      /* stop_pc: PC update */
+       REG_IGNORE_ONE(stop_pc);                        /* PC updated via @ret */
 
        return ret;
 }
index ee6ef2f..7e95e1a 100644 (file)
@@ -101,7 +101,6 @@ SYSCALL_DEFINE0(rt_sigreturn)
 {
        struct rt_sigframe __user *sf;
        unsigned int magic;
-       int err;
        struct pt_regs *regs = current_pt_regs();
 
        /* Always make any pending restarted system calls return -EINTR */
@@ -119,15 +118,16 @@ SYSCALL_DEFINE0(rt_sigreturn)
        if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
                goto badframe;
 
-       err = restore_usr_regs(regs, sf);
-       err |= __get_user(magic, &sf->sigret_magic);
-       if (err)
+       if (__get_user(magic, &sf->sigret_magic))
                goto badframe;
 
        if (unlikely(is_do_ss_needed(magic)))
                if (restore_altstack(&sf->uc.uc_stack))
                        goto badframe;
 
+       if (restore_usr_regs(regs, sf))
+               goto badframe;
+
        /* Don't restart from sigreturn */
        syscall_wont_restart(regs);
 
@@ -191,6 +191,15 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info,
                return 1;
 
        /*
+        * w/o SA_SIGINFO, struct ucontext is partially populated (only
+        * uc_mcontext/uc_sigmask) for kernel's normal user state preservation
+        * during signal handler execution. This works for SA_SIGINFO as well
+        * although the semantics are now overloaded (the same reg state can be
+        * inspected by userland: but are they allowed to fiddle with it ?
+        */
+       err |= stash_usr_regs(sf, regs, set);
+
+       /*
         * SA_SIGINFO requires 3 args to signal handler:
         *  #1: sig-no (common to any handler)
         *  #2: struct siginfo
@@ -213,14 +222,6 @@ setup_rt_frame(int signo, struct k_sigaction *ka, siginfo_t *info,
                magic = MAGIC_SIGALTSTK;
        }
 
-       /*
-        * w/o SA_SIGINFO, struct ucontext is partially populated (only
-        * uc_mcontext/uc_sigmask) for kernel's normal user state preservation
-        * during signal handler execution. This works for SA_SIGINFO as well
-        * although the semantics are now overloaded (the same reg state can be
-        * inspected by userland: but are they allowed to fiddle with it ?
-        */
-       err |= stash_usr_regs(sf, regs, set);
        err |= __put_user(magic, &sf->sigret_magic);
        if (err)
                return err;
index 0e51e69..3fde7de 100644 (file)
@@ -227,12 +227,9 @@ void __attribute__((weak)) arc_local_timer_setup(unsigned int cpu)
 {
        struct clock_event_device *clk = &per_cpu(arc_clockevent_device, cpu);
 
-       clockevents_calc_mult_shift(clk, arc_get_core_freq(), 5);
-
-       clk->max_delta_ns = clockevent_delta2ns(ARC_TIMER_MAX, clk);
        clk->cpumask = cpumask_of(cpu);
-
-       clockevents_register_device(clk);
+       clockevents_config_and_register(clk, arc_get_core_freq(),
+                                       0, ARC_TIMER_MAX);
 
        /*
         * setup the per-cpu timer IRQ handler - for all cpus
index 28d1700..7ff5b5c 100644 (file)
@@ -245,6 +245,12 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs,
                regs->status32 &= ~STATUS_DE_MASK;
        } else {
                regs->ret += state.instr_len;
+
+               /* handle zero-overhead-loop */
+               if ((regs->ret == regs->lp_end) && (regs->lp_count)) {
+                       regs->ret = regs->lp_start;
+                       regs->lp_count--;
+               }
        }
 
        return 0;
index 3f7714d..1ad6fb6 100644 (file)
@@ -2217,8 +2217,7 @@ config NEON
 
 config KERNEL_MODE_NEON
        bool "Support for NEON in kernel mode"
-       default n
-       depends on NEON
+       depends on NEON && AEABI
        help
          Say Y to include support for NEON in kernel mode.
 
index a37a50f..db50b62 100644 (file)
@@ -296,10 +296,15 @@ archprepare:
 # Convert bzImage to zImage
 bzImage: zImage
 
-zImage Image xipImage bootpImage uImage: vmlinux
+BOOT_TARGETS   = zImage Image xipImage bootpImage uImage
+INSTALL_TARGETS        = zinstall uinstall install
+
+PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS)
+
+$(BOOT_TARGETS): vmlinux
        $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@
 
-zinstall uinstall install: vmlinux
+$(INSTALL_TARGETS):
        $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
 
 %.dtb: | scripts
index 84aa2ca..ec2f806 100644 (file)
@@ -95,24 +95,24 @@ initrd:
        @test "$(INITRD)" != "" || \
        (echo You must specify INITRD; exit -1)
 
-install: $(obj)/Image
-       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+install:
+       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
        $(obj)/Image System.map "$(INSTALL_PATH)"
 
-zinstall: $(obj)/zImage
-       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+zinstall:
+       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
        $(obj)/zImage System.map "$(INSTALL_PATH)"
 
-uinstall: $(obj)/uImage
-       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+uinstall:
+       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
        $(obj)/uImage System.map "$(INSTALL_PATH)"
 
 zi:
-       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
        $(obj)/zImage System.map "$(INSTALL_PATH)"
 
 i:
-       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
        $(obj)/Image System.map "$(INSTALL_PATH)"
 
 subdir-            := bootp compressed dts
index e95af3f..802720e 100644 (file)
@@ -41,6 +41,8 @@ dtb-$(CONFIG_ARCH_AT91)       += sama5d33ek.dtb
 dtb-$(CONFIG_ARCH_AT91)        += sama5d34ek.dtb
 dtb-$(CONFIG_ARCH_AT91)        += sama5d35ek.dtb
 
+dtb-$(CONFIG_ARCH_ATLAS6) += atlas6-evb.dtb
+
 dtb-$(CONFIG_ARCH_BCM2835) += bcm2835-rpi-b.dtb
 dtb-$(CONFIG_ARCH_BCM) += bcm11351-brt.dtb \
        bcm28155-ap.dtb
index 05e4485..8ac2ac1 100644 (file)
        };
 
        soc {
+               ranges = <MBUS_ID(0xf0, 0x01) 0 0xd0000000 0x100000
+                         MBUS_ID(0x01, 0xe0) 0 0xfff00000 0x100000>;
+
+               pcie-controller {
+                       status = "okay";
+
+                       /* Connected to Marvell SATA controller */
+                       pcie@1,0 {
+                               /* Port 0, Lane 0 */
+                               status = "okay";
+                       };
+
+                       /* Connected to FL1009 USB 3.0 controller */
+                       pcie@2,0 {
+                               /* Port 1, Lane 0 */
+                               status = "okay";
+                       };
+               };
+
                internal-regs {
                        serial@12000 {
                                clock-frequency = <200000000>;
                                        marvell,pins = "mpp56";
                                        marvell,function = "gpio";
                                };
+
+                               poweroff: poweroff {
+                                       marvell,pins = "mpp8";
+                                       marvell,function = "gpio";
+                               };
                        };
 
                        mdio {
                                        pwm_polarity = <0>;
                                };
                        };
-
-                       pcie-controller {
-                               status = "okay";
-
-                               /* Connected to Marvell SATA controller */
-                               pcie@1,0 {
-                                       /* Port 0, Lane 0 */
-                                       status = "okay";
-                               };
-
-                               /* Connected to FL1009 USB 3.0 controller */
-                               pcie@2,0 {
-                                       /* Port 1, Lane 0 */
-                                       status = "okay";
-                               };
-                       };
                };
        };
 
                button@1 {
                        label = "Power Button";
                        linux,code = <116>;     /* KEY_POWER */
-                       gpios = <&gpio1 30 1>;
+                       gpios = <&gpio1 30 0>;
                };
 
                button@2 {
                };
        };
 
+       gpio_poweroff {
+               compatible = "gpio-poweroff";
+               pinctrl-0 = <&poweroff>;
+               pinctrl-names = "default";
+               gpios = <&gpio0 8 1>;
+       };
+
 };
index def125c..3058522 100644 (file)
@@ -70,6 +70,8 @@
 
                        timer@20300 {
                                compatible = "marvell,armada-xp-timer";
+                               clocks = <&coreclk 2>, <&refclk>;
+                               clock-names = "nbclk", "fixed";
                        };
 
                        coreclk: mvebu-sar@18230 {
                        };
                };
        };
+
+       clocks {
+               /* 25 MHz reference crystal */
+               refclk: oscillator {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <25000000>;
+               };
+       };
 };
index cf78ac0..e74dc15 100644 (file)
                                                         AT91_PIOA 8 AT91_PERIPH_A AT91_PINCTRL_NONE>;  /* PA8 periph A */
                                        };
 
-                                       pinctrl_uart2_rts: uart2_rts-0 {
+                                       pinctrl_usart2_rts: usart2_rts-0 {
                                                atmel,pins =
                                                        <AT91_PIOB 0 AT91_PERIPH_B AT91_PINCTRL_NONE>;  /* PB0 periph B */
                                        };
 
-                                       pinctrl_uart2_cts: uart2_cts-0 {
+                                       pinctrl_usart2_cts: usart2_cts-0 {
                                                atmel,pins =
                                                        <AT91_PIOB 1 AT91_PERIPH_B AT91_PINCTRL_NONE>;  /* PB1 periph B */
                                        };
                                interrupts = <12 IRQ_TYPE_LEVEL_HIGH 0>;
                                dmas = <&dma0 1 AT91_DMA_CFG_PER_ID(0)>;
                                dma-names = "rxtx";
+                               pinctrl-names = "default";
                                #address-cells = <1>;
                                #size-cells = <0>;
                                status = "disabled";
                                interrupts = <26 IRQ_TYPE_LEVEL_HIGH 0>;
                                dmas = <&dma1 1 AT91_DMA_CFG_PER_ID(0)>;
                                dma-names = "rxtx";
+                               pinctrl-names = "default";
                                #address-cells = <1>;
                                #size-cells = <0>;
                                status = "disabled";
index 8678e0c..6db4f81 100644 (file)
                                interrupts = <17>;
                                fifosize = <128>;
                                clocks = <&clks 13>;
+                               sirf,uart-dma-rx-channel = <21>;
+                               sirf,uart-dma-tx-channel = <2>;
                        };
 
                        uart1: uart@b0060000 {
                                interrupts = <19>;
                                fifosize = <128>;
                                clocks = <&clks 15>;
+                               sirf,uart-dma-rx-channel = <6>;
+                               sirf,uart-dma-tx-channel = <7>;
                        };
 
                        usp0: usp@b0080000 {
                                compatible = "sirf,prima2-usp";
                                reg = <0xb0080000 0x10000>;
                                interrupts = <20>;
+                               fifosize = <128>;
                                clocks = <&clks 28>;
+                               sirf,usp-dma-rx-channel = <17>;
+                               sirf,usp-dma-tx-channel = <18>;
                        };
 
                        usp1: usp@b0090000 {
                                compatible = "sirf,prima2-usp";
                                reg = <0xb0090000 0x10000>;
                                interrupts = <21>;
+                               fifosize = <128>;
                                clocks = <&clks 29>;
+                               sirf,usp-dma-rx-channel = <14>;
+                               sirf,usp-dma-tx-channel = <15>;
                        };
 
                        dmac0: dma-controller@b00b0000 {
                                compatible = "sirf,prima2-vip";
                                reg = <0xb00C0000 0x10000>;
                                clocks = <&clks 31>;
+                               interrupts = <14>;
+                               sirf,vip-dma-rx-channel = <16>;
                        };
 
                        spi0: spi@b00d0000 {
index 7d7cc77..bbac42a 100644 (file)
                             <1 14 0xf08>,
                             <1 11 0xf08>,
                             <1 10 0xf08>;
+               /* Unfortunately we need this since some versions of U-Boot
+                * on Exynos don't set the CNTFRQ register, so we need the
+                * value from DT.
+                */
+               clock-frequency = <24000000>;
        };
 
        mct@101C0000 {
index cf7aeaf..1335b2e 100644 (file)
@@ -13,6 +13,7 @@
                cpu@0 {
                        device_type = "cpu";
                        compatible = "marvell,feroceon";
+                       reg = <0>;
                        clocks = <&core_clk 1>, <&core_clk 3>, <&gate_clk 11>;
                        clock-names = "cpu_clk", "ddrclk", "powersave";
                };
                xor@60900 {
                        compatible = "marvell,orion-xor";
                        reg = <0x60900 0x100
-                              0xd0B00 0x100>;
+                              0x60B00 0x100>;
                        status = "okay";
                        clocks = <&gate_clk 16>;
 
index 0c514dc..2816bf6 100644 (file)
@@ -11,7 +11,7 @@
 
 / {
        model = "TI OMAP3 BeagleBoard xM";
-       compatible = "ti,omap3-beagle-xm", "ti,omap3-beagle", "ti,omap3";
+       compatible = "ti,omap3-beagle-xm", "ti,omap36xx", "ti,omap3";
 
        cpus {
                cpu@0 {
index 7d95cda..b41bd57 100644 (file)
                        #address-cells = <1>;
                        #size-cells = <0>;
                        pinctrl-single,register-width = <16>;
-                       pinctrl-single,function-mask = <0x7f1f>;
+                       pinctrl-single,function-mask = <0xff1f>;
                };
 
                omap3_pmx_wkup: pinmux@0x48002a00 {
                        #address-cells = <1>;
                        #size-cells = <0>;
                        pinctrl-single,register-width = <16>;
-                       pinctrl-single,function-mask = <0x7f1f>;
+                       pinctrl-single,function-mask = <0xff1f>;
                };
 
                gpio1: gpio@48310000 {
index bbeb623..27ed9f5 100644 (file)
                        compatible = "simple-bus";
                        #address-cells = <1>;
                        #size-cells = <1>;
-                       ranges = <0xb0000000 0xb0000000 0x180000>;
+                       ranges = <0xb0000000 0xb0000000 0x180000>,
+                              <0x56000000 0x56000000 0x1b00000>;
 
                        timer@b0020000 {
                                compatible = "sirf,prima2-tick";
                        uart0: uart@b0050000 {
                                cell-index = <0>;
                                compatible = "sirf,prima2-uart";
-                               reg = <0xb0050000 0x10000>;
+                               reg = <0xb0050000 0x1000>;
                                interrupts = <17>;
+                               fifosize = <128>;
                                clocks = <&clks 13>;
+                               sirf,uart-dma-rx-channel = <21>;
+                               sirf,uart-dma-tx-channel = <2>;
                        };
 
                        uart1: uart@b0060000 {
                                cell-index = <1>;
                                compatible = "sirf,prima2-uart";
-                               reg = <0xb0060000 0x10000>;
+                               reg = <0xb0060000 0x1000>;
                                interrupts = <18>;
+                               fifosize = <32>;
                                clocks = <&clks 14>;
                        };
 
                        uart2: uart@b0070000 {
                                cell-index = <2>;
                                compatible = "sirf,prima2-uart";
-                               reg = <0xb0070000 0x10000>;
+                               reg = <0xb0070000 0x1000>;
                                interrupts = <19>;
+                               fifosize = <128>;
                                clocks = <&clks 15>;
+                               sirf,uart-dma-rx-channel = <6>;
+                               sirf,uart-dma-tx-channel = <7>;
                        };
 
                        usp0: usp@b0080000 {
                                compatible = "sirf,prima2-usp";
                                reg = <0xb0080000 0x10000>;
                                interrupts = <20>;
+                               fifosize = <128>;
                                clocks = <&clks 28>;
+                               sirf,usp-dma-rx-channel = <17>;
+                               sirf,usp-dma-tx-channel = <18>;
                        };
 
                        usp1: usp@b0090000 {
                                compatible = "sirf,prima2-usp";
                                reg = <0xb0090000 0x10000>;
                                interrupts = <21>;
+                               fifosize = <128>;
                                clocks = <&clks 29>;
+                               sirf,usp-dma-rx-channel = <14>;
+                               sirf,usp-dma-tx-channel = <15>;
                        };
 
                        usp2: usp@b00a0000 {
                                compatible = "sirf,prima2-usp";
                                reg = <0xb00a0000 0x10000>;
                                interrupts = <22>;
+                               fifosize = <128>;
                                clocks = <&clks 30>;
+                               sirf,usp-dma-rx-channel = <10>;
+                               sirf,usp-dma-tx-channel = <11>;
                        };
 
                        dmac0: dma-controller@b00b0000 {
                                compatible = "sirf,prima2-vip";
                                reg = <0xb00C0000 0x10000>;
                                clocks = <&clks 31>;
+                               interrupts = <14>;
+                               sirf,vip-dma-rx-channel = <16>;
                        };
 
                        spi0: spi@b00d0000 {
index 6c26caa..658fcc5 100644 (file)
        };
 
        sdhi0: sdhi@ee100000 {
-               compatible = "renesas,r8a73a4-sdhi";
+               compatible = "renesas,sdhi-r8a73a4";
                reg = <0 0xee100000 0 0x100>;
                interrupt-parent = <&gic>;
                interrupts = <0 165 4>;
        };
 
        sdhi1: sdhi@ee120000 {
-               compatible = "renesas,r8a73a4-sdhi";
+               compatible = "renesas,sdhi-r8a73a4";
                reg = <0 0xee120000 0 0x100>;
                interrupt-parent = <&gic>;
                interrupts = <0 166 4>;
        };
 
        sdhi2: sdhi@ee140000 {
-               compatible = "renesas,r8a73a4-sdhi";
+               compatible = "renesas,sdhi-r8a73a4";
                reg = <0 0xee140000 0 0x100>;
                interrupt-parent = <&gic>;
                interrupts = <0 167 4>;
index 45ac404..3577aba 100644 (file)
@@ -96,6 +96,5 @@
        pfc: pfc@fffc0000 {
                compatible = "renesas,pfc-r8a7778";
                reg = <0xfffc000 0x118>;
-               #gpio-range-cells = <3>;
        };
 };
index 23a6244..ebbe507 100644 (file)
        pfc: pfc@fffc0000 {
                compatible = "renesas,pfc-r8a7779";
                reg = <0xfffc0000 0x23c>;
-               #gpio-range-cells = <3>;
        };
 
        thermal@ffc48000 {
index 3b879e7..413b4c2 100644 (file)
        pfc: pfc@e6060000 {
                compatible = "renesas,pfc-r8a7790";
                reg = <0 0xe6060000 0 0x250>;
-               #gpio-range-cells = <3>;
        };
 
        sdhi0: sdhi@ee100000 {
-               compatible = "renesas,r8a7790-sdhi";
+               compatible = "renesas,sdhi-r8a7790";
                reg = <0 0xee100000 0 0x100>;
                interrupt-parent = <&gic>;
                interrupts = <0 165 4>;
        };
 
        sdhi1: sdhi@ee120000 {
-               compatible = "renesas,r8a7790-sdhi";
+               compatible = "renesas,sdhi-r8a7790";
                reg = <0 0xee120000 0 0x100>;
                interrupt-parent = <&gic>;
                interrupts = <0 166 4>;
        };
 
        sdhi2: sdhi@ee140000 {
-               compatible = "renesas,r8a7790-sdhi";
+               compatible = "renesas,sdhi-r8a7790";
                reg = <0 0xee140000 0 0x100>;
                interrupt-parent = <&gic>;
                interrupts = <0 167 4>;
        };
 
        sdhi3: sdhi@ee160000 {
-               compatible = "renesas,r8a7790-sdhi";
+               compatible = "renesas,sdhi-r8a7790";
                reg = <0 0xee160000 0 0x100>;
                interrupt-parent = <&gic>;
                interrupts = <0 168 4>;
index ba59a58..3955c76 100644 (file)
        };
 
        sdhi0: sdhi@ee100000 {
-               compatible = "renesas,r8a7740-sdhi";
+               compatible = "renesas,sdhi-r8a7740";
                reg = <0xee100000 0x100>;
                interrupt-parent = <&gic>;
                interrupts = <0 83 4
 
        /* SDHI1 and SDHI2 have no CD pins, no need for CD IRQ */
        sdhi1: sdhi@ee120000 {
-               compatible = "renesas,r8a7740-sdhi";
+               compatible = "renesas,sdhi-r8a7740";
                reg = <0xee120000 0x100>;
                interrupt-parent = <&gic>;
                interrupts = <0 88 4
        };
 
        sdhi2: sdhi@ee140000 {
-               compatible = "renesas,r8a7740-sdhi";
+               compatible = "renesas,sdhi-r8a7740";
                reg = <0xee140000 0x100>;
                interrupt-parent = <&gic>;
                interrupts = <0 104 4
index 06ea7d4..2a45092 100644 (file)
 #   $4 - default install path (blank if root directory)
 #
 
+verify () {
+       if [ ! -f "$1" ]; then
+               echo ""                                                   1>&2
+               echo " *** Missing file: $1"                              1>&2
+               echo ' *** You need to run "make" before "make install".' 1>&2
+               echo ""                                                   1>&2
+               exit 1
+       fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
 # User may have a custom install script
 if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
 if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
index 117f955..8e1a024 100644 (file)
@@ -269,6 +269,11 @@ static const struct edmacc_param dummy_paramset = {
        .ccnt = 1,
 };
 
+static const struct of_device_id edma_of_ids[] = {
+       { .compatible = "ti,edma3", },
+       {}
+};
+
 /*****************************************************************************/
 
 static void map_dmach_queue(unsigned ctlr, unsigned ch_no,
@@ -560,14 +565,38 @@ static int reserve_contiguous_slots(int ctlr, unsigned int id,
 static int prepare_unused_channel_list(struct device *dev, void *data)
 {
        struct platform_device *pdev = to_platform_device(dev);
-       int i, ctlr;
+       int i, count, ctlr;
+       struct of_phandle_args  dma_spec;
 
+       if (dev->of_node) {
+               count = of_property_count_strings(dev->of_node, "dma-names");
+               if (count < 0)
+                       return 0;
+               for (i = 0; i < count; i++) {
+                       if (of_parse_phandle_with_args(dev->of_node, "dmas",
+                                                      "#dma-cells", i,
+                                                      &dma_spec))
+                               continue;
+
+                       if (!of_match_node(edma_of_ids, dma_spec.np)) {
+                               of_node_put(dma_spec.np);
+                               continue;
+                       }
+
+                       clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]),
+                                 edma_cc[0]->edma_unused);
+                       of_node_put(dma_spec.np);
+               }
+               return 0;
+       }
+
+       /* For non-OF case */
        for (i = 0; i < pdev->num_resources; i++) {
                if ((pdev->resource[i].flags & IORESOURCE_DMA) &&
                                (int)pdev->resource[i].start >= 0) {
                        ctlr = EDMA_CTLR(pdev->resource[i].start);
                        clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start),
-                                       edma_cc[ctlr]->edma_unused);
+                                 edma_cc[ctlr]->edma_unused);
                }
        }
 
@@ -1762,11 +1791,6 @@ static int edma_probe(struct platform_device *pdev)
        return 0;
 }
 
-static const struct of_device_id edma_of_ids[] = {
-       { .compatible = "ti,edma3", },
-       {}
-};
-
 static struct platform_driver edma_driver = {
        .driver = {
                .name   = "edma",
index 370236d..9902509 100644 (file)
@@ -51,7 +51,8 @@ void mcpm_cpu_power_down(void)
 {
        phys_reset_t phys_reset;
 
-       BUG_ON(!platform_ops);
+       if (WARN_ON_ONCE(!platform_ops || !platform_ops->power_down))
+               return;
        BUG_ON(!irqs_disabled());
 
        /*
@@ -93,7 +94,8 @@ void mcpm_cpu_suspend(u64 expected_residency)
 {
        phys_reset_t phys_reset;
 
-       BUG_ON(!platform_ops);
+       if (WARN_ON_ONCE(!platform_ops || !platform_ops->suspend))
+               return;
        BUG_ON(!irqs_disabled());
 
        /* Very similar to mcpm_cpu_power_down() */
index d56c932..025f6ce 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <asm/mach/sharpsl_param.h>
+#include <asm/memory.h>
 
 /*
  * Certain hardware parameters determined at the time of device manufacture,
  */
 #ifdef CONFIG_ARCH_SA1100
 #define PARAM_BASE     0xe8ffc000
+#define param_start(x) (void *)(x)
 #else
 #define PARAM_BASE     0xa0000a00
+#define param_start(x) __va(x)
 #endif
 #define MAGIC_CHG(a,b,c,d) ( ( d << 24 ) | ( c << 16 )  | ( b << 8 ) | a )
 
@@ -41,7 +44,7 @@ EXPORT_SYMBOL(sharpsl_param);
 
 void sharpsl_save_param(void)
 {
-       memcpy(&sharpsl_param, (void *)PARAM_BASE, sizeof(struct sharpsl_param_info));
+       memcpy(&sharpsl_param, param_start(PARAM_BASE), sizeof(struct sharpsl_param_info));
 
        if (sharpsl_param.comadj_keyword != COMADJ_MAGIC)
                sharpsl_param.comadj=-1;
index f3935b4..119fc37 100644 (file)
@@ -135,6 +135,7 @@ CONFIG_MMC=y
 CONFIG_MMC_ARMMMCI=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_ESDHC_IMX=y
 CONFIG_MMC_SDHCI_TEGRA=y
 CONFIG_MMC_SDHCI_SPEAR=y
 CONFIG_MMC_OMAP=y
index 19d6cd6..3a14ea8 100644 (file)
@@ -148,7 +148,7 @@ AES_Te:
 @               const AES_KEY *key) {
 .align 5
 ENTRY(AES_encrypt)
-       sub     r3,pc,#8                @ AES_encrypt
+       adr     r3,AES_encrypt
        stmdb   sp!,{r1,r4-r12,lr}
        mov     r12,r0          @ inp
        mov     r11,r2
@@ -381,7 +381,7 @@ _armv4_AES_encrypt:
 .align 5
 ENTRY(private_AES_set_encrypt_key)
 _armv4_AES_set_encrypt_key:
-       sub     r3,pc,#8                @ AES_set_encrypt_key
+       adr     r3,_armv4_AES_set_encrypt_key
        teq     r0,#0
        moveq   r0,#-1
        beq     .Labrt
@@ -843,7 +843,7 @@ AES_Td:
 @               const AES_KEY *key) {
 .align 5
 ENTRY(AES_decrypt)
-       sub     r3,pc,#8                @ AES_decrypt
+       adr     r3,AES_decrypt
        stmdb   sp!,{r1,r4-r12,lr}
        mov     r12,r0          @ inp
        mov     r11,r2
index d3db398..59ceae8 100644 (file)
@@ -31,5 +31,4 @@ generic-y += termbits.h
 generic-y += termios.h
 generic-y += timex.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += unaligned.h
index bfc198c..863c892 100644 (file)
@@ -16,7 +16,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-       asm goto("1:\n\t"
+       asm_volatile_goto("1:\n\t"
                 JUMP_LABEL_NOP "\n\t"
                 ".pushsection __jump_table,  \"aw\"\n\t"
                 ".word 1b, %l[l_yes], %c0\n\t"
index 64e9696..1d3153c 100644 (file)
@@ -57,6 +57,7 @@
  * TSC:                Trap SMC
  * TSW:                Trap cache operations by set/way
  * TWI:                Trap WFI
+ * TWE:                Trap WFE
  * TIDCP:      Trap L2CTLR/L2ECTLR
  * BSU_IS:     Upgrade barriers to the inner shareable domain
  * FB:         Force broadcast of all maintainance operations
@@ -67,7 +68,7 @@
  */
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
                        HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
-                       HCR_SWIO | HCR_TIDCP)
+                       HCR_TWE | HCR_SWIO | HCR_TIDCP)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
 /* System Control Register (SCTLR) bits */
 #define TTBCR_IRGN1    (3 << 24)
 #define TTBCR_EPD1     (1 << 23)
 #define TTBCR_A1       (1 << 22)
-#define TTBCR_T1SZ     (3 << 16)
+#define TTBCR_T1SZ     (7 << 16)
 #define TTBCR_SH0      (3 << 12)
 #define TTBCR_ORGN0    (3 << 10)
 #define TTBCR_IRGN0    (3 << 8)
 #define TTBCR_EPD0     (1 << 7)
-#define TTBCR_T0SZ     3
+#define TTBCR_T0SZ     (7 << 0)
 #define HTCR_MASK      (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
 
 /* Hyp System Trap Register */
 #define HSR_EC_DABT    (0x24)
 #define HSR_EC_DABT_HYP        (0x25)
 
+#define HSR_WFI_IS_WFE         (1U << 0)
+
 #define HSR_HVC_IMM_MASK       ((1UL << 16) - 1)
 
 #define HSR_DABT_S1PTW         (1U << 7)
index a2f43dd..661da11 100644 (file)
@@ -39,7 +39,7 @@
 #define c6_IFAR                17      /* Instruction Fault Address Register */
 #define c7_PAR         18      /* Physical Address Register */
 #define c7_PAR_high    19      /* PAR top 32 bits */
-#define c9_L2CTLR      20      /* Cortex A15 L2 Control Register */
+#define c9_L2CTLR      20      /* Cortex A15/A7 L2 Control Register */
 #define c10_PRRR       21      /* Primary Region Remap Register */
 #define c10_NMRR       22      /* Normal Memory Remap Register */
 #define c12_VBAR       23      /* Vector Base Address Register */
index e844b33..0fa90c9 100644 (file)
@@ -157,6 +157,11 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
        return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
 }
 
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.cp15[c0_MPIDR];
+}
+
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 {
        *vcpu_cpsr(vcpu) |= PSR_E_BIT;
index 7d22517..8a6f6db 100644 (file)
 
 #define KVM_VCPU_MAX_FEATURES 1
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES      1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
-
 #include <kvm/arm_vgic.h>
 
 struct kvm_vcpu;
@@ -154,6 +149,7 @@ struct kvm_vcpu_stat {
 struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
                        const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 struct kvm_one_reg;
index 9b28c41..77de4a4 100644 (file)
@@ -62,6 +62,12 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
+static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
+{
+       *pmd = new_pmd;
+       flush_pmd_entry(pmd);
+}
+
 static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
 {
        *pte = new_pte;
@@ -103,9 +109,15 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
        pte_val(*pte) |= L_PTE_S2_RDWR;
 }
 
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+       pmd_val(*pmd) |= L_PMD_S2_RDWR;
+}
+
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
+                                             unsigned long size)
 {
        /*
         * If we are going to insert an instruction page and the icache is
@@ -120,8 +132,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
         * need any kind of flushing (DDI 0406C.b - Page B3-1392).
         */
        if (icache_is_pipt()) {
-               unsigned long hva = gfn_to_hva(kvm, gfn);
-               __cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+               __cpuc_coherent_user_range(hva, hva + size);
        } else if (!icache_is_vivt_asid_tagged()) {
                /* any kind of VIPT cache */
                __flush_icache_all();
index 0f7b762..fc82a88 100644 (file)
@@ -76,8 +76,11 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster);
  *
  * This must be called with interrupts disabled.
  *
- * This does not return.  Re-entry in the kernel is expected via
- * mcpm_entry_point.
+ * On success this does not return.  Re-entry in the kernel is expected
+ * via mcpm_entry_point.
+ *
+ * This will return if mcpm_platform_register() has not been called
+ * previously in which case the caller should take appropriate action.
  */
 void mcpm_cpu_power_down(void);
 
@@ -98,8 +101,11 @@ void mcpm_cpu_power_down(void);
  *
  * This must be called with interrupts disabled.
  *
- * This does not return.  Re-entry in the kernel is expected via
- * mcpm_entry_point.
+ * On success this does not return.  Re-entry in the kernel is expected
+ * via mcpm_entry_point.
+ *
+ * This will return if mcpm_platform_register() has not been called
+ * previously in which case the caller should take appropriate action.
  */
 void mcpm_cpu_suspend(u64 expected_residency);
 
index 5689c18..a331d25 100644 (file)
 #define L_PTE_S2_RDONLY                 (_AT(pteval_t, 1) << 6)   /* HAP[1]   */
 #define L_PTE_S2_RDWR           (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
+#define L_PMD_S2_RDWR           (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+
 /*
  * Hyp-mode PL2 PTE definitions for LPAE.
  */
index f1d96d4..73ddd72 100644 (file)
@@ -57,6 +57,9 @@ static inline void syscall_get_arguments(struct task_struct *task,
                                         unsigned int i, unsigned int n,
                                         unsigned long *args)
 {
+       if (n == 0)
+               return;
+
        if (i + n > SYSCALL_MAX_ARGS) {
                unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
                unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
@@ -81,6 +84,9 @@ static inline void syscall_set_arguments(struct task_struct *task,
                                         unsigned int i, unsigned int n,
                                         const unsigned long *args)
 {
+       if (n == 0)
+               return;
+
        if (i + n > SYSCALL_MAX_ARGS) {
                pr_warning("%s called with max args %d, handling only %d\n",
                           __func__, i + n, SYSCALL_MAX_ARGS);
index 7e1f760..72abdc5 100644 (file)
 #include <asm/unified.h>
 #include <asm/compiler.h>
 
+#if __LINUX_ARM_ARCH__ < 6
+#include <asm-generic/uaccess-unaligned.h>
+#else
+#define __get_user_unaligned __get_user
+#define __put_user_unaligned __put_user
+#endif
+
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
 
index c1ee007..c498b60 100644 (file)
@@ -63,7 +63,8 @@ struct kvm_regs {
 
 /* Supported Processor Types */
 #define KVM_ARM_TARGET_CORTEX_A15      0
-#define KVM_ARM_NUM_TARGETS            1
+#define KVM_ARM_TARGET_CORTEX_A7       1
+#define KVM_ARM_NUM_TARGETS            2
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT      0
index 74ad15d..bc6bd96 100644 (file)
@@ -442,10 +442,10 @@ local_restart:
        ldrcc   pc, [tbl, scno, lsl #2]         @ call sys_* routine
 
        add     r1, sp, #S_OFF
-       cmp     scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
+2:     cmp     scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
        eor     r0, scno, #__NR_SYSCALL_BASE    @ put OS number back
        bcs     arm_syscall
-2:     mov     why, #0                         @ no longer a real syscall
+       mov     why, #0                         @ no longer a real syscall
        b       sys_ni_syscall                  @ not private func
 
 #if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
index de23a9b..39f89fb 100644 (file)
 #ifdef CONFIG_CONTEXT_TRACKING
        .if     \save
        stmdb   sp!, {r0-r3, ip, lr}
-       bl      user_exit
+       bl      context_tracking_user_exit
        ldmia   sp!, {r0-r3, ip, lr}
        .else
-       bl      user_exit
+       bl      context_tracking_user_exit
        .endif
 #endif
        .endm
 #ifdef CONFIG_CONTEXT_TRACKING
        .if     \save
        stmdb   sp!, {r0-r3, ip, lr}
-       bl      user_enter
+       bl      context_tracking_user_enter
        ldmia   sp!, {r0-r3, ip, lr}
        .else
-       bl      user_enter
+       bl      context_tracking_user_enter
        .endif
 #endif
        .endm
index 2c7cc1e..476de57 100644 (file)
@@ -487,7 +487,26 @@ __fixup_smp:
        mrc     p15, 0, r0, c0, c0, 5   @ read MPIDR
        and     r0, r0, #0xc0000000     @ multiprocessing extensions and
        teq     r0, #0x80000000         @ not part of a uniprocessor system?
-       moveq   pc, lr                  @ yes, assume SMP
+       bne    __fixup_smp_on_up        @ no, assume UP
+
+       @ Core indicates it is SMP. Check for Aegis SOC where a single
+       @ Cortex-A9 CPU is present but SMP operations fault.
+       mov     r4, #0x41000000
+       orr     r4, r4, #0x0000c000
+       orr     r4, r4, #0x00000090
+       teq     r3, r4                  @ Check for ARM Cortex-A9
+       movne   pc, lr                  @ Not ARM Cortex-A9,
+
+       @ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the
+       @ below address check will need to be #ifdef'd or equivalent
+       @ for the Aegis platform.
+       mrc     p15, 4, r0, c15, c0     @ get SCU base address
+       teq     r0, #0x0                @ '0' on actual UP A9 hardware
+       beq     __fixup_smp_on_up       @ So its an A9 UP
+       ldr     r0, [r0, #4]            @ read SCU Config
+       and     r0, r0, #0x3            @ number of CPUs
+       teq     r0, #0x0                @ is 1?
+       movne   pc, lr
 
 __fixup_smp_on_up:
        adr     r0, 1f
index ebf5015..466bd29 100644 (file)
@@ -20,6 +20,7 @@ config KVM
        bool "Kernel-based Virtual Machine (KVM) support"
        select PREEMPT_NOTIFIERS
        select ANON_INODES
+       select HAVE_KVM_CPU_RELAX_INTERCEPT
        select KVM_MMIO
        select KVM_ARM_HOST
        depends on ARM_VIRT_EXT && ARM_LPAE
index d99bee4..789bca9 100644 (file)
@@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
index 9c697db..e312e4a 100644 (file)
@@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
        return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages)
 {
        return 0;
 }
@@ -797,6 +798,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        return -EFAULT;
                return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
        }
+       case KVM_ARM_PREFERRED_TARGET: {
+               int err;
+               struct kvm_vcpu_init init;
+
+               err = kvm_vcpu_preferred_target(&init);
+               if (err)
+                       return err;
+
+               if (copy_to_user(argp, &init, sizeof(init)))
+                       return -EFAULT;
+
+               return 0;
+       }
        default:
                return -EINVAL;
        }
index db9cf69..78c0885 100644 (file)
@@ -71,6 +71,98 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
        return 1;
 }
 
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+       /*
+        * Compute guest MPIDR. We build a virtual cluster out of the
+        * vcpu_id, but we read the 'U' bit from the underlying
+        * hardware directly.
+        */
+       vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
+                                    ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
+                                    (vcpu->vcpu_id & 3));
+}
+
+/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
+static bool access_actlr(struct kvm_vcpu *vcpu,
+                        const struct coproc_params *p,
+                        const struct coproc_reg *r)
+{
+       if (p->is_write)
+               return ignore_write(vcpu, p);
+
+       *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+       return true;
+}
+
+/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
+static bool access_cbar(struct kvm_vcpu *vcpu,
+                       const struct coproc_params *p,
+                       const struct coproc_reg *r)
+{
+       if (p->is_write)
+               return write_to_read_only(vcpu, p);
+       return read_zero(vcpu, p);
+}
+
+/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
+static bool access_l2ctlr(struct kvm_vcpu *vcpu,
+                         const struct coproc_params *p,
+                         const struct coproc_reg *r)
+{
+       if (p->is_write)
+               return ignore_write(vcpu, p);
+
+       *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+       return true;
+}
+
+static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+       u32 l2ctlr, ncores;
+
+       asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+       l2ctlr &= ~(3 << 24);
+       ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+       /* How many cores in the current cluster and the next ones */
+       ncores -= (vcpu->vcpu_id & ~3);
+       /* Cap it to the maximum number of cores in a single cluster */
+       ncores = min(ncores, 3U);
+       l2ctlr |= (ncores & 3) << 24;
+
+       vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+       u32 actlr;
+
+       /* ACTLR contains SMP bit: make sure you create all cpus first! */
+       asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+       /* Make the SMP bit consistent with the guest configuration */
+       if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+               actlr |= 1U << 6;
+       else
+               actlr &= ~(1U << 6);
+
+       vcpu->arch.cp15[c1_ACTLR] = actlr;
+}
+
+/*
+ * TRM entries: A7:4.3.50, A15:4.3.49
+ * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
+ */
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+                          const struct coproc_params *p,
+                          const struct coproc_reg *r)
+{
+       if (p->is_write)
+               return ignore_write(vcpu, p);
+
+       *vcpu_reg(vcpu, p->Rt1) = 0;
+       return true;
+}
+
 /* See note at ARM ARM B1.14.4 */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
                        const struct coproc_params *p,
@@ -153,10 +245,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
  *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg cp15_regs[] = {
+       /* MPIDR: we use VMPIDR for guest access. */
+       { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
+                       NULL, reset_mpidr, c0_MPIDR },
+
        /* CSSELR: swapped by interrupt.S. */
        { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
                        NULL, reset_unknown, c0_CSSELR },
 
+       /* ACTLR: trapped by HCR.TAC bit. */
+       { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
+                       access_actlr, reset_actlr, c1_ACTLR },
+
+       /* CPACR: swapped by interrupt.S. */
+       { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
+                       NULL, reset_val, c1_CPACR, 0x00000000 },
+
        /* TTBR0/TTBR1: swapped by interrupt.S. */
        { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
        { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
@@ -195,6 +299,13 @@ static const struct coproc_reg cp15_regs[] = {
        { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
        { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
        /*
+        * L2CTLR access (guest wants to know #CPUs).
+        */
+       { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
+                       access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
+       { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
+
+       /*
         * Dummy performance monitor implementation.
         */
        { CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr},
@@ -234,6 +345,9 @@ static const struct coproc_reg cp15_regs[] = {
        /* CNTKCTL: swapped by interrupt.S. */
        { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
                        NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+
+       /* The Configuration Base Address Register. */
+       { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
 };
 
 /* Target specific emulation tables */
@@ -241,6 +355,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
 
 void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
 {
+       unsigned int i;
+
+       for (i = 1; i < table->num; i++)
+               BUG_ON(cmp_reg(&table->table[i-1],
+                              &table->table[i]) >= 0);
+
        target_tables[table->target] = table;
 }
 
index cf93472..bb0cac1 100644 (file)
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 #include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_host.h>
-#include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
 #include <linux/init.h>
 
-static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-       /*
-        * Compute guest MPIDR:
-        * (Even if we present only one VCPU to the guest on an SMP
-        * host we don't set the U bit in the MPIDR, or vice versa, as
-        * revealing the underlying hardware properties is likely to
-        * be the best choice).
-        */
-       vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
-               | (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
-}
-
 #include "coproc.h"
 
-/* A15 TRM 4.3.28: RO WI */
-static bool access_actlr(struct kvm_vcpu *vcpu,
-                        const struct coproc_params *p,
-                        const struct coproc_reg *r)
-{
-       if (p->is_write)
-               return ignore_write(vcpu, p);
-
-       *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
-       return true;
-}
-
-/* A15 TRM 4.3.60: R/O. */
-static bool access_cbar(struct kvm_vcpu *vcpu,
-                       const struct coproc_params *p,
-                       const struct coproc_reg *r)
-{
-       if (p->is_write)
-               return write_to_read_only(vcpu, p);
-       return read_zero(vcpu, p);
-}
-
-/* A15 TRM 4.3.48: R/O WI. */
-static bool access_l2ctlr(struct kvm_vcpu *vcpu,
-                         const struct coproc_params *p,
-                         const struct coproc_reg *r)
-{
-       if (p->is_write)
-               return ignore_write(vcpu, p);
-
-       *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
-       return true;
-}
-
-static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-       u32 l2ctlr, ncores;
-
-       asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
-       l2ctlr &= ~(3 << 24);
-       ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
-       l2ctlr |= (ncores & 3) << 24;
-
-       vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-       u32 actlr;
-
-       /* ACTLR contains SMP bit: make sure you create all cpus first! */
-       asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
-       /* Make the SMP bit consistent with the guest configuration */
-       if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
-               actlr |= 1U << 6;
-       else
-               actlr &= ~(1U << 6);
-
-       vcpu->arch.cp15[c1_ACTLR] = actlr;
-}
-
-/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
-static bool access_l2ectlr(struct kvm_vcpu *vcpu,
-                          const struct coproc_params *p,
-                          const struct coproc_reg *r)
-{
-       if (p->is_write)
-               return ignore_write(vcpu, p);
-
-       *vcpu_reg(vcpu, p->Rt1) = 0;
-       return true;
-}
-
 /*
  * A15-specific CP15 registers.
  * CRn denotes the primary register number, but is copied to the CRm in the
@@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
  *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg a15_regs[] = {
-       /* MPIDR: we use VMPIDR for guest access. */
-       { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
-                       NULL, reset_mpidr, c0_MPIDR },
-
        /* SCTLR: swapped by interrupt.S. */
        { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
                        NULL, reset_val, c1_SCTLR, 0x00C50078 },
-       /* ACTLR: trapped by HCR.TAC bit. */
-       { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
-                       access_actlr, reset_actlr, c1_ACTLR },
-       /* CPACR: swapped by interrupt.S. */
-       { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
-                       NULL, reset_val, c1_CPACR, 0x00000000 },
-
-       /*
-        * L2CTLR access (guest wants to know #CPUs).
-        */
-       { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
-                       access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
-       { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
-
-       /* The Configuration Base Address Register. */
-       { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
 };
 
 static struct kvm_coproc_target_table a15_target_table = {
@@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {
 
 static int __init coproc_a15_init(void)
 {
-       unsigned int i;
-
-       for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
-               BUG_ON(cmp_reg(&a15_regs[i-1],
-                              &a15_regs[i]) >= 0);
-
        kvm_register_target_coproc_table(&a15_target_table);
        return 0;
 }
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
new file mode 100644 (file)
index 0000000..1df7673
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Copyright (C) 2013 - ARM Ltd
+ *
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *          Jonathan Austin <jonathan.austin@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <linux/init.h>
+
+#include "coproc.h"
+
+/*
+ * Cortex-A7 specific CP15 registers.
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
+ */
+static const struct coproc_reg a7_regs[] = {
+       /* SCTLR: swapped by interrupt.S. */
+       { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
+                       NULL, reset_val, c1_SCTLR, 0x00C50878 },
+};
+
+static struct kvm_coproc_target_table a7_target_table = {
+       .target = KVM_ARM_TARGET_CORTEX_A7,
+       .table = a7_regs,
+       .num = ARRAY_SIZE(a7_regs),
+};
+
+static int __init coproc_a7_init(void)
+{
+       kvm_register_target_coproc_table(&a7_target_table);
+       return 0;
+}
+late_initcall(coproc_a7_init);
index bdede9e..d6c0052 100644 (file)
@@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
        *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
 
        if (is_pabt) {
-               /* Set DFAR and DFSR */
+               /* Set IFAR and IFSR */
                vcpu->arch.cp15[c6_IFAR] = addr;
                is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
                /* Always give debug fault for now - should give guest a clue */
index 152d036..20f8d97 100644 (file)
@@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void)
                return -EINVAL;
 
        switch (part_number) {
+       case ARM_CPU_PART_CORTEX_A7:
+               return KVM_ARM_TARGET_CORTEX_A7;
        case ARM_CPU_PART_CORTEX_A15:
                return KVM_ARM_TARGET_CORTEX_A15;
        default:
@@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 {
        unsigned int i;
 
-       /* We can only do a cortex A15 for now. */
+       /* We can only cope with guest==host and only on A15/A7 (for now). */
        if (init->target != kvm_target_cpu())
                return -EINVAL;
 
@@ -222,6 +224,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
        return kvm_reset_vcpu(vcpu);
 }
 
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+       int target = kvm_target_cpu();
+
+       if (target < 0)
+               return -ENODEV;
+
+       memset(init, 0, sizeof(*init));
+
+       /*
+        * For now, we don't return any features.
+        * In future, we might use features to return target
+        * specific features available for the preferred
+        * target type.
+        */
+       init->target = (__u32)target;
+
+       return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        return -EINVAL;
index df4c82d..a920790 100644 (file)
@@ -73,23 +73,29 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 }
 
 /**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests
  * @vcpu:      the vcpu pointer
  * @run:       the kvm_run structure pointer
  *
- * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
- * halt execution of world-switches and schedule other host processes until
- * there is an incoming IRQ or FIQ to the VM.
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
  */
-static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
        trace_kvm_wfi(*vcpu_pc(vcpu));
-       kvm_vcpu_block(vcpu);
+       if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
+               kvm_vcpu_on_spin(vcpu);
+       else
+               kvm_vcpu_block(vcpu);
+
        return 1;
 }
 
 static exit_handle_fn arm_exit_handlers[] = {
-       [HSR_EC_WFI]            = kvm_handle_wfi,
+       [HSR_EC_WFI]            = kvm_handle_wfx,
        [HSR_EC_CP15_32]        = kvm_handle_cp15_32,
        [HSR_EC_CP15_64]        = kvm_handle_cp15_64,
        [HSR_EC_CP14_MR]        = kvm_handle_cp14_access,
index b0de86b..3719583 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/mman.h>
 #include <linux/kvm_host.h>
 #include <linux/io.h>
+#include <linux/hugetlb.h>
 #include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
@@ -41,6 +42,8 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+#define kvm_pmd_huge(_x)       (pmd_huge(_x) || pmd_trans_huge(_x))
+
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
        /*
@@ -93,19 +96,29 @@ static bool page_empty(void *ptr)
 
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-       pmd_t *pmd_table = pmd_offset(pud, 0);
-       pud_clear(pud);
-       kvm_tlb_flush_vmid_ipa(kvm, addr);
-       pmd_free(NULL, pmd_table);
+       if (pud_huge(*pud)) {
+               pud_clear(pud);
+               kvm_tlb_flush_vmid_ipa(kvm, addr);
+       } else {
+               pmd_t *pmd_table = pmd_offset(pud, 0);
+               pud_clear(pud);
+               kvm_tlb_flush_vmid_ipa(kvm, addr);
+               pmd_free(NULL, pmd_table);
+       }
        put_page(virt_to_page(pud));
 }
 
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
-       pte_t *pte_table = pte_offset_kernel(pmd, 0);
-       pmd_clear(pmd);
-       kvm_tlb_flush_vmid_ipa(kvm, addr);
-       pte_free_kernel(NULL, pte_table);
+       if (kvm_pmd_huge(*pmd)) {
+               pmd_clear(pmd);
+               kvm_tlb_flush_vmid_ipa(kvm, addr);
+       } else {
+               pte_t *pte_table = pte_offset_kernel(pmd, 0);
+               pmd_clear(pmd);
+               kvm_tlb_flush_vmid_ipa(kvm, addr);
+               pte_free_kernel(NULL, pte_table);
+       }
        put_page(virt_to_page(pmd));
 }
 
@@ -136,18 +149,32 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
                        continue;
                }
 
+               if (pud_huge(*pud)) {
+                       /*
+                        * If we are dealing with a huge pud, just clear it and
+                        * move on.
+                        */
+                       clear_pud_entry(kvm, pud, addr);
+                       addr = pud_addr_end(addr, end);
+                       continue;
+               }
+
                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        addr = pmd_addr_end(addr, end);
                        continue;
                }
 
-               pte = pte_offset_kernel(pmd, addr);
-               clear_pte_entry(kvm, pte, addr);
-               next = addr + PAGE_SIZE;
+               if (!kvm_pmd_huge(*pmd)) {
+                       pte = pte_offset_kernel(pmd, addr);
+                       clear_pte_entry(kvm, pte, addr);
+                       next = addr + PAGE_SIZE;
+               }
 
-               /* If we emptied the pte, walk back up the ladder */
-               if (page_empty(pte)) {
+               /*
+                * If the pmd entry is to be cleared, walk back up the ladder
+                */
+               if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
                        clear_pmd_entry(kvm, pmd, addr);
                        next = pmd_addr_end(addr, end);
                        if (page_empty(pmd) && !page_empty(pud)) {
@@ -420,29 +447,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
        kvm->arch.pgd = NULL;
 }
 
-
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-                         phys_addr_t addr, const pte_t *new_pte, bool iomap)
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+                            phys_addr_t addr)
 {
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
-       pte_t *pte, old_pte;
 
-       /* Create 2nd stage page table mapping - Level 1 */
        pgd = kvm->arch.pgd + pgd_index(addr);
        pud = pud_offset(pgd, addr);
        if (pud_none(*pud)) {
                if (!cache)
-                       return 0; /* ignore calls from kvm_set_spte_hva */
+                       return NULL;
                pmd = mmu_memory_cache_alloc(cache);
                pud_populate(NULL, pud, pmd);
                get_page(virt_to_page(pud));
        }
 
-       pmd = pmd_offset(pud, addr);
+       return pmd_offset(pud, addr);
+}
+
+static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+                              *cache, phys_addr_t addr, const pmd_t *new_pmd)
+{
+       pmd_t *pmd, old_pmd;
+
+       pmd = stage2_get_pmd(kvm, cache, addr);
+       VM_BUG_ON(!pmd);
+
+       /*
+        * Mapping in huge pages should only happen through a fault.  If a
+        * page is merged into a transparent huge page, the individual
+        * subpages of that huge page should be unmapped through MMU
+        * notifiers before we get here.
+        *
+        * Merging of CompoundPages is not supported; they should become
+        * splitting first, unmapped, merged, and mapped back in on-demand.
+        */
+       VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
+
+       old_pmd = *pmd;
+       kvm_set_pmd(pmd, *new_pmd);
+       if (pmd_present(old_pmd))
+               kvm_tlb_flush_vmid_ipa(kvm, addr);
+       else
+               get_page(virt_to_page(pmd));
+       return 0;
+}
+
+static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+                         phys_addr_t addr, const pte_t *new_pte, bool iomap)
+{
+       pmd_t *pmd;
+       pte_t *pte, old_pte;
 
-       /* Create 2nd stage page table mapping - Level 2 */
+       /* Create stage-2 page table mapping - Level 1 */
+       pmd = stage2_get_pmd(kvm, cache, addr);
+       if (!pmd) {
+               /*
+                * Ignore calls from kvm_set_spte_hva for unallocated
+                * address ranges.
+                */
+               return 0;
+       }
+
+       /* Create stage-2 page mappings - Level 2 */
        if (pmd_none(*pmd)) {
                if (!cache)
                        return 0; /* ignore calls from kvm_set_spte_hva */
@@ -507,16 +576,60 @@ out:
        return ret;
 }
 
+static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
+{
+       pfn_t pfn = *pfnp;
+       gfn_t gfn = *ipap >> PAGE_SHIFT;
+
+       if (PageTransCompound(pfn_to_page(pfn))) {
+               unsigned long mask;
+               /*
+                * The address we faulted on is backed by a transparent huge
+                * page.  However, because we map the compound huge page and
+                * not the individual tail page, we need to transfer the
+                * refcount to the head page.  We have to be careful that the
+                * THP doesn't start to split while we are adjusting the
+                * refcounts.
+                *
+                * We are sure this doesn't happen, because mmu_notifier_retry
+                * was successful and we are holding the mmu_lock, so if this
+                * THP is trying to split, it will be blocked in the mmu
+                * notifier before touching any of the pages, specifically
+                * before being able to call __split_huge_page_refcount().
+                *
+                * We can therefore safely transfer the refcount from PG_tail
+                * to PG_head and switch the pfn from a tail page to the head
+                * page accordingly.
+                */
+               mask = PTRS_PER_PMD - 1;
+               VM_BUG_ON((gfn & mask) != (pfn & mask));
+               if (pfn & mask) {
+                       *ipap &= PMD_MASK;
+                       kvm_release_pfn_clean(pfn);
+                       pfn &= ~mask;
+                       kvm_get_pfn(pfn);
+                       *pfnp = pfn;
+               }
+
+               return true;
+       }
+
+       return false;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-                         gfn_t gfn, struct kvm_memory_slot *memslot,
+                         struct kvm_memory_slot *memslot,
                          unsigned long fault_status)
 {
-       pte_t new_pte;
-       pfn_t pfn;
        int ret;
-       bool write_fault, writable;
+       bool write_fault, writable, hugetlb = false, force_pte = false;
        unsigned long mmu_seq;
+       gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+       unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
+       struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+       struct vm_area_struct *vma;
+       pfn_t pfn;
 
        write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
        if (fault_status == FSC_PERM && !write_fault) {
@@ -524,6 +637,26 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                return -EFAULT;
        }
 
+       /* Let's check if we will get back a huge page backed by hugetlbfs */
+       down_read(&current->mm->mmap_sem);
+       vma = find_vma_intersection(current->mm, hva, hva + 1);
+       if (is_vm_hugetlb_page(vma)) {
+               hugetlb = true;
+               gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+       } else {
+               /*
+                * Pages belonging to VMAs not aligned to the PMD mapping
+                * granularity cannot be mapped using block descriptors even
+                * if the pages belong to a THP for the process, because the
+                * stage-2 block descriptor will cover more than a single THP
+                * and we loose atomicity for unmapping, updates, and splits
+                * of the THP or other pages in the stage-2 block range.
+                */
+               if (vma->vm_start & ~PMD_MASK)
+                       force_pte = true;
+       }
+       up_read(&current->mm->mmap_sem);
+
        /* We need minimum second+third level pages */
        ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
        if (ret)
@@ -541,26 +674,40 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         */
        smp_rmb();
 
-       pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
+       pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
        if (is_error_pfn(pfn))
                return -EFAULT;
 
-       new_pte = pfn_pte(pfn, PAGE_S2);
-       coherent_icache_guest_page(vcpu->kvm, gfn);
-
-       spin_lock(&vcpu->kvm->mmu_lock);
-       if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+       spin_lock(&kvm->mmu_lock);
+       if (mmu_notifier_retry(kvm, mmu_seq))
                goto out_unlock;
-       if (writable) {
-               kvm_set_s2pte_writable(&new_pte);
-               kvm_set_pfn_dirty(pfn);
+       if (!hugetlb && !force_pte)
+               hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
+
+       if (hugetlb) {
+               pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
+               new_pmd = pmd_mkhuge(new_pmd);
+               if (writable) {
+                       kvm_set_s2pmd_writable(&new_pmd);
+                       kvm_set_pfn_dirty(pfn);
+               }
+               coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
+               ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+       } else {
+               pte_t new_pte = pfn_pte(pfn, PAGE_S2);
+               if (writable) {
+                       kvm_set_s2pte_writable(&new_pte);
+                       kvm_set_pfn_dirty(pfn);
+               }
+               coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
+               ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
        }
-       stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
+
 
 out_unlock:
-       spin_unlock(&vcpu->kvm->mmu_lock);
+       spin_unlock(&kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
-       return 0;
+       return ret;
 }
 
 /**
@@ -629,7 +776,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
        memslot = gfn_to_memslot(vcpu->kvm, gfn);
 
-       ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
+       ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
        if (ret == 0)
                ret = 1;
 out_unlock:
index ae0e06b..0881bf1 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
+#include <asm/cputype.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_psci.h>
 
@@ -34,22 +35,30 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 {
        struct kvm *kvm = source_vcpu->kvm;
-       struct kvm_vcpu *vcpu;
+       struct kvm_vcpu *vcpu = NULL, *tmp;
        wait_queue_head_t *wq;
        unsigned long cpu_id;
+       unsigned long mpidr;
        phys_addr_t target_pc;
+       int i;
 
        cpu_id = *vcpu_reg(source_vcpu, 1);
        if (vcpu_mode_is_32bit(source_vcpu))
                cpu_id &= ~((u32) 0);
 
-       if (cpu_id >= atomic_read(&kvm->online_vcpus))
+       kvm_for_each_vcpu(i, tmp, kvm) {
+               mpidr = kvm_vcpu_get_mpidr(tmp);
+               if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
+                       vcpu = tmp;
+                       break;
+               }
+       }
+
+       if (!vcpu)
                return KVM_PSCI_RET_INVAL;
 
        target_pc = *vcpu_reg(source_vcpu, 2);
 
-       vcpu = kvm_get_vcpu(kvm, cpu_id);
-
        wq = kvm_arch_vcpu_wq(vcpu);
        if (!waitqueue_active(wq))
                return KVM_PSCI_RET_INVAL;
index 71e08ba..f558c07 100644 (file)
 #include <kvm/arm_arch_timer.h>
 
 /******************************************************************************
- * Cortex-A15 Reset Values
+ * Cortex-A15 and Cortex-A7 Reset Values
  */
 
-static const int a15_max_cpu_idx = 3;
-
-static struct kvm_regs a15_regs_reset = {
+static struct kvm_regs cortexa_regs_reset = {
        .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
-static const struct kvm_irq_level a15_vtimer_irq = {
+static const struct kvm_irq_level cortexa_vtimer_irq = {
        { .irq = 27 },
        .level = 1,
 };
@@ -58,23 +56,22 @@ static const struct kvm_irq_level a15_vtimer_irq = {
  */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
-       struct kvm_regs *cpu_reset;
+       struct kvm_regs *reset_regs;
        const struct kvm_irq_level *cpu_vtimer_irq;
 
        switch (vcpu->arch.target) {
+       case KVM_ARM_TARGET_CORTEX_A7:
        case KVM_ARM_TARGET_CORTEX_A15:
-               if (vcpu->vcpu_id > a15_max_cpu_idx)
-                       return -EINVAL;
-               cpu_reset = &a15_regs_reset;
+               reset_regs = &cortexa_regs_reset;
                vcpu->arch.midr = read_cpuid_id();
-               cpu_vtimer_irq = &a15_vtimer_irq;
+               cpu_vtimer_irq = &cortexa_vtimer_irq;
                break;
        default:
                return -ENODEV;
        }
 
        /* Reset core registers */
-       memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
+       memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
 
        /* Reset CP15 registers */
        kvm_reset_coprocs(vcpu);
index 180b302..f607deb 100644 (file)
@@ -93,7 +93,7 @@ static irqreturn_t at91rm9200_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction at91rm9200_timer_irq = {
        .name           = "at91_tick",
-       .flags          = IRQF_SHARED | IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+       .flags          = IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
        .handler        = at91rm9200_timer_interrupt,
        .irq            = NR_IRQS_LEGACY + AT91_ID_SYS,
 };
index 3a4bc2e..bb39232 100644 (file)
@@ -171,7 +171,7 @@ static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id)
 
 static struct irqaction at91sam926x_pit_irq = {
        .name           = "at91_tick",
-       .flags          = IRQF_SHARED | IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+       .flags          = IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
        .handler        = at91sam926x_pit_interrupt,
        .irq            = NR_IRQS_LEGACY + AT91_ID_SYS,
 };
index 721a1a3..c40c1e2 100644 (file)
 #include "at91_rstc.h"
                        .arm
 
+/*
+ * at91_ramc_base is an array void*
+ * init at NULL if only one DDR controler is present in or DT
+ */
                        .globl  at91sam9g45_restart
 
 at91sam9g45_restart:
                        ldr     r5, =at91_ramc_base             @ preload constants
                        ldr     r0, [r5]
+                       ldr     r5, [r5, #4]                    @ ddr1
+                       cmp     r5, #0
                        ldr     r4, =at91_rstc_base
                        ldr     r1, [r4]
 
@@ -30,6 +36,8 @@ at91sam9g45_restart:
 
                        .balign 32                              @ align to cache line
 
+                       strne   r2, [r5, #AT91_DDRSDRC_RTR]     @ disable DDR1 access
+                       strne   r3, [r5, #AT91_DDRSDRC_LPR]     @ power down DDR1
                        str     r2, [r0, #AT91_DDRSDRC_RTR]     @ disable DDR0 access
                        str     r3, [r0, #AT91_DDRSDRC_LPR]     @ power down DDR0
                        str     r4, [r1, #AT91_RSTC_CR]         @ reset processor
index 2919eba..c0e637a 100644 (file)
@@ -57,7 +57,7 @@ static irqreturn_t at91x40_timer_interrupt(int irq, void *dev_id)
 
 static struct irqaction at91x40_timer_irq = {
        .name           = "at91_tick",
-       .flags          = IRQF_DISABLED | IRQF_TIMER,
+       .flags          = IRQF_TIMER,
        .handler        = at91x40_timer_interrupt
 };
 
index 92b7f77..4078ba9 100644 (file)
@@ -176,7 +176,7 @@ static struct at24_platform_data eeprom_info = {
        .context        = (void *)0x7f00,
 };
 
-static struct snd_platform_data dm365_evm_snd_data = {
+static struct snd_platform_data dm365_evm_snd_data __maybe_unused = {
        .asp_chan_q = EVENTQ_3,
 };
 
index 52b8571..ce402cd 100644 (file)
@@ -15,8 +15,6 @@
 
 #include <mach/hardware.h>
 
-#include <linux/platform_device.h>
-
 #define DAVINCI_UART0_BASE     (IO_PHYS + 0x20000)
 #define DAVINCI_UART1_BASE     (IO_PHYS + 0x20400)
 #define DAVINCI_UART2_BASE     (IO_PHYS + 0x20800)
@@ -39,6 +37,8 @@
 #define UART_DM646X_SCR_TX_WATERMARK   0x08
 
 #ifndef __ASSEMBLY__
+#include <linux/platform_device.h>
+
 extern int davinci_serial_init(struct platform_device *);
 #endif
 
index 755fd29..06a9e2e 100644 (file)
@@ -1,2 +1,9 @@
 /* Simple oneliner include to the PCIv3 early init */
+#ifdef CONFIG_PCI
 extern int pci_v3_early_init(void);
+#else
+static inline int pci_v3_early_init(void)
+{
+       return 0;
+}
+#endif
index 4c24303..58adf2f 100644 (file)
@@ -140,6 +140,7 @@ int __init coherency_init(void)
                coherency_base = of_iomap(np, 0);
                coherency_cpu_base = of_iomap(np, 1);
                set_cpu_coherent(cpu_logical_map(smp_processor_id()), 0);
+               of_node_put(np);
        }
 
        return 0;
@@ -147,9 +148,14 @@ int __init coherency_init(void)
 
 static int __init coherency_late_init(void)
 {
-       if (of_find_matching_node(NULL, of_coherency_table))
+       struct device_node *np;
+
+       np = of_find_matching_node(NULL, of_coherency_table);
+       if (np) {
                bus_register_notifier(&platform_bus_type,
                                      &mvebu_hwcc_platform_nb);
+               of_node_put(np);
+       }
        return 0;
 }
 
index 3cc4bef..27fc4f0 100644 (file)
@@ -67,6 +67,7 @@ int __init armada_370_xp_pmsu_init(void)
                pr_info("Initializing Power Management Service Unit\n");
                pmsu_mp_base = of_iomap(np, 0);
                pmsu_reset_base = of_iomap(np, 1);
+               of_node_put(np);
        }
 
        return 0;
index f875124..5175083 100644 (file)
@@ -98,6 +98,7 @@ static int __init mvebu_system_controller_init(void)
                BUG_ON(!match);
                system_controller_base = of_iomap(np, 0);
                mvebu_sc = (struct mvebu_system_controller *)match->data;
+               of_node_put(np);
        }
 
        return 0;
index 39c7838..87162e1 100644 (file)
@@ -129,6 +129,24 @@ DT_MACHINE_START(OMAP3_DT, "Generic OMAP3 (Flattened Device Tree)")
        .restart        = omap3xxx_restart,
 MACHINE_END
 
+static const char *omap36xx_boards_compat[] __initdata = {
+       "ti,omap36xx",
+       NULL,
+};
+
+DT_MACHINE_START(OMAP36XX_DT, "Generic OMAP36xx (Flattened Device Tree)")
+       .reserve        = omap_reserve,
+       .map_io         = omap3_map_io,
+       .init_early     = omap3630_init_early,
+       .init_irq       = omap_intc_of_init,
+       .handle_irq     = omap3_intc_handle_irq,
+       .init_machine   = omap_generic_init,
+       .init_late      = omap3_init_late,
+       .init_time      = omap3_sync32k_timer_init,
+       .dt_compat      = omap36xx_boards_compat,
+       .restart        = omap3xxx_restart,
+MACHINE_END
+
 static const char *omap3_gp_boards_compat[] __initdata = {
        "ti,omap3-beagle",
        "timll,omap3-devkit8000",
index c3270c0..f6fe388 100644 (file)
@@ -167,38 +167,47 @@ static struct lp55xx_led_config rx51_lp5523_led_config[] = {
                .name           = "lp5523:kb1",
                .chan_nr        = 0,
                .led_current    = 50,
+               .max_current    = 100,
        }, {
                .name           = "lp5523:kb2",
                .chan_nr        = 1,
                .led_current    = 50,
+               .max_current    = 100,
        }, {
                .name           = "lp5523:kb3",
                .chan_nr        = 2,
                .led_current    = 50,
+               .max_current    = 100,
        }, {
                .name           = "lp5523:kb4",
                .chan_nr        = 3,
                .led_current    = 50,
+               .max_current    = 100,
        }, {
                .name           = "lp5523:b",
                .chan_nr        = 4,
                .led_current    = 50,
+               .max_current    = 100,
        }, {
                .name           = "lp5523:g",
                .chan_nr        = 5,
                .led_current    = 50,
+               .max_current    = 100,
        }, {
                .name           = "lp5523:r",
                .chan_nr        = 6,
                .led_current    = 50,
+               .max_current    = 100,
        }, {
                .name           = "lp5523:kb5",
                .chan_nr        = 7,
                .led_current    = 50,
+               .max_current    = 100,
        }, {
                .name           = "lp5523:kb6",
                .chan_nr        = 8,
                .led_current    = 50,
+               .max_current    = 100,
        }
 };
 
index 64b5a83..8b6876c 100644 (file)
@@ -272,9 +272,19 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base)
        struct gpmc_timings t;
        int ret;
 
-       if (gpmc_onenand_data->of_node)
+       if (gpmc_onenand_data->of_node) {
                gpmc_read_settings_dt(gpmc_onenand_data->of_node,
                                      &onenand_async);
+               if (onenand_async.sync_read || onenand_async.sync_write) {
+                       if (onenand_async.sync_write)
+                               gpmc_onenand_data->flags |=
+                                       ONENAND_SYNC_READWRITE;
+                       else
+                               gpmc_onenand_data->flags |= ONENAND_SYNC_READ;
+                       onenand_async.sync_read = false;
+                       onenand_async.sync_write = false;
+               }
+       }
 
        omap2_onenand_set_async_mode(onenand_base);
 
index 5d2080e..16f78a9 100644 (file)
@@ -28,7 +28,7 @@
 #define OMAP_PULL_UP                   (1 << 4)
 #define OMAP_ALTELECTRICALSEL          (1 << 5)
 
-/* 34xx specific mux bit defines */
+/* omap3/4/5 specific mux bit defines */
 #define OMAP_INPUT_EN                  (1 << 8)
 #define OMAP_OFF_EN                    (1 << 9)
 #define OMAP_OFFOUT_EN                 (1 << 10)
@@ -36,8 +36,6 @@
 #define OMAP_OFF_PULL_EN               (1 << 12)
 #define OMAP_OFF_PULL_UP               (1 << 13)
 #define OMAP_WAKEUP_EN                 (1 << 14)
-
-/* 44xx specific mux bit defines */
 #define OMAP_WAKEUP_EVENT              (1 << 15)
 
 /* Active pin states */
index fa74a06..ead48fa 100644 (file)
@@ -628,7 +628,7 @@ void __init omap4_local_timer_init(void)
 #endif /* CONFIG_HAVE_ARM_TWD */
 #endif /* CONFIG_ARCH_OMAP4 */
 
-#ifdef CONFIG_SOC_OMAP5
+#if defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX)
 void __init omap5_realtime_timer_init(void)
 {
        omap4_sync32k_timer_init();
@@ -636,7 +636,7 @@ void __init omap5_realtime_timer_init(void)
 
        clocksource_of_init();
 }
-#endif /* CONFIG_SOC_OMAP5 */
+#endif /* CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX */
 
 /**
  * omap_timer_init - build and register timer device with an
index 5bd1479..7f8f607 100644 (file)
@@ -1108,9 +1108,9 @@ static const struct pinctrl_map eva_pinctrl_map[] = {
        PIN_MAP_MUX_GROUP_DEFAULT("asoc-simple-card.1", "pfc-r8a7740",
                                  "fsib_mclk_in", "fsib"),
        /* GETHER */
-       PIN_MAP_MUX_GROUP_DEFAULT("sh-eth", "pfc-r8a7740",
+       PIN_MAP_MUX_GROUP_DEFAULT("r8a7740-gether", "pfc-r8a7740",
                                  "gether_mii", "gether"),
-       PIN_MAP_MUX_GROUP_DEFAULT("sh-eth", "pfc-r8a7740",
+       PIN_MAP_MUX_GROUP_DEFAULT("r8a7740-gether", "pfc-r8a7740",
                                  "gether_int", "gether"),
        /* HDMI */
        PIN_MAP_MUX_GROUP_DEFAULT("sh-mobile-hdmi", "pfc-r8a7740",
index ffb6f0a..5930af8 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/pinctrl/machine.h>
 #include <linux/platform_data/gpio-rcar.h>
 #include <linux/platform_device.h>
+#include <linux/phy.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
 #include <linux/sh_eth.h>
@@ -155,6 +156,30 @@ static void __init lager_add_standard_devices(void)
                                          &ether_pdata, sizeof(ether_pdata));
 }
 
+/*
+ * Ether LEDs on the Lager board are named LINK and ACTIVE which corresponds
+ * to non-default 01 setting of the Micrel KSZ8041 PHY control register 1 bits
+ * 14-15. We have to set them back to 01 from the default 00 value each time
+ * the PHY is reset. It's also important because the PHY's LED0 signal is
+ * connected to SoC's ETH_LINK signal and in the PHY's default mode it will
+ * bounce on and off after each packet, which we apparently want to avoid.
+ */
+static int lager_ksz8041_fixup(struct phy_device *phydev)
+{
+       u16 phyctrl1 = phy_read(phydev, 0x1e);
+
+       phyctrl1 &= ~0xc000;
+       phyctrl1 |= 0x4000;
+       return phy_write(phydev, 0x1e, phyctrl1);
+}
+
+static void __init lager_init(void)
+{
+       lager_add_standard_devices();
+
+       phy_register_fixup_for_id("r8a7790-ether-ff:01", lager_ksz8041_fixup);
+}
+
 static const char *lager_boards_compat_dt[] __initdata = {
        "renesas,lager",
        NULL,
@@ -163,6 +188,6 @@ static const char *lager_boards_compat_dt[] __initdata = {
 DT_MACHINE_START(LAGER_DT, "lager")
        .init_early     = r8a7790_init_delay,
        .init_time      = r8a7790_timer_init,
-       .init_machine   = lager_add_standard_devices,
+       .init_machine   = lager_init,
        .dt_compat      = lager_boards_compat_dt,
 MACHINE_END
index 7aeb5d6..e6eb481 100644 (file)
@@ -131,6 +131,16 @@ static void tc2_pm_down(u64 residency)
        } else
                BUG();
 
+       /*
+        * If the CPU is committed to power down, make sure
+        * the power controller will be in charge of waking it
+        * up upon IRQ, ie IRQ lines are cut from GIC CPU IF
+        * to the CPU by disabling the GIC CPU IF to prevent wfi
+        * from completing execution behind power controller back
+        */
+       if (!skip_wfi)
+               gic_cpu_if_down();
+
        if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
                arch_spin_unlock(&tc2_pm_lock);
 
@@ -231,7 +241,6 @@ static void tc2_pm_suspend(u64 residency)
        cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
        cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
        ve_spc_set_resume_addr(cluster, cpu, virt_to_phys(mcpm_entry_point));
-       gic_cpu_if_down();
        tc2_pm_down(residency);
 }
 
index f5e1a84..1272ed2 100644 (file)
@@ -1232,7 +1232,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
                                break;
 
                len = (j - i) << PAGE_SHIFT;
-               ret = iommu_map(mapping->domain, iova, phys, len, 0);
+               ret = iommu_map(mapping->domain, iova, phys, len,
+                               IOMMU_READ|IOMMU_WRITE);
                if (ret < 0)
                        goto fail;
                iova += len;
@@ -1431,6 +1432,27 @@ static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
                                         GFP_KERNEL);
 }
 
+static int __dma_direction_to_prot(enum dma_data_direction dir)
+{
+       int prot;
+
+       switch (dir) {
+       case DMA_BIDIRECTIONAL:
+               prot = IOMMU_READ | IOMMU_WRITE;
+               break;
+       case DMA_TO_DEVICE:
+               prot = IOMMU_READ;
+               break;
+       case DMA_FROM_DEVICE:
+               prot = IOMMU_WRITE;
+               break;
+       default:
+               prot = 0;
+       }
+
+       return prot;
+}
+
 /*
  * Map a part of the scatter-gather list into contiguous io address space
  */
@@ -1444,6 +1466,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
        int ret = 0;
        unsigned int count;
        struct scatterlist *s;
+       int prot;
 
        size = PAGE_ALIGN(size);
        *handle = DMA_ERROR_CODE;
@@ -1460,7 +1483,9 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
                        !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
                        __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
 
-               ret = iommu_map(mapping->domain, iova, phys, len, 0);
+               prot = __dma_direction_to_prot(dir);
+
+               ret = iommu_map(mapping->domain, iova, phys, len, prot);
                if (ret < 0)
                        goto fail;
                count += len >> PAGE_SHIFT;
@@ -1665,19 +1690,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p
        if (dma_addr == DMA_ERROR_CODE)
                return dma_addr;
 
-       switch (dir) {
-       case DMA_BIDIRECTIONAL:
-               prot = IOMMU_READ | IOMMU_WRITE;
-               break;
-       case DMA_TO_DEVICE:
-               prot = IOMMU_READ;
-               break;
-       case DMA_FROM_DEVICE:
-               prot = IOMMU_WRITE;
-               break;
-       default:
-               prot = 0;
-       }
+       prot = __dma_direction_to_prot(dir);
 
        ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot);
        if (ret < 0)
index febaee7..18ec4c5 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
 #include <linux/of_fdt.h>
-#include <linux/of_reserved_mem.h>
 #include <linux/highmem.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
@@ -379,8 +378,6 @@ void __init arm_memblock_init(struct meminfo *mi,
        if (mdesc->reserve)
                mdesc->reserve();
 
-       early_init_dt_scan_reserved_mem();
-
        /*
         * reserve memory for DMA contigouos allocations,
         * must come from DMA area inside low memory
index 1a6bfe9..835c559 100644 (file)
@@ -6,13 +6,6 @@ config FRAME_POINTER
        bool
        default y
 
-config DEBUG_STACK_USAGE
-       bool "Enable stack utilization instrumentation"
-       depends on DEBUG_KERNEL
-       help
-         Enables the display of the minimum amount of free stack which each
-         task has ever had available in the sysrq-T output.
-
 config EARLY_PRINTK
        bool "Early printk support"
        default y
index 5b3e832..31c81e9 100644 (file)
@@ -42,7 +42,7 @@ CONFIG_IP_PNP_BOOTP=y
 # CONFIG_WIRELESS is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
-# CONFIG_BLK_DEV is not set
+CONFIG_BLK_DEV=y
 CONFIG_SCSI=y
 # CONFIG_SCSI_PROC_FS is not set
 CONFIG_BLK_DEV_SD=y
@@ -72,6 +72,7 @@ CONFIG_LOGO=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 # CONFIG_EXT3_FS_XATTR is not set
 CONFIG_FUSE_FS=y
@@ -90,3 +91,5 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_INFO=y
 # CONFIG_FTRACE is not set
 CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_BLK=y
index db80509..dd8ecfc 100644 (file)
@@ -177,6 +177,11 @@ static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
        return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
 }
 
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+       return vcpu_sys_reg(vcpu, MPIDR_EL1);
+}
+
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 {
        if (vcpu_mode_is_32bit(vcpu))
index 0859a4d..5d85a02 100644 (file)
 
 #define KVM_VCPU_MAX_FEATURES 2
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES      1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
-
 struct kvm_vcpu;
 int kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -151,6 +146,7 @@ struct kvm_vcpu_stat {
 struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
                        const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 struct kvm_one_reg;
index efe609c..680f74e 100644 (file)
@@ -91,6 +91,7 @@ int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
 #define        kvm_set_pte(ptep, pte)          set_pte(ptep, pte)
+#define        kvm_set_pmd(pmdp, pmd)          set_pmd(pmdp, pmd)
 
 static inline bool kvm_is_write_fault(unsigned long esr)
 {
@@ -116,13 +117,18 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
        pte_val(*pte) |= PTE_S2_RDWR;
 }
 
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+       pmd_val(*pmd) |= PMD_S2_RDWR;
+}
+
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
+                                             unsigned long size)
 {
        if (!icache_is_aliasing()) {            /* PIPT */
-               unsigned long hva = gfn_to_hva(kvm, gfn);
-               flush_icache_range(hva, hva + PAGE_SIZE);
+               flush_icache_range(hva, hva + size);
        } else if (!icache_is_aivivt()) {       /* non ASID-tagged VIVT */
                /* any kind of VIPT cache */
                __flush_icache_all();
index d57e668..755f861 100644 (file)
@@ -85,6 +85,8 @@
 #define PTE_S2_RDONLY          (_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
 #define PTE_S2_RDWR            (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
+#define PMD_S2_RDWR            (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+
 /*
  * Memory Attribute override for Stage-2 (MemAttr[3:0])
  */
index edb3d5c..7ecc2b2 100644 (file)
@@ -166,9 +166,10 @@ do {                                                                       \
 
 #define get_user(x, ptr)                                               \
 ({                                                                     \
+       __typeof__(*(ptr)) __user *__p = (ptr);                         \
        might_fault();                                                  \
-       access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) ?                 \
-               __get_user((x), (ptr)) :                                \
+       access_ok(VERIFY_READ, __p, sizeof(*__p)) ?                     \
+               __get_user((x), __p) :                                  \
                ((x) = 0, -EFAULT);                                     \
 })
 
@@ -227,9 +228,10 @@ do {                                                                       \
 
 #define put_user(x, ptr)                                               \
 ({                                                                     \
+       __typeof__(*(ptr)) __user *__p = (ptr);                         \
        might_fault();                                                  \
-       access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ?                \
-               __put_user((x), (ptr)) :                                \
+       access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ?                    \
+               __put_user((x), __p) :                                  \
                -EFAULT;                                                \
 })
 
index 1f2e4d5..bb785d2 100644 (file)
@@ -80,8 +80,10 @@ void fpsimd_thread_switch(struct task_struct *next)
 
 void fpsimd_flush_thread(void)
 {
+       preempt_disable();
        memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
        fpsimd_load_state(&current->thread.fpsimd_state);
+       preempt_enable();
 }
 
 #ifdef CONFIG_KERNEL_MODE_NEON
index 2c3ff67..3f0731e 100644 (file)
@@ -248,6 +248,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
        return kvm_reset_vcpu(vcpu);
 }
 
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+       int target = kvm_target_cpu();
+
+       if (target < 0)
+               return -ENODEV;
+
+       memset(init, 0, sizeof(*init));
+
+       /*
+        * For now, we don't return any features.
+        * In future, we might use features to return target
+        * specific features available for the preferred
+        * target type.
+        */
+       init->target = (__u32)target;
+
+       return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        return -EINVAL;
index 8ae80a1..19da91e 100644 (file)
@@ -35,7 +35,7 @@
  */
 ENTRY(__cpu_flush_user_tlb_range)
        vma_vm_mm x3, x2                        // get vma->vm_mm
-       mmid    x3, x3                          // get vm_mm->context.id
+       mmid    w3, x3                          // get vm_mm->context.id
        dsb     sy
        lsr     x0, x0, #12                     // align address
        lsr     x1, x1, #12
index d22af85..fd79807 100644 (file)
@@ -1,5 +1,19 @@
 
 generic-y      += clkdev.h
+generic-y       += cputime.h
+generic-y       += delay.h
+generic-y       += device.h
+generic-y       += div64.h
+generic-y       += emergency-restart.h
 generic-y      += exec.h
-generic-y      += trace_clock.h
+generic-y       += futex.h
+generic-y       += irq_regs.h
 generic-y      += param.h
+generic-y       += local.h
+generic-y       += local64.h
+generic-y       += percpu.h
+generic-y       += scatterlist.h
+generic-y       += sections.h
+generic-y       += topology.h
+generic-y      += trace_clock.h
+generic-y       += xor.h
diff --git a/arch/avr32/include/asm/cputime.h b/arch/avr32/include/asm/cputime.h
deleted file mode 100644 (file)
index e87e0f8..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_CPUTIME_H
-#define __ASM_AVR32_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __ASM_AVR32_CPUTIME_H */
diff --git a/arch/avr32/include/asm/delay.h b/arch/avr32/include/asm/delay.h
deleted file mode 100644 (file)
index 9670e12..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/delay.h>
diff --git a/arch/avr32/include/asm/device.h b/arch/avr32/include/asm/device.h
deleted file mode 100644 (file)
index d8f9872..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
diff --git a/arch/avr32/include/asm/div64.h b/arch/avr32/include/asm/div64.h
deleted file mode 100644 (file)
index d7ddd4f..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_DIV64_H
-#define __ASM_AVR32_DIV64_H
-
-#include <asm-generic/div64.h>
-
-#endif /* __ASM_AVR32_DIV64_H */
diff --git a/arch/avr32/include/asm/emergency-restart.h b/arch/avr32/include/asm/emergency-restart.h
deleted file mode 100644 (file)
index 3e7e014..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_EMERGENCY_RESTART_H
-#define __ASM_AVR32_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* __ASM_AVR32_EMERGENCY_RESTART_H */
diff --git a/arch/avr32/include/asm/futex.h b/arch/avr32/include/asm/futex.h
deleted file mode 100644 (file)
index 10419f1..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_FUTEX_H
-#define __ASM_AVR32_FUTEX_H
-
-#include <asm-generic/futex.h>
-
-#endif /* __ASM_AVR32_FUTEX_H */
diff --git a/arch/avr32/include/asm/irq_regs.h b/arch/avr32/include/asm/irq_regs.h
deleted file mode 100644 (file)
index 3dd9c0b..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/avr32/include/asm/local.h b/arch/avr32/include/asm/local.h
deleted file mode 100644 (file)
index 1c16196..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_LOCAL_H
-#define __ASM_AVR32_LOCAL_H
-
-#include <asm-generic/local.h>
-
-#endif /* __ASM_AVR32_LOCAL_H */
diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h
deleted file mode 100644 (file)
index 36c93b5..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/avr32/include/asm/percpu.h b/arch/avr32/include/asm/percpu.h
deleted file mode 100644 (file)
index 69227b4..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_PERCPU_H
-#define __ASM_AVR32_PERCPU_H
-
-#include <asm-generic/percpu.h>
-
-#endif /* __ASM_AVR32_PERCPU_H */
diff --git a/arch/avr32/include/asm/scatterlist.h b/arch/avr32/include/asm/scatterlist.h
deleted file mode 100644 (file)
index a5902d9..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_SCATTERLIST_H
-#define __ASM_AVR32_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* __ASM_AVR32_SCATTERLIST_H */
diff --git a/arch/avr32/include/asm/sections.h b/arch/avr32/include/asm/sections.h
deleted file mode 100644 (file)
index aa14252..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_SECTIONS_H
-#define __ASM_AVR32_SECTIONS_H
-
-#include <asm-generic/sections.h>
-
-#endif /* __ASM_AVR32_SECTIONS_H */
diff --git a/arch/avr32/include/asm/topology.h b/arch/avr32/include/asm/topology.h
deleted file mode 100644 (file)
index 5b766cb..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_AVR32_TOPOLOGY_H
-#define __ASM_AVR32_TOPOLOGY_H
-
-#include <asm-generic/topology.h>
-
-#endif /* __ASM_AVR32_TOPOLOGY_H */
diff --git a/arch/avr32/include/asm/xor.h b/arch/avr32/include/asm/xor.h
deleted file mode 100644 (file)
index 99c87aa..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_XOR_H
-#define _ASM_XOR_H
-
-#include <asm-generic/xor.h>
-
-#endif
index c273100..42a53e7 100644 (file)
@@ -289,7 +289,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
                memset(childregs, 0, sizeof(struct pt_regs));
                p->thread.cpu_context.r0 = arg;
                p->thread.cpu_context.r1 = usp; /* fn */
-               p->thread.cpu_context.r2 = syscall_return;
+               p->thread.cpu_context.r2 = (unsigned long)syscall_return;
                p->thread.cpu_context.pc = (unsigned long)ret_from_kernel_thread;
                childregs->sr = MODE_SUPERVISOR;
        } else {
index 869a1c6..12f828a 100644 (file)
@@ -98,7 +98,14 @@ static void comparator_mode(enum clock_event_mode mode,
        case CLOCK_EVT_MODE_SHUTDOWN:
                sysreg_write(COMPARE, 0);
                pr_debug("%s: stop\n", evdev->name);
-               cpu_idle_poll_ctrl(false);
+               if (evdev->mode == CLOCK_EVT_MODE_ONESHOT ||
+                   evdev->mode == CLOCK_EVT_MODE_RESUME) {
+                       /*
+                        * Only disable idle poll if we have forced that
+                        * in a previous call.
+                        */
+                       cpu_idle_poll_ctrl(false);
+               }
                break;
        default:
                BUG();
index 989dd3f..db95f57 100644 (file)
@@ -234,10 +234,6 @@ struct kvm_vm_data {
 #define KVM_REQ_PTC_G          32
 #define KVM_REQ_RESUME         33
 
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES      1
-#define KVM_PAGES_PER_HPAGE(x) 1
-
 struct kvm;
 struct kvm_vcpu;
 
@@ -480,7 +476,7 @@ struct kvm_arch {
 
        struct list_head assigned_dev_head;
        struct iommu_domain *iommu_domain;
-       int iommu_flags;
+       bool iommu_noncoherent;
 
        unsigned long irq_sources_bitmap;
        unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
index bdfd878..985bf80 100644 (file)
@@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
        return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages)
 {
        return 0;
 }
index 4a9baa9..9969dba 100644 (file)
@@ -276,7 +276,7 @@ static struct platform_device mtx1_pci_host = {
        .resource       = alchemy_pci_host_res,
 };
 
-static struct __initdata platform_device * mtx1_devs[] = {
+static struct platform_device *mtx1_devs[] __initdata = {
        &mtx1_pci_host,
        &mtx1_gpio_leds,
        &mtx1_wdt,
index 51680d1..d445d06 100644 (file)
 
 /*
  * MIPS32, MIPS64, VR5500, IDT32332, IDT32334 and maybe a few other
- * pre-MIPS32/MIPS53 processors have CLO, CLZ. The IDT RC64574 is 64-bit and
+ * pre-MIPS32/MIPS64 processors have CLO, CLZ. The IDT RC64574 is 64-bit and
  * has CLO and CLZ but not DCLO nor DCLZ.  For 64-bit kernels
  * cpu_has_clo_clz also indicates the availability of DCLO and DCLZ.
  */
index 4d6d77e..e194f95 100644 (file)
@@ -22,7 +22,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-       asm goto("1:\tnop\n\t"
+       asm_volatile_goto("1:\tnop\n\t"
                "nop\n\t"
                ".pushsection __jump_table,  \"aw\"\n\t"
                WORD_INSN " 1b, %l[l_yes], %0\n\t"
index 4d6fa0b..3296696 100644 (file)
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
-/* Don't support huge pages */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-
-/* We don't currently support large pages. */
-#define KVM_NR_PAGE_SIZES      1
-#define KVM_PAGES_PER_HPAGE(x) 1
-
 
 
 /* Special address that contains the comm page, used for reducing # of traps */
index 4204d76..029e002 100644 (file)
@@ -73,7 +73,7 @@
 3:
 
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
-       PTR_L   t8, __stack_chk_guard
+       PTR_LA  t8, __stack_chk_guard
        LONG_L  t9, TASK_STACK_CANARY(a1)
        LONG_S  t9, 0(t8)
 #endif
index 38af83f..20b7b04 100644 (file)
@@ -67,7 +67,7 @@ LEAF(resume)
 1:
 
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
-       PTR_L   t8, __stack_chk_guard
+       PTR_LA  t8, __stack_chk_guard
        LONG_L  t9, TASK_STACK_CANARY(a1)
        LONG_S  t9, 0(t8)
 #endif
index 921238a..078de5e 100644 (file)
@@ -69,7 +69,7 @@
 1:
 
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
-       PTR_L   t8, __stack_chk_guard
+       PTR_LA  t8, __stack_chk_guard
        LONG_L  t9, TASK_STACK_CANARY(a1)
        LONG_S  t9, 0(t8)
 #endif
index a7b0445..73b3482 100644 (file)
@@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
        return -ENOIOCTLCMD;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages)
 {
        return 0;
 }
index 627883b..bc6f96f 100644 (file)
@@ -609,6 +609,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
                        r4k_blast_scache();
                else
                        blast_scache_range(addr, addr + size);
+               preempt_enable();
                __sync();
                return;
        }
@@ -650,6 +651,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
                         */
                        blast_inv_scache_range(addr, addr + size);
                }
+               preempt_enable();
                __sync();
                return;
        }
index f25a7e9..5f8b955 100644 (file)
@@ -308,12 +308,10 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev,
 {
        int i;
 
-       /* Make sure that gcc doesn't leave the empty loop body.  */
-       for (i = 0; i < nelems; i++, sg++) {
-               if (cpu_needs_post_dma_flush(dev))
+       if (cpu_needs_post_dma_flush(dev))
+               for (i = 0; i < nelems; i++, sg++)
                        __dma_sync(sg_page(sg), sg->offset, sg->length,
                                   direction);
-       }
 }
 
 static void mips_dma_sync_sg_for_device(struct device *dev,
@@ -321,12 +319,10 @@ static void mips_dma_sync_sg_for_device(struct device *dev,
 {
        int i;
 
-       /* Make sure that gcc doesn't leave the empty loop body.  */
-       for (i = 0; i < nelems; i++, sg++) {
-               if (!plat_device_is_coherent(dev))
+       if (!plat_device_is_coherent(dev))
+               for (i = 0; i < nelems; i++, sg++)
                        __dma_sync(sg_page(sg), sg->offset, sg->length,
                                   direction);
-       }
 }
 
 int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
index eb59bfe..93c9980 100644 (file)
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  */
-
-#include <linux/of.h>  /* linux/of.h gets to determine #include ordering */
-
 #ifndef _ASM_OPENRISC_PROM_H
 #define _ASM_OPENRISC_PROM_H
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
 
-#include <linux/types.h>
-#include <asm/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/atomic.h>
-#include <linux/of_irq.h>
-#include <linux/of_fdt.h>
-#include <linux/of_address.h>
-#include <linux/proc_fs.h>
-#include <linux/platform_device.h>
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
-/* Other Prototypes */
-extern int early_uartlite_console(void);
-
-/* Parse the ibm,dma-window property of an OF node into the busno, phys and
- * size parameters.
- */
-void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
-               unsigned long *busno, unsigned long *phys, unsigned long *size);
-
-extern void kdump_move_device_tree(void);
-
-/* Get the MAC address */
-extern const void *of_get_mac_address(struct device_node *np);
-
-/**
- * of_irq_map_pci - Resolve the interrupt for a PCI device
- * @pdev:      the device whose interrupt is to be resolved
- * @out_irq:   structure of_irq filled by this function
- *
- * This function resolves the PCI interrupt for a given PCI device. If a
- * device-node exists for a given pci_dev, it will use normal OF tree
- * walking. If not, it will implement standard swizzling and walk up the
- * PCI tree until an device-node is found, at which point it will finish
- * resolving using the OF tree walking.
- */
-struct pci_dev;
-extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
-
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
 #endif /* _ASM_OPENRISC_PROM_H */
index 1945f99..4736020 100644 (file)
@@ -6,7 +6,7 @@ struct pt_regs;
 
 /* traps.c */
 void parisc_terminate(char *msg, struct pt_regs *regs,
-               int code, unsigned long offset);
+               int code, unsigned long offset) __noreturn __cold;
 
 /* mm/fault.c */
 void do_page_fault(struct pt_regs *regs, unsigned long code,
index c035673..b521c0a 100644 (file)
@@ -602,6 +602,7 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
                __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
        }
 }
+EXPORT_SYMBOL_GPL(flush_cache_page);
 
 #ifdef CONFIG_PARISC_TMPALIAS
 
index 8a252f2..2b96602 100644 (file)
@@ -72,7 +72,6 @@ enum ipi_message_type {
        IPI_NOP=0,
        IPI_RESCHEDULE=1,
        IPI_CALL_FUNC,
-       IPI_CALL_FUNC_SINGLE,
        IPI_CPU_START,
        IPI_CPU_STOP,
        IPI_CPU_TEST
@@ -164,11 +163,6 @@ ipi_interrupt(int irq, void *dev_id)
                                generic_smp_call_function_interrupt();
                                break;
 
-                       case IPI_CALL_FUNC_SINGLE:
-                               smp_debug(100, KERN_DEBUG "CPU%d IPI_CALL_FUNC_SINGLE\n", this_cpu);
-                               generic_smp_call_function_single_interrupt();
-                               break;
-
                        case IPI_CPU_START:
                                smp_debug(100, KERN_DEBUG "CPU%d IPI_CPU_START\n", this_cpu);
                                break;
@@ -260,7 +254,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-       send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
+       send_IPI_single(cpu, IPI_CALL_FUNC);
 }
 
 /*
index 04e47c6..1cd1d0c 100644 (file)
@@ -291,11 +291,6 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
        do_exit(SIGSEGV);
 }
 
-int syscall_ipi(int (*syscall) (struct pt_regs *), struct pt_regs *regs)
-{
-       return syscall(regs);
-}
-
 /* gdb uses break 4,8 */
 #define GDB_BREAK_INSN 0x10004
 static void handle_gdb_break(struct pt_regs *regs, int wot)
@@ -805,14 +800,14 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
        else {
 
            /*
-            * The kernel should never fault on its own address space.
+            * The kernel should never fault on its own address space,
+            * unless pagefault_disable() was called before.
             */
 
-           if (fault_space == 0
+           if (fault_space == 0 && !in_atomic())
            {
                pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
                parisc_terminate("Kernel Fault", regs, code, fault_address);
-       
            }
        }
 
index ac4370b..b5507ec 100644 (file)
@@ -56,7 +56,7 @@
 #ifdef __KERNEL__
 #include <linux/module.h>
 #include <linux/compiler.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #define s_space "%%sr1"
 #define d_space "%%sr2"
 #else
@@ -524,4 +524,17 @@ EXPORT_SYMBOL(copy_to_user);
 EXPORT_SYMBOL(copy_from_user);
 EXPORT_SYMBOL(copy_in_user);
 EXPORT_SYMBOL(memcpy);
+
+long probe_kernel_read(void *dst, const void *src, size_t size)
+{
+       unsigned long addr = (unsigned long)src;
+
+       if (size < 0 || addr < PAGE_SIZE)
+               return -EFAULT;
+
+       /* check for I/O space F_EXTEND(0xfff00000) access as well? */
+
+       return __probe_kernel_read(dst, src, size);
+}
+
 #endif
index d10d27a..0293588 100644 (file)
@@ -171,17 +171,25 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
                              unsigned long address)
 {
        struct vm_area_struct *vma, *prev_vma;
-       struct task_struct *tsk = current;
-       struct mm_struct *mm = tsk->mm;
+       struct task_struct *tsk;
+       struct mm_struct *mm;
        unsigned long acc_type;
        int fault;
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+       unsigned int flags;
 
-       if (in_atomic() || !mm)
+       if (in_atomic())
                goto no_context;
 
+       tsk = current;
+       mm = tsk->mm;
+       if (!mm)
+               goto no_context;
+
+       flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;
+
+       acc_type = parisc_acctyp(code, regs->iir);
        if (acc_type & VM_WRITE)
                flags |= FAULT_FLAG_WRITE;
 retry:
@@ -196,8 +204,6 @@ retry:
 
 good_area:
 
-       acc_type = parisc_acctyp(code,regs->iir);
-
        if ((vma->vm_flags & acc_type) != acc_type)
                goto bad_area;
 
index 6a15c96..15ca225 100644 (file)
@@ -74,7 +74,7 @@ src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c
 src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
 src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c
 
-src-plat-y := of.c
+src-plat-y := of.c epapr.c
 src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
                                treeboot-walnut.c cuboot-acadia.c \
                                cuboot-kilauea.c simpleboot.c \
@@ -97,7 +97,7 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \
                                        prpmc2800.c
 src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
 src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
-src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c
+src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
 
 src-wlib := $(sort $(src-wlib-y))
 src-plat := $(sort $(src-plat-y))
diff --git a/arch/powerpc/boot/epapr-wrapper.c b/arch/powerpc/boot/epapr-wrapper.c
new file mode 100644 (file)
index 0000000..c101910
--- /dev/null
@@ -0,0 +1,9 @@
+extern void epapr_platform_init(unsigned long r3, unsigned long r4,
+                               unsigned long r5, unsigned long r6,
+                               unsigned long r7);
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                  unsigned long r6, unsigned long r7)
+{
+       epapr_platform_init(r3, r4, r5, r6, r7);
+}
index 06c1961..02e91aa 100644 (file)
@@ -48,8 +48,8 @@ static void platform_fixups(void)
                       fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size);
 }
 
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
-                  unsigned long r6, unsigned long r7)
+void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                        unsigned long r6, unsigned long r7)
 {
        epapr_magic = r6;
        ima_size = r7;
index 61d9899..62e2f43 100644 (file)
@@ -26,6 +26,9 @@
 
 static unsigned long claim_base;
 
+void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                        unsigned long r6, unsigned long r7);
+
 static void *of_try_claim(unsigned long size)
 {
        unsigned long addr = 0;
@@ -61,7 +64,7 @@ static void of_image_hdr(const void *hdr)
        }
 }
 
-void platform_init(unsigned long a1, unsigned long a2, void *promptr)
+static void of_platform_init(unsigned long a1, unsigned long a2, void *promptr)
 {
        platform_ops.image_hdr = of_image_hdr;
        platform_ops.malloc = of_try_claim;
@@ -81,3 +84,14 @@ void platform_init(unsigned long a1, unsigned long a2, void *promptr)
                loader_info.initrd_size = a2;
        }
 }
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                  unsigned long r6, unsigned long r7)
+{
+       /* Detect OF vs. ePAPR boot */
+       if (r5)
+               of_platform_init(r3, r4, (void *)r5);
+       else
+               epapr_platform_init(r3, r4, r5, r6, r7);
+}
+
index 6761c74..cd7af84 100755 (executable)
@@ -148,18 +148,18 @@ make_space=y
 
 case "$platform" in
 pseries)
-    platformo=$object/of.o
+    platformo="$object/of.o $object/epapr.o"
     link_address='0x4000000'
     ;;
 maple)
-    platformo=$object/of.o
+    platformo="$object/of.o $object/epapr.o"
     link_address='0x400000'
     ;;
 pmac|chrp)
-    platformo=$object/of.o
+    platformo="$object/of.o $object/epapr.o"
     ;;
 coff)
-    platformo="$object/crt0.o $object/of.o"
+    platformo="$object/crt0.o $object/of.o $object/epapr.o"
     lds=$object/zImage.coff.lds
     link_address='0x500000'
     pie=
@@ -253,6 +253,7 @@ treeboot-iss4xx-mpic)
     platformo="$object/treeboot-iss4xx.o"
     ;;
 epapr)
+    platformo="$object/epapr.o $object/epapr-wrapper.o"
     link_address='0x20000000'
     pie=-pie
     ;;
index 9b198d1..856f8de 100644 (file)
@@ -77,4 +77,8 @@ static inline unsigned int get_d(u32 inst)
        return inst & 0xffff;
 }
 
+static inline unsigned int get_oc(u32 inst)
+{
+       return (inst >> 11) & 0x7fff;
+}
 #endif /* __ASM_PPC_DISASSEMBLE_H__ */
index cca12f0..894662a 100644 (file)
@@ -198,12 +198,27 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
        cmpwi   r10,0;                                                  \
        bne     do_kvm_##n
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * If hv is possible, interrupts come into to the hv version
+ * of the kvmppc_interrupt code, which then jumps to the PR handler,
+ * kvmppc_interrupt_pr, if the guest is a PR guest.
+ */
+#define kvmppc_interrupt kvmppc_interrupt_hv
+#else
+#define kvmppc_interrupt kvmppc_interrupt_pr
+#endif
+
 #define __KVM_HANDLER(area, h, n)                                      \
 do_kvm_##n:                                                            \
        BEGIN_FTR_SECTION_NESTED(947)                                   \
        ld      r10,area+EX_CFAR(r13);                                  \
        std     r10,HSTATE_CFAR(r13);                                   \
        END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947);          \
+       BEGIN_FTR_SECTION_NESTED(948)                                   \
+       ld      r10,area+EX_PPR(r13);                                   \
+       std     r10,HSTATE_PPR(r13);                                    \
+       END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);    \
        ld      r10,area+EX_R10(r13);                                   \
        stw     r9,HSTATE_SCRATCH1(r13);                                \
        ld      r9,area+EX_R9(r13);                                     \
@@ -217,6 +232,10 @@ do_kvm_##n:                                                                \
        ld      r10,area+EX_R10(r13);                                   \
        beq     89f;                                                    \
        stw     r9,HSTATE_SCRATCH1(r13);                        \
+       BEGIN_FTR_SECTION_NESTED(948)                                   \
+       ld      r9,area+EX_PPR(r13);                                    \
+       std     r9,HSTATE_PPR(r13);                                     \
+       END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);    \
        ld      r9,area+EX_R9(r13);                                     \
        std     r12,HSTATE_SCRATCH0(r13);                       \
        li      r12,n;                                                  \
@@ -236,7 +255,7 @@ do_kvm_##n:                                                         \
 #define KVM_HANDLER_SKIP(area, h, n)
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 #define KVMTEST_PR(n)                  __KVMTEST(n)
 #define KVM_HANDLER_PR(area, h, n)     __KVM_HANDLER(area, h, n)
 #define KVM_HANDLER_PR_SKIP(area, h, n)        __KVM_HANDLER_SKIP(area, h, n)
index 0e40843..41f13ce 100644 (file)
@@ -69,9 +69,9 @@ extern struct thread_info *softirq_ctx[NR_CPUS];
 
 extern void irq_ctx_init(void);
 extern void call_do_softirq(struct thread_info *tp);
-extern int call_handle_irq(int irq, void *p1,
-                          struct thread_info *tp, void *func);
+extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
 extern void do_IRQ(struct pt_regs *regs);
+extern void __do_irq(struct pt_regs *regs);
 
 int irq_choose_cpu(const struct cpumask *mask);
 
index ae098c4..f016bb6 100644 (file)
@@ -19,7 +19,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-       asm goto("1:\n\t"
+       asm_volatile_goto("1:\n\t"
                 "nop\n\t"
                 ".pushsection __jump_table,  \"aw\"\n\t"
                 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
index 851bac7..1bd92fd 100644 (file)
 #define BOOK3S_HFLAG_SLB                       0x2
 #define BOOK3S_HFLAG_PAIRED_SINGLE             0x4
 #define BOOK3S_HFLAG_NATIVE_PS                 0x8
+#define BOOK3S_HFLAG_MULTI_PGSIZE              0x10
+#define BOOK3S_HFLAG_NEW_TLBIE                 0x20
 
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
 #define KVM_GUEST_MODE_NONE    0
 #define KVM_GUEST_MODE_GUEST   1
 #define KVM_GUEST_MODE_SKIP    2
+#define KVM_GUEST_MODE_GUEST_HV        3
+#define KVM_GUEST_MODE_HOST_HV 4
 
 #define KVM_INST_FETCH_FAILED  -1
 
index fa19e2f..4a594b7 100644 (file)
@@ -58,16 +58,18 @@ struct hpte_cache {
        struct hlist_node list_pte_long;
        struct hlist_node list_vpte;
        struct hlist_node list_vpte_long;
+#ifdef CONFIG_PPC_BOOK3S_64
+       struct hlist_node list_vpte_64k;
+#endif
        struct rcu_head rcu_head;
        u64 host_vpn;
        u64 pfn;
        ulong slot;
        struct kvmppc_pte pte;
+       int pagesize;
 };
 
 struct kvmppc_vcpu_book3s {
-       struct kvm_vcpu vcpu;
-       struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
        struct kvmppc_sid_map sid_map[SID_MAP_NUM];
        struct {
                u64 esid;
@@ -99,6 +101,9 @@ struct kvmppc_vcpu_book3s {
        struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
        struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
        struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG];
+#ifdef CONFIG_PPC_BOOK3S_64
+       struct hlist_head hpte_hash_vpte_64k[HPTEG_HASH_NUM_VPTE_64K];
+#endif
        int hpte_cache_count;
        spinlock_t mmu_lock;
 };
@@ -107,8 +112,9 @@ struct kvmppc_vcpu_book3s {
 #define CONTEXT_GUEST          1
 #define CONTEXT_GUEST_END      2
 
-#define VSID_REAL      0x0fffffffffc00000ULL
-#define VSID_BAT       0x0fffffffffb00000ULL
+#define VSID_REAL      0x07ffffffffc00000ULL
+#define VSID_BAT       0x07ffffffffb00000ULL
+#define VSID_64K       0x0800000000000000ULL
 #define VSID_1T                0x1000000000000000ULL
 #define VSID_REAL_DR   0x2000000000000000ULL
 #define VSID_REAL_IR   0x4000000000000000ULL
@@ -118,11 +124,12 @@ extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask)
 extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask);
 extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end);
 extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr);
-extern void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr);
 extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu);
-extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
+extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
+                              bool iswrite);
+extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
 extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size);
 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
@@ -134,6 +141,7 @@ extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
 
 extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
 extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte);
 extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
 extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
 extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
@@ -151,7 +159,8 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
                           bool upper, u32 val);
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
-extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
+                       bool *writable);
 extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
                        unsigned long *rmap, long pte_index, int realmode);
 extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
@@ -172,6 +181,8 @@ extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
                        unsigned long *hpret);
 extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
                        struct kvm_memory_slot *memslot, unsigned long *map);
+extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
+                       unsigned long mask);
 
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
@@ -184,11 +195,9 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
-       return container_of(vcpu, struct kvmppc_vcpu_book3s, vcpu);
+       return vcpu->arch.book3s;
 }
 
-extern void kvm_return_point(void);
-
 /* Also add subarch specific defines */
 
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
@@ -198,203 +207,6 @@ extern void kvm_return_point(void);
 #include <asm/kvm_book3s_64.h>
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_PR
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
-       return to_book3s(vcpu)->hior;
-}
-
-static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
-                       unsigned long pending_now, unsigned long old_pending)
-{
-       if (pending_now)
-               vcpu->arch.shared->int_pending = 1;
-       else if (old_pending)
-               vcpu->arch.shared->int_pending = 0;
-}
-
-static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
-{
-       if ( num < 14 ) {
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               svcpu->gpr[num] = val;
-               svcpu_put(svcpu);
-               to_book3s(vcpu)->shadow_vcpu->gpr[num] = val;
-       } else
-               vcpu->arch.gpr[num] = val;
-}
-
-static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
-{
-       if ( num < 14 ) {
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               ulong r = svcpu->gpr[num];
-               svcpu_put(svcpu);
-               return r;
-       } else
-               return vcpu->arch.gpr[num];
-}
-
-static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       svcpu->cr = val;
-       svcpu_put(svcpu);
-       to_book3s(vcpu)->shadow_vcpu->cr = val;
-}
-
-static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       u32 r;
-       r = svcpu->cr;
-       svcpu_put(svcpu);
-       return r;
-}
-
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       svcpu->xer = val;
-       to_book3s(vcpu)->shadow_vcpu->xer = val;
-       svcpu_put(svcpu);
-}
-
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       u32 r;
-       r = svcpu->xer;
-       svcpu_put(svcpu);
-       return r;
-}
-
-static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       svcpu->ctr = val;
-       svcpu_put(svcpu);
-}
-
-static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       ulong r;
-       r = svcpu->ctr;
-       svcpu_put(svcpu);
-       return r;
-}
-
-static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       svcpu->lr = val;
-       svcpu_put(svcpu);
-}
-
-static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       ulong r;
-       r = svcpu->lr;
-       svcpu_put(svcpu);
-       return r;
-}
-
-static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       svcpu->pc = val;
-       svcpu_put(svcpu);
-}
-
-static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       ulong r;
-       r = svcpu->pc;
-       svcpu_put(svcpu);
-       return r;
-}
-
-static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
-{
-       ulong pc = kvmppc_get_pc(vcpu);
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       u32 r;
-
-       /* Load the instruction manually if it failed to do so in the
-        * exit path */
-       if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
-               kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
-
-       r = svcpu->last_inst;
-       svcpu_put(svcpu);
-       return r;
-}
-
-/*
- * Like kvmppc_get_last_inst(), but for fetching a sc instruction.
- * Because the sc instruction sets SRR0 to point to the following
- * instruction, we have to fetch from pc - 4.
- */
-static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu)
-{
-       ulong pc = kvmppc_get_pc(vcpu) - 4;
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       u32 r;
-
-       /* Load the instruction manually if it failed to do so in the
-        * exit path */
-       if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
-               kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
-
-       r = svcpu->last_inst;
-       svcpu_put(svcpu);
-       return r;
-}
-
-static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
-{
-       struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-       ulong r;
-       r = svcpu->fault_dar;
-       svcpu_put(svcpu);
-       return r;
-}
-
-static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
-{
-       ulong crit_raw = vcpu->arch.shared->critical;
-       ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
-       bool crit;
-
-       /* Truncate crit indicators in 32 bit mode */
-       if (!(vcpu->arch.shared->msr & MSR_SF)) {
-               crit_raw &= 0xffffffff;
-               crit_r1 &= 0xffffffff;
-       }
-
-       /* Critical section when crit == r1 */
-       crit = (crit_raw == crit_r1);
-       /* ... and we're in supervisor mode */
-       crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
-
-       return crit;
-}
-#else /* CONFIG_KVM_BOOK3S_PR */
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
-       return 0;
-}
-
-static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
-                       unsigned long pending_now, unsigned long old_pending)
-{
-}
-
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
        vcpu->arch.gpr[num] = val;
@@ -489,12 +301,6 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
        return vcpu->arch.fault_dar;
 }
 
-static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
-{
-       return false;
-}
-#endif
-
 /* Magic register values loaded into r3 and r4 before the 'sc' assembly
  * instruction for the OSI hypercalls */
 #define OSI_SC_MAGIC_R3                        0x113724FA
index ce0ef6c..c720e0b 100644 (file)
@@ -22,7 +22,7 @@
 
 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
 {
-       return to_book3s(vcpu)->shadow_vcpu;
+       return vcpu->arch.shadow_vcpu;
 }
 
 static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
index 86d638a..bf0fa8b 100644 (file)
@@ -20,7 +20,7 @@
 #ifndef __ASM_KVM_BOOK3S_64_H__
 #define __ASM_KVM_BOOK3S_64_H__
 
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
 {
        preempt_disable();
@@ -35,7 +35,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #define SPAPR_TCE_SHIFT                12
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 #define KVM_DEFAULT_HPT_ORDER  24      /* 16MB HPT by default */
 extern unsigned long kvm_rma_pages;
 #endif
@@ -278,7 +278,7 @@ static inline int is_vrma_hpte(unsigned long hpte_v)
                (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
 }
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
  * Note modification of an HPTE; set the HPTE modified bit
  * if anyone is interested.
@@ -289,6 +289,6 @@ static inline void note_hpte_modification(struct kvm *kvm,
        if (atomic_read(&kvm->arch.hpte_mod_interest))
                rev->guest_rpte |= HPTE_GR_MODIFIED;
 }
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
index 9039d3c..0bd9348 100644 (file)
@@ -83,7 +83,7 @@ struct kvmppc_host_state {
        u8 restore_hid5;
        u8 napping;
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        u8 hwthread_req;
        u8 hwthread_state;
        u8 host_ipi;
@@ -101,6 +101,7 @@ struct kvmppc_host_state {
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
        u64 cfar;
+       u64 ppr;
 #endif
 };
 
@@ -108,14 +109,14 @@ struct kvmppc_book3s_shadow_vcpu {
        ulong gpr[14];
        u32 cr;
        u32 xer;
-
-       u32 fault_dsisr;
-       u32 last_inst;
        ulong ctr;
        ulong lr;
        ulong pc;
+
        ulong shadow_srr1;
        ulong fault_dar;
+       u32 fault_dsisr;
+       u32 last_inst;
 
 #ifdef CONFIG_PPC_BOOK3S_32
        u32     sr[16];                 /* Guest SRs */
index d3c1eb3..dd8f615 100644 (file)
 /* LPIDs we support with this build -- runtime limit may be lower */
 #define KVMPPC_NR_LPIDS                        64
 
-#define KVMPPC_INST_EHPRIV     0x7c00021c
+#define KVMPPC_INST_EHPRIV             0x7c00021c
+#define EHPRIV_OC_SHIFT                        11
+/* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */
+#define EHPRIV_OC_DEBUG                        1
+#define KVMPPC_INST_EHPRIV_DEBUG       (KVMPPC_INST_EHPRIV | \
+                                        (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT))
 
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
index 3328353..237d1d2 100644 (file)
@@ -63,20 +63,17 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 #endif
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES      1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
-
 #define HPTEG_CACHE_NUM                        (1 << 15)
 #define HPTEG_HASH_BITS_PTE            13
 #define HPTEG_HASH_BITS_PTE_LONG       12
 #define HPTEG_HASH_BITS_VPTE           13
 #define HPTEG_HASH_BITS_VPTE_LONG      5
+#define HPTEG_HASH_BITS_VPTE_64K       11
 #define HPTEG_HASH_NUM_PTE             (1 << HPTEG_HASH_BITS_PTE)
 #define HPTEG_HASH_NUM_PTE_LONG                (1 << HPTEG_HASH_BITS_PTE_LONG)
 #define HPTEG_HASH_NUM_VPTE            (1 << HPTEG_HASH_BITS_VPTE)
 #define HPTEG_HASH_NUM_VPTE_LONG       (1 << HPTEG_HASH_BITS_VPTE_LONG)
+#define HPTEG_HASH_NUM_VPTE_64K                (1 << HPTEG_HASH_BITS_VPTE_64K)
 
 /* Physical Address Mask - allowed range of real mode RAM access */
 #define KVM_PAM                        0x0fffffffffffffffULL
@@ -89,6 +86,9 @@ struct lppaca;
 struct slb_shadow;
 struct dtl_entry;
 
+struct kvmppc_vcpu_book3s;
+struct kvmppc_book3s_shadow_vcpu;
+
 struct kvm_vm_stat {
        u32 remote_tlb_flush;
 };
@@ -224,15 +224,15 @@ struct revmap_entry {
 #define KVMPPC_GOT_PAGE                0x80
 
 struct kvm_arch_memory_slot {
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        unsigned long *rmap;
        unsigned long *slot_phys;
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 };
 
 struct kvm_arch {
        unsigned int lpid;
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        unsigned long hpt_virt;
        struct revmap_entry *revmap;
        unsigned int host_lpid;
@@ -256,7 +256,10 @@ struct kvm_arch {
        cpumask_t need_tlb_flush;
        struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
        int hpt_cma_alloc;
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+       struct mutex hpt_mutex;
+#endif
 #ifdef CONFIG_PPC_BOOK3S_64
        struct list_head spapr_tce_tables;
        struct list_head rtas_tokens;
@@ -267,6 +270,7 @@ struct kvm_arch {
 #ifdef CONFIG_KVM_XICS
        struct kvmppc_xics *xics;
 #endif
+       struct kvmppc_ops *kvm_ops;
 };
 
 /*
@@ -294,6 +298,10 @@ struct kvmppc_vcore {
        u64 stolen_tb;
        u64 preempt_tb;
        struct kvm_vcpu *runner;
+       u64 tb_offset;          /* guest timebase - host timebase */
+       ulong lpcr;
+       u32 arch_compat;
+       ulong pcr;
 };
 
 #define VCORE_ENTRY_COUNT(vc)  ((vc)->entry_exit_count & 0xff)
@@ -328,6 +336,7 @@ struct kvmppc_pte {
        bool may_read           : 1;
        bool may_write          : 1;
        bool may_execute        : 1;
+       u8 page_size;           /* MMU_PAGE_xxx */
 };
 
 struct kvmppc_mmu {
@@ -340,7 +349,8 @@ struct kvmppc_mmu {
        /* book3s */
        void (*mtsrin)(struct kvm_vcpu *vcpu, u32 srnum, ulong value);
        u32  (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
-       int  (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data);
+       int  (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
+                     struct kvmppc_pte *pte, bool data, bool iswrite);
        void (*reset_msr)(struct kvm_vcpu *vcpu);
        void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
        int  (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
@@ -360,6 +370,7 @@ struct kvmppc_slb {
        bool large      : 1;    /* PTEs are 16MB */
        bool tb         : 1;    /* 1TB segment */
        bool class      : 1;
+       u8 base_page_size;      /* MMU_PAGE_xxx */
 };
 
 # ifdef CONFIG_PPC_FSL_BOOK3E
@@ -377,17 +388,6 @@ struct kvmppc_slb {
 #define KVMPPC_EPR_USER                1 /* exit to userspace to fill EPR */
 #define KVMPPC_EPR_KERNEL      2 /* in-kernel irqchip */
 
-struct kvmppc_booke_debug_reg {
-       u32 dbcr0;
-       u32 dbcr1;
-       u32 dbcr2;
-#ifdef CONFIG_KVM_E500MC
-       u32 dbcr4;
-#endif
-       u64 iac[KVMPPC_BOOKE_MAX_IAC];
-       u64 dac[KVMPPC_BOOKE_MAX_DAC];
-};
-
 #define KVMPPC_IRQ_DEFAULT     0
 #define KVMPPC_IRQ_MPIC                1
 #define KVMPPC_IRQ_XICS                2
@@ -402,6 +402,10 @@ struct kvm_vcpu_arch {
        int slb_max;            /* 1 + index of last valid entry in slb[] */
        int slb_nr;             /* total number of entries in SLB */
        struct kvmppc_mmu mmu;
+       struct kvmppc_vcpu_book3s *book3s;
+#endif
+#ifdef CONFIG_PPC_BOOK3S_32
+       struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
 #endif
 
        ulong gpr[32];
@@ -463,6 +467,8 @@ struct kvm_vcpu_arch {
        u32 ctrl;
        ulong dabr;
        ulong cfar;
+       ulong ppr;
+       ulong shadow_srr1;
 #endif
        u32 vrsave; /* also USPRG0 */
        u32 mmucr;
@@ -498,6 +504,8 @@ struct kvm_vcpu_arch {
 
        u64 mmcr[3];
        u32 pmc[8];
+       u64 siar;
+       u64 sdar;
 
 #ifdef CONFIG_KVM_EXIT_TIMING
        struct mutex exit_timing_lock;
@@ -531,7 +539,10 @@ struct kvm_vcpu_arch {
        u32 eptcfg;
        u32 epr;
        u32 crit_save;
-       struct kvmppc_booke_debug_reg dbg_reg;
+       /* guest debug registers*/
+       struct debug_reg dbg_reg;
+       /* hardware visible debug registers when in guest state */
+       struct debug_reg shadow_dbg_reg;
 #endif
        gpa_t paddr_accessed;
        gva_t vaddr_accessed;
@@ -582,7 +593,7 @@ struct kvm_vcpu_arch {
        struct kvmppc_icp *icp; /* XICS presentation controller */
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        struct kvm_vcpu_arch_shared shregs;
 
        unsigned long pgfault_addr;
index b15554a..c8317fb 100644 (file)
@@ -106,13 +106,6 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
 extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
-
-extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                                  unsigned int op, int *advance);
-extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
-                                    ulong val);
-extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
-                                    ulong *val);
 extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_booke_init(void);
@@ -135,17 +128,17 @@ extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
                                struct kvm_create_spapr_tce *args);
 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                             unsigned long ioba, unsigned long tce);
-extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
-                               struct kvm_allocate_rma *rma);
 extern struct kvm_rma_info *kvm_alloc_rma(void);
 extern void kvm_release_rma(struct kvm_rma_info *ri);
 extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
-extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+extern void kvmppc_core_free_memslot(struct kvm *kvm,
+                                    struct kvm_memory_slot *free,
                                     struct kvm_memory_slot *dont);
-extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+extern int kvmppc_core_create_memslot(struct kvm *kvm,
+                                     struct kvm_memory_slot *slot,
                                      unsigned long npages);
 extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
                                struct kvm_memory_slot *memslot,
@@ -177,6 +170,72 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
 extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
 extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
 
+union kvmppc_one_reg {
+       u32     wval;
+       u64     dval;
+       vector128 vval;
+       u64     vsxval[2];
+       struct {
+               u64     addr;
+               u64     length;
+       }       vpaval;
+};
+
+struct kvmppc_ops {
+       struct module *owner;
+       int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+       int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+       int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id,
+                          union kvmppc_one_reg *val);
+       int (*set_one_reg)(struct kvm_vcpu *vcpu, u64 id,
+                          union kvmppc_one_reg *val);
+       void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
+       void (*vcpu_put)(struct kvm_vcpu *vcpu);
+       void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
+       int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+       struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id);
+       void (*vcpu_free)(struct kvm_vcpu *vcpu);
+       int (*check_requests)(struct kvm_vcpu *vcpu);
+       int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
+       void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
+       int (*prepare_memory_region)(struct kvm *kvm,
+                                    struct kvm_memory_slot *memslot,
+                                    struct kvm_userspace_memory_region *mem);
+       void (*commit_memory_region)(struct kvm *kvm,
+                                    struct kvm_userspace_memory_region *mem,
+                                    const struct kvm_memory_slot *old);
+       int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
+       int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
+                          unsigned long end);
+       int (*age_hva)(struct kvm *kvm, unsigned long hva);
+       int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
+       void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
+       void (*mmu_destroy)(struct kvm_vcpu *vcpu);
+       void (*free_memslot)(struct kvm_memory_slot *free,
+                            struct kvm_memory_slot *dont);
+       int (*create_memslot)(struct kvm_memory_slot *slot,
+                             unsigned long npages);
+       int (*init_vm)(struct kvm *kvm);
+       void (*destroy_vm)(struct kvm *kvm);
+       int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info);
+       int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                         unsigned int inst, int *advance);
+       int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
+       int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
+       void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu);
+       long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
+                             unsigned long arg);
+
+};
+
+extern struct kvmppc_ops *kvmppc_hv_ops;
+extern struct kvmppc_ops *kvmppc_pr_ops;
+
+static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
+{
+       return kvm->arch.kvm_ops == kvmppc_hv_ops;
+}
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
@@ -210,17 +269,6 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
        return r;
 }
 
-union kvmppc_one_reg {
-       u32     wval;
-       u64     dval;
-       vector128 vval;
-       u64     vsxval[2];
-       struct {
-               u64     addr;
-               u64     length;
-       }       vpaval;
-};
-
 #define one_reg_size(id)       \
        (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
 
@@ -245,10 +293,10 @@ union kvmppc_one_reg {
        __v;                                    \
 })
 
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
-void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
@@ -260,7 +308,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
 struct openpic;
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 extern void kvm_cma_reserve(void) __init;
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
@@ -269,10 +317,10 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 
 static inline u32 kvmppc_get_xics_latch(void)
 {
-       u32 xirr = get_paca()->kvm_hstate.saved_xirr;
+       u32 xirr;
 
+       xirr = get_paca()->kvm_hstate.saved_xirr;
        get_paca()->kvm_hstate.saved_xirr = 0;
-
        return xirr;
 }
 
@@ -281,7 +329,10 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
        paca[cpu].kvm_hstate.host_ipi = host_ipi;
 }
 
-extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
+static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+       vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu);
+}
 
 #else
 static inline void __init kvm_cma_reserve(void)
index a5954ce..b6ea9e0 100644 (file)
@@ -166,7 +166,7 @@ struct paca_struct {
        struct dtl_entry *dtl_curr;     /* pointer corresponding to dtl_ridx */
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
        /* We use this to store guest state in */
        struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
 #endif
index e378ccc..75a9e5a 100644 (file)
@@ -147,22 +147,7 @@ typedef struct {
 #define TS_FPR(i) fpr[i][TS_FPROFFSET]
 #define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET]
 
-struct thread_struct {
-       unsigned long   ksp;            /* Kernel stack pointer */
-       unsigned long   ksp_limit;      /* if ksp <= ksp_limit stack overflow */
-
-#ifdef CONFIG_PPC64
-       unsigned long   ksp_vsid;
-#endif
-       struct pt_regs  *regs;          /* Pointer to saved register state */
-       mm_segment_t    fs;             /* for get_fs() validation */
-#ifdef CONFIG_BOOKE
-       /* BookE base exception scratch space; align on cacheline */
-       unsigned long   normsave[8] ____cacheline_aligned;
-#endif
-#ifdef CONFIG_PPC32
-       void            *pgdir;         /* root of page-table tree */
-#endif
+struct debug_reg {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
        /*
         * The following help to manage the use of Debug Control Registers
@@ -199,6 +184,27 @@ struct thread_struct {
        unsigned long   dvc2;
 #endif
 #endif
+};
+
+struct thread_struct {
+       unsigned long   ksp;            /* Kernel stack pointer */
+
+#ifdef CONFIG_PPC64
+       unsigned long   ksp_vsid;
+#endif
+       struct pt_regs  *regs;          /* Pointer to saved register state */
+       mm_segment_t    fs;             /* for get_fs() validation */
+#ifdef CONFIG_BOOKE
+       /* BookE base exception scratch space; align on cacheline */
+       unsigned long   normsave[8] ____cacheline_aligned;
+#endif
+#ifdef CONFIG_PPC32
+       void            *pgdir;         /* root of page-table tree */
+       unsigned long   ksp_limit;      /* if ksp <= ksp_limit stack overflow */
+#endif
+       /* Debug Registers */
+       struct debug_reg debug;
+
        /* FP and VSX 0-31 register set */
        double          fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
        struct {
@@ -321,7 +327,6 @@ struct thread_struct {
 #else
 #define INIT_THREAD  { \
        .ksp = INIT_SP, \
-       .ksp_limit = INIT_SP_LIMIT, \
        .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
        .fs = KERNEL_DS, \
        .fpr = {{0}}, \
index 0156702..576ad88 100644 (file)
@@ -40,7 +40,7 @@
 #define _PAGE_U1       0x010000
 #define _PAGE_U0       0x020000
 #define _PAGE_ACCESSED 0x040000
-#define _PAGE_LENDIAN  0x080000
+#define _PAGE_ENDIAN   0x080000
 #define _PAGE_GUARDED  0x100000
 #define _PAGE_COHERENT 0x200000 /* M: enforce memory coherence */
 #define _PAGE_NO_CACHE 0x400000 /* I: cache inhibit */
index 10d1ef0..e294673 100644 (file)
 #define SPRN_TBRU      0x10D   /* Time Base Read Upper Register (user, R/O) */
 #define SPRN_TBWL      0x11C   /* Time Base Lower Register (super, R/W) */
 #define SPRN_TBWU      0x11D   /* Time Base Upper Register (super, R/W) */
+#define SPRN_TBU40     0x11E   /* Timebase upper 40 bits (hyper, R/W) */
 #define SPRN_SPURR     0x134   /* Scaled PURR */
 #define SPRN_HSPRG0    0x130   /* Hypervisor Scratch 0 */
 #define SPRN_HSPRG1    0x131   /* Hypervisor Scratch 1 */
 #define   LPCR_ISL     (1ul << (63-2))
 #define   LPCR_VC_SH   (63-2)
 #define   LPCR_DPFD_SH (63-11)
+#define   LPCR_DPFD    (7ul << LPCR_DPFD_SH)
 #define   LPCR_VRMASD  (0x1ful << (63-16))
 #define   LPCR_VRMA_L  (1ul << (63-12))
 #define   LPCR_VRMA_LP0        (1ul << (63-15))
 #define     LPCR_PECE2 0x00001000      /* machine check etc can cause exit */
 #define   LPCR_MER     0x00000800      /* Mediated External Exception */
 #define   LPCR_MER_SH  11
+#define   LPCR_TC      0x00000200      /* Translation control */
 #define   LPCR_LPES    0x0000000c
 #define   LPCR_LPES0   0x00000008      /* LPAR Env selector 0 */
 #define   LPCR_LPES1   0x00000004      /* LPAR Env selector 1 */
 #define   LPID_RSVD    0x3ff           /* Reserved LPID for partn switching */
 #define        SPRN_HMER       0x150   /* Hardware m? error recovery */
 #define        SPRN_HMEER      0x151   /* Hardware m? enable error recovery */
+#define SPRN_PCR       0x152   /* Processor compatibility register */
+#define   PCR_VEC_DIS  (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
+#define   PCR_VSX_DIS  (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
+#define   PCR_ARCH_205 0x2             /* Architecture 2.05 */
 #define        SPRN_HEIR       0x153   /* Hypervisor Emulated Instruction Register */
 #define SPRN_TLBINDEXR 0x154   /* P7 TLB control register */
 #define SPRN_TLBVPNR   0x155   /* P7 TLB control register */
 #define         HID4_RMLS2_SH   (63 - 2)       /* Real mode limit bottom 2 bits */
 #define         HID4_LPID5_SH   (63 - 6)       /* partition ID bottom 4 bits */
 #define         HID4_RMOR_SH    (63 - 22)      /* real mode offset (16 bits) */
+#define  HID4_RMOR      (0xFFFFul << HID4_RMOR_SH)
 #define  HID4_LPES1     (1 << (63-57)) /* LPAR env. sel. bit 1 */
 #define  HID4_RMLS0_SH  (63 - 58)      /* Real mode limit top bit */
 #define         HID4_LPID1_SH   0              /* partition ID top 2 bits */
 #define PVR_BE         0x0070
 #define PVR_PA6T       0x0090
 
+/* "Logical" PVR values defined in PAPR, representing architecture levels */
+#define PVR_ARCH_204   0x0f000001
+#define PVR_ARCH_205   0x0f000002
+#define PVR_ARCH_206   0x0f000003
+#define PVR_ARCH_206p  0x0f100003
+#define PVR_ARCH_207   0x0f000004
+
 /* Macros for setting and retrieving special purpose registers */
 #ifndef __ASSEMBLY__
 #define mfmsr()                ({unsigned long rval; \
index ed8f836..2e31aac 100644 (file)
 #define DBCR0_IA34T    0x00004000      /* Instr Addr 3-4 range Toggle */
 #define DBCR0_FT       0x00000001      /* Freeze Timers on debug event */
 
-#define dbcr_iac_range(task)   ((task)->thread.dbcr0)
+#define dbcr_iac_range(task)   ((task)->thread.debug.dbcr0)
 #define DBCR_IAC12I    DBCR0_IA12                      /* Range Inclusive */
 #define DBCR_IAC12X    (DBCR0_IA12 | DBCR0_IA12X)      /* Range Exclusive */
 #define DBCR_IAC12MODE (DBCR0_IA12 | DBCR0_IA12X)      /* IAC 1-2 Mode Bits */
 #define DBCR1_DAC1W    0x20000000      /* DAC1 Write Debug Event */
 #define DBCR1_DAC2W    0x10000000      /* DAC2 Write Debug Event */
 
-#define dbcr_dac(task) ((task)->thread.dbcr1)
+#define dbcr_dac(task) ((task)->thread.debug.dbcr1)
 #define DBCR_DAC1R     DBCR1_DAC1R
 #define DBCR_DAC1W     DBCR1_DAC1W
 #define DBCR_DAC2R     DBCR1_DAC2R
 #define DBCR0_CRET     0x00000020      /* Critical Return Debug Event */
 #define DBCR0_FT       0x00000001      /* Freeze Timers on debug event */
 
-#define dbcr_dac(task) ((task)->thread.dbcr0)
+#define dbcr_dac(task) ((task)->thread.debug.dbcr0)
 #define DBCR_DAC1R     DBCR0_DAC1R
 #define DBCR_DAC1W     DBCR0_DAC1W
 #define DBCR_DAC2R     DBCR0_DAC2R
 #define DBCR1_IAC34MX  0x000000C0      /* Instr Addr 3-4 range eXclusive */
 #define DBCR1_IAC34AT  0x00000001      /* Instr Addr 3-4 range Toggle */
 
-#define dbcr_iac_range(task)   ((task)->thread.dbcr1)
+#define dbcr_iac_range(task)   ((task)->thread.debug.dbcr1)
 #define DBCR_IAC12I    DBCR1_IAC12M    /* Range Inclusive */
 #define DBCR_IAC12X    DBCR1_IAC12MX   /* Range Exclusive */
 #define DBCR_IAC12MODE DBCR1_IAC12MX   /* IAC 1-2 Mode Bits */
index 2be5618..9ee1261 100644 (file)
@@ -35,6 +35,7 @@ extern void giveup_vsx(struct task_struct *);
 extern void enable_kernel_spe(void);
 extern void giveup_spe(struct task_struct *);
 extern void load_up_spe(struct task_struct *);
+extern void switch_booke_debug_regs(struct thread_struct *new_thread);
 
 #ifndef CONFIG_SMP
 extern void discard_lazy_cpu_state(void);
index 0fb1a6e..6836ec7 100644 (file)
@@ -27,6 +27,7 @@
 #define __KVM_HAVE_PPC_SMT
 #define __KVM_HAVE_IRQCHIP
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_GUEST_DEBUG
 
 struct kvm_regs {
        __u64 pc;
@@ -269,7 +270,24 @@ struct kvm_fpu {
        __u64 fpr[32];
 };
 
+/*
+ * Defines for h/w breakpoint, watchpoint (read, write or both) and
+ * software breakpoint.
+ * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
+ * for KVM_DEBUG_EXIT.
+ */
+#define KVMPPC_DEBUG_NONE              0x0
+#define KVMPPC_DEBUG_BREAKPOINT                (1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE       (1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ                (1UL << 3)
 struct kvm_debug_exit_arch {
+       __u64 address;
+       /*
+        * exiting to userspace because of h/w breakpoint, watchpoint
+        * (read, write or both) and software breakpoint.
+        */
+       __u32 status;
+       __u32 reserved;
 };
 
 /* for KVM_SET_GUEST_DEBUG */
@@ -281,10 +299,6 @@ struct kvm_guest_debug_arch {
                 * Type denotes h/w breakpoint, read watchpoint, write
                 * watchpoint or watchpoint (both read and write).
                 */
-#define KVMPPC_DEBUG_NONE              0x0
-#define KVMPPC_DEBUG_BREAKPOINT                (1UL << 1)
-#define KVMPPC_DEBUG_WATCH_WRITE       (1UL << 2)
-#define KVMPPC_DEBUG_WATCH_READ                (1UL << 3)
                __u32 type;
                __u32 reserved;
        } bp[16];
@@ -429,6 +443,11 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_MMCR0      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10)
 #define KVM_REG_PPC_MMCR1      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11)
 #define KVM_REG_PPC_MMCRA      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12)
+#define KVM_REG_PPC_MMCR2      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x13)
+#define KVM_REG_PPC_MMCRS      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x14)
+#define KVM_REG_PPC_SIAR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x15)
+#define KVM_REG_PPC_SDAR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x16)
+#define KVM_REG_PPC_SIER       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x17)
 
 #define KVM_REG_PPC_PMC1       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18)
 #define KVM_REG_PPC_PMC2       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19)
@@ -499,6 +518,65 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TLB3PS     (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
 #define KVM_REG_PPC_EPTCFG     (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
 
+/* Timebase offset */
+#define KVM_REG_PPC_TB_OFFSET  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9c)
+
+/* POWER8 registers */
+#define KVM_REG_PPC_SPMC1      (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9d)
+#define KVM_REG_PPC_SPMC2      (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9e)
+#define KVM_REG_PPC_IAMR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9f)
+#define KVM_REG_PPC_TFHAR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa0)
+#define KVM_REG_PPC_TFIAR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa1)
+#define KVM_REG_PPC_TEXASR     (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa2)
+#define KVM_REG_PPC_FSCR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa3)
+#define KVM_REG_PPC_PSPB       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xa4)
+#define KVM_REG_PPC_EBBHR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa5)
+#define KVM_REG_PPC_EBBRR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa6)
+#define KVM_REG_PPC_BESCR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa7)
+#define KVM_REG_PPC_TAR                (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa8)
+#define KVM_REG_PPC_DPDES      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa9)
+#define KVM_REG_PPC_DAWR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaa)
+#define KVM_REG_PPC_DAWRX      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xab)
+#define KVM_REG_PPC_CIABR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xac)
+#define KVM_REG_PPC_IC         (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xad)
+#define KVM_REG_PPC_VTB                (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xae)
+#define KVM_REG_PPC_CSIGR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaf)
+#define KVM_REG_PPC_TACR       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb0)
+#define KVM_REG_PPC_TCSCR      (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1)
+#define KVM_REG_PPC_PID                (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2)
+#define KVM_REG_PPC_ACOP       (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
+
+#define KVM_REG_PPC_VRSAVE     (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
+#define KVM_REG_PPC_LPCR       (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
+#define KVM_REG_PPC_PPR                (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6)
+
+/* Architecture compatibility level */
+#define KVM_REG_PPC_ARCH_COMPAT        (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7)
+
+/* Transactional Memory checkpointed state:
+ * This is all GPRs, all VSX regs and a subset of SPRs
+ */
+#define KVM_REG_PPC_TM         (KVM_REG_PPC | 0x80000000)
+/* TM GPRs */
+#define KVM_REG_PPC_TM_GPR0    (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0)
+#define KVM_REG_PPC_TM_GPR(n)  (KVM_REG_PPC_TM_GPR0 + (n))
+#define KVM_REG_PPC_TM_GPR31   (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x1f)
+/* TM VSX */
+#define KVM_REG_PPC_TM_VSR0    (KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x20)
+#define KVM_REG_PPC_TM_VSR(n)  (KVM_REG_PPC_TM_VSR0 + (n))
+#define KVM_REG_PPC_TM_VSR63   (KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x5f)
+/* TM SPRS */
+#define KVM_REG_PPC_TM_CR      (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x60)
+#define KVM_REG_PPC_TM_LR      (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x61)
+#define KVM_REG_PPC_TM_CTR     (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x62)
+#define KVM_REG_PPC_TM_FPSCR   (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x63)
+#define KVM_REG_PPC_TM_AMR     (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x64)
+#define KVM_REG_PPC_TM_PPR     (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x65)
+#define KVM_REG_PPC_TM_VRSAVE  (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x66)
+#define KVM_REG_PPC_TM_VSCR    (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
+#define KVM_REG_PPC_TM_DSCR    (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
+#define KVM_REG_PPC_TM_TAR     (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+
 /* PPC64 eXternal Interrupt Controller Specification */
 #define KVM_DEV_XICS_GRP_SOURCES       1       /* 64-bit source attributes */
 
index d8958be..479b036 100644 (file)
@@ -80,10 +80,11 @@ int main(void)
        DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr));
 #else
        DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
+       DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
+       DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
 #endif /* CONFIG_PPC64 */
 
        DEFINE(KSP, offsetof(struct thread_struct, ksp));
-       DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
        DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
 #ifdef CONFIG_BOOKE
        DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
@@ -113,7 +114,7 @@ int main(void)
 #endif /* CONFIG_SPE */
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-       DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
+       DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0));
 #endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
        DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
@@ -445,7 +446,7 @@ int main(void)
        DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
        DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
        DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr));
        DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0));
        DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1));
@@ -476,7 +477,7 @@ int main(void)
        DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
 
        /* book3s */
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
        DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
        DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
@@ -508,6 +509,8 @@ int main(void)
        DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
        DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
        DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
+       DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar));
+       DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar));
        DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
        DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
        DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
@@ -517,18 +520,22 @@ int main(void)
        DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
        DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
        DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
+       DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
+       DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
        DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
        DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
        DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
        DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
-       DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
-                          offsetof(struct kvmppc_vcpu_book3s, vcpu));
+       DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
+       DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
+       DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
        DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
        DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
        DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
 
 #ifdef CONFIG_PPC_BOOK3S_64
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+       DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
 # define SVCPU_FIELD(x, f)     DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
 #else
 # define SVCPU_FIELD(x, f)
@@ -580,7 +587,7 @@ int main(void)
        HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
        HSTATE_FIELD(HSTATE_NAPPING, napping);
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
        HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
        HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
@@ -596,10 +603,11 @@ int main(void)
        HSTATE_FIELD(HSTATE_DABR, dabr);
        HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
        DEFINE(IPI_PRIORITY, IPI_PRIORITY);
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #ifdef CONFIG_PPC_BOOK3S_64
        HSTATE_FIELD(HSTATE_CFAR, cfar);
+       HSTATE_FIELD(HSTATE_PPR, ppr);
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 #else /* CONFIG_PPC_BOOK3S */
index 3a9ed6a..9f905e4 100644 (file)
@@ -126,7 +126,7 @@ BEGIN_FTR_SECTION
        bgt     cr1,.
        GET_PACA(r13)
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        li      r0,KVM_HWTHREAD_IN_KERNEL
        stb     r0,HSTATE_HWTHREAD_STATE(r13)
        /* Order setting hwthread_state vs. testing hwthread_req */
@@ -425,7 +425,7 @@ data_access_check_stab:
        mfspr   r9,SPRN_DSISR
        srdi    r10,r10,60
        rlwimi  r10,r9,16,0x20
-#ifdef CONFIG_KVM_BOOK3S_PR
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
        lbz     r9,HSTATE_IN_GUEST(r13)
        rlwimi  r10,r9,8,0x300
 #endif
@@ -650,6 +650,32 @@ slb_miss_user_pseries:
        b       .                               /* prevent spec. execution */
 #endif /* __DISABLED__ */
 
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvmppc_skip_interrupt:
+       /*
+        * Here all GPRs are unchanged from when the interrupt happened
+        * except for r13, which is saved in SPRG_SCRATCH0.
+        */
+       mfspr   r13, SPRN_SRR0
+       addi    r13, r13, 4
+       mtspr   SPRN_SRR0, r13
+       GET_SCRATCH0(r13)
+       rfid
+       b       .
+
+kvmppc_skip_Hinterrupt:
+       /*
+        * Here all GPRs are unchanged from when the interrupt happened
+        * except for r13, which is saved in SPRG_SCRATCH0.
+        */
+       mfspr   r13, SPRN_HSRR0
+       addi    r13, r13, 4
+       mtspr   SPRN_HSRR0, r13
+       GET_SCRATCH0(r13)
+       hrfid
+       b       .
+#endif
+
 /*
  * Code from here down to __end_handlers is invoked from the
  * exception prologs above.  Because the prologs assemble the
index e11863f..847e40e 100644 (file)
@@ -84,7 +84,7 @@ _GLOBAL(power7_nap)
        std     r9,_MSR(r1)
        std     r1,PACAR1(r13)
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        /* Tell KVM we're napping */
        li      r4,KVM_HWTHREAD_IN_NAP
        stb     r4,HSTATE_HWTHREAD_STATE(r13)
index 0adab06..572bb5b 100644 (file)
@@ -661,7 +661,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
        /* number of bytes needed for the bitmap */
        sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
 
-       page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
+       page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz));
        if (!page)
                panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
        tbl->it_map = page_address(page);
index c69440c..c7cb8c2 100644 (file)
@@ -441,50 +441,6 @@ void migrate_irqs(void)
 }
 #endif
 
-static inline void handle_one_irq(unsigned int irq)
-{
-       struct thread_info *curtp, *irqtp;
-       unsigned long saved_sp_limit;
-       struct irq_desc *desc;
-
-       desc = irq_to_desc(irq);
-       if (!desc)
-               return;
-
-       /* Switch to the irq stack to handle this */
-       curtp = current_thread_info();
-       irqtp = hardirq_ctx[smp_processor_id()];
-
-       if (curtp == irqtp) {
-               /* We're already on the irq stack, just handle it */
-               desc->handle_irq(irq, desc);
-               return;
-       }
-
-       saved_sp_limit = current->thread.ksp_limit;
-
-       irqtp->task = curtp->task;
-       irqtp->flags = 0;
-
-       /* Copy the softirq bits in preempt_count so that the
-        * softirq checks work in the hardirq context. */
-       irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) |
-                              (curtp->preempt_count & SOFTIRQ_MASK);
-
-       current->thread.ksp_limit = (unsigned long)irqtp +
-               _ALIGN_UP(sizeof(struct thread_info), 16);
-
-       call_handle_irq(irq, desc, irqtp, desc->handle_irq);
-       current->thread.ksp_limit = saved_sp_limit;
-       irqtp->task = NULL;
-
-       /* Set any flag that may have been set on the
-        * alternate stack
-        */
-       if (irqtp->flags)
-               set_bits(irqtp->flags, &curtp->flags);
-}
-
 static inline void check_stack_overflow(void)
 {
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -501,9 +457,9 @@ static inline void check_stack_overflow(void)
 #endif
 }
 
-void do_IRQ(struct pt_regs *regs)
+void __do_irq(struct pt_regs *regs)
 {
-       struct pt_regs *old_regs = set_irq_regs(regs);
+       struct irq_desc *desc;
        unsigned int irq;
 
        irq_enter();
@@ -519,18 +475,57 @@ void do_IRQ(struct pt_regs *regs)
         */
        irq = ppc_md.get_irq();
 
-       /* We can hard enable interrupts now */
+       /* We can hard enable interrupts now to allow perf interrupts */
        may_hard_irq_enable();
 
        /* And finally process it */
-       if (irq != NO_IRQ)
-               handle_one_irq(irq);
-       else
+       if (unlikely(irq == NO_IRQ))
                __get_cpu_var(irq_stat).spurious_irqs++;
+       else {
+               desc = irq_to_desc(irq);
+               if (likely(desc))
+                       desc->handle_irq(irq, desc);
+       }
 
        trace_irq_exit(regs);
 
        irq_exit();
+}
+
+void do_IRQ(struct pt_regs *regs)
+{
+       struct pt_regs *old_regs = set_irq_regs(regs);
+       struct thread_info *curtp, *irqtp, *sirqtp;
+
+       /* Switch to the irq stack to handle this */
+       curtp = current_thread_info();
+       irqtp = hardirq_ctx[raw_smp_processor_id()];
+       sirqtp = softirq_ctx[raw_smp_processor_id()];
+
+       /* Already there ? */
+       if (unlikely(curtp == irqtp || curtp == sirqtp)) {
+               __do_irq(regs);
+               set_irq_regs(old_regs);
+               return;
+       }
+
+       /* Prepare the thread_info in the irq stack */
+       irqtp->task = curtp->task;
+       irqtp->flags = 0;
+
+       /* Copy the preempt_count so that the [soft]irq checks work. */
+       irqtp->preempt_count = curtp->preempt_count;
+
+       /* Switch stack and call */
+       call_do_irq(regs, irqtp);
+
+       /* Restore stack limit */
+       irqtp->task = NULL;
+
+       /* Copy back updates to the thread_info */
+       if (irqtp->flags)
+               set_bits(irqtp->flags, &curtp->flags);
+
        set_irq_regs(old_regs);
 }
 
@@ -592,28 +587,22 @@ void irq_ctx_init(void)
                memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
                tp = softirq_ctx[i];
                tp->cpu = i;
-               tp->preempt_count = 0;
 
                memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
                tp = hardirq_ctx[i];
                tp->cpu = i;
-               tp->preempt_count = HARDIRQ_OFFSET;
        }
 }
 
 static inline void do_softirq_onstack(void)
 {
        struct thread_info *curtp, *irqtp;
-       unsigned long saved_sp_limit = current->thread.ksp_limit;
 
        curtp = current_thread_info();
        irqtp = softirq_ctx[smp_processor_id()];
        irqtp->task = curtp->task;
        irqtp->flags = 0;
-       current->thread.ksp_limit = (unsigned long)irqtp +
-                                   _ALIGN_UP(sizeof(struct thread_info), 16);
        call_do_softirq(irqtp);
-       current->thread.ksp_limit = saved_sp_limit;
        irqtp->task = NULL;
 
        /* Set any flag that may have been set on the
index 777d999..2b0ad98 100644 (file)
 
        .text
 
+/*
+ * We store the saved ksp_limit in the unused part
+ * of the STACK_FRAME_OVERHEAD
+ */
 _GLOBAL(call_do_softirq)
        mflr    r0
        stw     r0,4(r1)
+       lwz     r10,THREAD+KSP_LIMIT(r2)
+       addi    r11,r3,THREAD_INFO_GAP
        stwu    r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
        mr      r1,r3
+       stw     r10,8(r1)
+       stw     r11,THREAD+KSP_LIMIT(r2)
        bl      __do_softirq
+       lwz     r10,8(r1)
        lwz     r1,0(r1)
        lwz     r0,4(r1)
+       stw     r10,THREAD+KSP_LIMIT(r2)
        mtlr    r0
        blr
 
-_GLOBAL(call_handle_irq)
+_GLOBAL(call_do_irq)
        mflr    r0
        stw     r0,4(r1)
-       mtctr   r6
-       stwu    r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
-       mr      r1,r5
-       bctrl
+       lwz     r10,THREAD+KSP_LIMIT(r2)
+       addi    r11,r3,THREAD_INFO_GAP
+       stwu    r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+       mr      r1,r4
+       stw     r10,8(r1)
+       stw     r11,THREAD+KSP_LIMIT(r2)
+       bl      __do_irq
+       lwz     r10,8(r1)
        lwz     r1,0(r1)
        lwz     r0,4(r1)
+       stw     r10,THREAD+KSP_LIMIT(r2)
        mtlr    r0
        blr
 
index 971d7e7..e59caf8 100644 (file)
@@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq)
        mtlr    r0
        blr
 
-_GLOBAL(call_handle_irq)
-       ld      r8,0(r6)
+_GLOBAL(call_do_irq)
        mflr    r0
        std     r0,16(r1)
-       mtctr   r8
-       stdu    r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
-       mr      r1,r5
-       bctrl
+       stdu    r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+       mr      r1,r4
+       bl      .__do_irq
        ld      r1,0(r1)
        ld      r0,16(r1)
        mtlr    r0
index 6f428da..ec5ae55 100644 (file)
@@ -314,28 +314,28 @@ static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);
  */
 static void set_debug_reg_defaults(struct thread_struct *thread)
 {
-       thread->iac1 = thread->iac2 = 0;
+       thread->debug.iac1 = thread->debug.iac2 = 0;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
-       thread->iac3 = thread->iac4 = 0;
+       thread->debug.iac3 = thread->debug.iac4 = 0;
 #endif
-       thread->dac1 = thread->dac2 = 0;
+       thread->debug.dac1 = thread->debug.dac2 = 0;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-       thread->dvc1 = thread->dvc2 = 0;
+       thread->debug.dvc1 = thread->debug.dvc2 = 0;
 #endif
-       thread->dbcr0 = 0;
+       thread->debug.dbcr0 = 0;
 #ifdef CONFIG_BOOKE
        /*
         * Force User/Supervisor bits to b11 (user-only MSR[PR]=1)
         */
-       thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |   \
+       thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
                        DBCR1_IAC3US | DBCR1_IAC4US;
        /*
         * Force Data Address Compare User/Supervisor bits to be User-only
         * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.
         */
-       thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+       thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
 #else
-       thread->dbcr1 = 0;
+       thread->debug.dbcr1 = 0;
 #endif
 }
 
@@ -348,22 +348,22 @@ static void prime_debug_regs(struct thread_struct *thread)
         */
        mtmsr(mfmsr() & ~MSR_DE);
 
-       mtspr(SPRN_IAC1, thread->iac1);
-       mtspr(SPRN_IAC2, thread->iac2);
+       mtspr(SPRN_IAC1, thread->debug.iac1);
+       mtspr(SPRN_IAC2, thread->debug.iac2);
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
-       mtspr(SPRN_IAC3, thread->iac3);
-       mtspr(SPRN_IAC4, thread->iac4);
+       mtspr(SPRN_IAC3, thread->debug.iac3);
+       mtspr(SPRN_IAC4, thread->debug.iac4);
 #endif
-       mtspr(SPRN_DAC1, thread->dac1);
-       mtspr(SPRN_DAC2, thread->dac2);
+       mtspr(SPRN_DAC1, thread->debug.dac1);
+       mtspr(SPRN_DAC2, thread->debug.dac2);
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-       mtspr(SPRN_DVC1, thread->dvc1);
-       mtspr(SPRN_DVC2, thread->dvc2);
+       mtspr(SPRN_DVC1, thread->debug.dvc1);
+       mtspr(SPRN_DVC2, thread->debug.dvc2);
 #endif
-       mtspr(SPRN_DBCR0, thread->dbcr0);
-       mtspr(SPRN_DBCR1, thread->dbcr1);
+       mtspr(SPRN_DBCR0, thread->debug.dbcr0);
+       mtspr(SPRN_DBCR1, thread->debug.dbcr1);
 #ifdef CONFIG_BOOKE
-       mtspr(SPRN_DBCR2, thread->dbcr2);
+       mtspr(SPRN_DBCR2, thread->debug.dbcr2);
 #endif
 }
 /*
@@ -371,12 +371,13 @@ static void prime_debug_regs(struct thread_struct *thread)
  * debug registers, set the debug registers from the values
  * stored in the new thread.
  */
-static void switch_booke_debug_regs(struct thread_struct *new_thread)
+void switch_booke_debug_regs(struct thread_struct *new_thread)
 {
-       if ((current->thread.dbcr0 & DBCR0_IDM)
-               || (new_thread->dbcr0 & DBCR0_IDM))
+       if ((current->thread.debug.dbcr0 & DBCR0_IDM)
+               || (new_thread->debug.dbcr0 & DBCR0_IDM))
                        prime_debug_regs(new_thread);
 }
+EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
 #else  /* !CONFIG_PPC_ADV_DEBUG_REGS */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
 static void set_debug_reg_defaults(struct thread_struct *thread)
@@ -1000,9 +1001,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
        kregs = (struct pt_regs *) sp;
        sp -= STACK_FRAME_OVERHEAD;
        p->thread.ksp = sp;
+#ifdef CONFIG_PPC32
        p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
                                _ALIGN_UP(sizeof(struct thread_info), 16);
-
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        p->thread.ptrace_bps[0] = NULL;
 #endif
index 12e656f..5fe2842 100644 (file)
@@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt;
 
 static cell_t __initdata regbuf[1024];
 
+static bool rtas_has_query_cpu_stopped;
+
 
 /*
  * Error results ... some OF calls will return "-1" on error, some
@@ -1574,6 +1576,11 @@ static void __init prom_instantiate_rtas(void)
        prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
                     &val, sizeof(val));
 
+       /* Check if it supports "query-cpu-stopped-state" */
+       if (prom_getprop(rtas_node, "query-cpu-stopped-state",
+                        &val, sizeof(val)) != PROM_ERROR)
+               rtas_has_query_cpu_stopped = true;
+
 #if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__)
        /* PowerVN takeover hack */
        prom_rtas_data = base;
@@ -1815,6 +1822,18 @@ static void __init prom_hold_cpus(void)
                = (void *) LOW_ADDR(__secondary_hold_acknowledge);
        unsigned long secondary_hold = LOW_ADDR(__secondary_hold);
 
+       /*
+        * On pseries, if RTAS supports "query-cpu-stopped-state",
+        * we skip this stage, the CPUs will be started by the
+        * kernel using RTAS.
+        */
+       if ((of_platform == PLATFORM_PSERIES ||
+            of_platform == PLATFORM_PSERIES_LPAR) &&
+           rtas_has_query_cpu_stopped) {
+               prom_printf("prom_hold_cpus: skipped\n");
+               return;
+       }
+
        prom_debug("prom_hold_cpus: start...\n");
        prom_debug("    1) spinloop       = 0x%x\n", (unsigned long)spinloop);
        prom_debug("    1) *spinloop      = 0x%x\n", *spinloop);
@@ -3011,6 +3030,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
         * On non-powermacs, put all CPUs in spin-loops.
         *
         * PowerMacs use a different mechanism to spin CPUs
+        *
+        * (This must be done after instanciating RTAS)
         */
        if (of_platform != PLATFORM_POWERMAC &&
            of_platform != PLATFORM_OPAL)
index 9a0d24c..ddaf178 100644 (file)
@@ -854,8 +854,8 @@ void user_enable_single_step(struct task_struct *task)
 
        if (regs != NULL) {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-               task->thread.dbcr0 &= ~DBCR0_BT;
-               task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+               task->thread.debug.dbcr0 &= ~DBCR0_BT;
+               task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
                regs->msr |= MSR_DE;
 #else
                regs->msr &= ~MSR_BE;
@@ -871,8 +871,8 @@ void user_enable_block_step(struct task_struct *task)
 
        if (regs != NULL) {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-               task->thread.dbcr0 &= ~DBCR0_IC;
-               task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT;
+               task->thread.debug.dbcr0 &= ~DBCR0_IC;
+               task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
                regs->msr |= MSR_DE;
 #else
                regs->msr &= ~MSR_SE;
@@ -894,16 +894,16 @@ void user_disable_single_step(struct task_struct *task)
                 * And, after doing so, if all debug flags are off, turn
                 * off DBCR0(IDM) and MSR(DE) .... Torez
                 */
-               task->thread.dbcr0 &= ~DBCR0_IC;
+               task->thread.debug.dbcr0 &= ~DBCR0_IC;
                /*
                 * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
                 */
-               if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
-                                       task->thread.dbcr1)) {
+               if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+                                       task->thread.debug.dbcr1)) {
                        /*
                         * All debug events were off.....
                         */
-                       task->thread.dbcr0 &= ~DBCR0_IDM;
+                       task->thread.debug.dbcr0 &= ~DBCR0_IDM;
                        regs->msr &= ~MSR_DE;
                }
 #else
@@ -1022,14 +1022,14 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
         */
 
        /* DAC's hold the whole address without any mode flags */
-       task->thread.dac1 = data & ~0x3UL;
+       task->thread.debug.dac1 = data & ~0x3UL;
 
-       if (task->thread.dac1 == 0) {
+       if (task->thread.debug.dac1 == 0) {
                dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
-               if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0,
-                                       task->thread.dbcr1)) {
+               if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+                                       task->thread.debug.dbcr1)) {
                        task->thread.regs->msr &= ~MSR_DE;
-                       task->thread.dbcr0 &= ~DBCR0_IDM;
+                       task->thread.debug.dbcr0 &= ~DBCR0_IDM;
                }
                return 0;
        }
@@ -1041,7 +1041,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 
        /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
           register */
-       task->thread.dbcr0 |= DBCR0_IDM;
+       task->thread.debug.dbcr0 |= DBCR0_IDM;
 
        /* Check for write and read flags and set DBCR0
           accordingly */
@@ -1071,10 +1071,10 @@ static long set_instruction_bp(struct task_struct *child,
                              struct ppc_hw_breakpoint *bp_info)
 {
        int slot;
-       int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0);
-       int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0);
-       int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0);
-       int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0);
+       int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
+       int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
+       int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
+       int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
 
        if (dbcr_iac_range(child) & DBCR_IAC12MODE)
                slot2_in_use = 1;
@@ -1093,9 +1093,9 @@ static long set_instruction_bp(struct task_struct *child,
                /* We need a pair of IAC regsisters */
                if ((!slot1_in_use) && (!slot2_in_use)) {
                        slot = 1;
-                       child->thread.iac1 = bp_info->addr;
-                       child->thread.iac2 = bp_info->addr2;
-                       child->thread.dbcr0 |= DBCR0_IAC1;
+                       child->thread.debug.iac1 = bp_info->addr;
+                       child->thread.debug.iac2 = bp_info->addr2;
+                       child->thread.debug.dbcr0 |= DBCR0_IAC1;
                        if (bp_info->addr_mode ==
                                        PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
                                dbcr_iac_range(child) |= DBCR_IAC12X;
@@ -1104,9 +1104,9 @@ static long set_instruction_bp(struct task_struct *child,
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
                } else if ((!slot3_in_use) && (!slot4_in_use)) {
                        slot = 3;
-                       child->thread.iac3 = bp_info->addr;
-                       child->thread.iac4 = bp_info->addr2;
-                       child->thread.dbcr0 |= DBCR0_IAC3;
+                       child->thread.debug.iac3 = bp_info->addr;
+                       child->thread.debug.iac4 = bp_info->addr2;
+                       child->thread.debug.dbcr0 |= DBCR0_IAC3;
                        if (bp_info->addr_mode ==
                                        PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
                                dbcr_iac_range(child) |= DBCR_IAC34X;
@@ -1126,30 +1126,30 @@ static long set_instruction_bp(struct task_struct *child,
                         */
                        if (slot2_in_use || (slot3_in_use == slot4_in_use)) {
                                slot = 1;
-                               child->thread.iac1 = bp_info->addr;
-                               child->thread.dbcr0 |= DBCR0_IAC1;
+                               child->thread.debug.iac1 = bp_info->addr;
+                               child->thread.debug.dbcr0 |= DBCR0_IAC1;
                                goto out;
                        }
                }
                if (!slot2_in_use) {
                        slot = 2;
-                       child->thread.iac2 = bp_info->addr;
-                       child->thread.dbcr0 |= DBCR0_IAC2;
+                       child->thread.debug.iac2 = bp_info->addr;
+                       child->thread.debug.dbcr0 |= DBCR0_IAC2;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
                } else if (!slot3_in_use) {
                        slot = 3;
-                       child->thread.iac3 = bp_info->addr;
-                       child->thread.dbcr0 |= DBCR0_IAC3;
+                       child->thread.debug.iac3 = bp_info->addr;
+                       child->thread.debug.dbcr0 |= DBCR0_IAC3;
                } else if (!slot4_in_use) {
                        slot = 4;
-                       child->thread.iac4 = bp_info->addr;
-                       child->thread.dbcr0 |= DBCR0_IAC4;
+                       child->thread.debug.iac4 = bp_info->addr;
+                       child->thread.debug.dbcr0 |= DBCR0_IAC4;
 #endif
                } else
                        return -ENOSPC;
        }
 out:
-       child->thread.dbcr0 |= DBCR0_IDM;
+       child->thread.debug.dbcr0 |= DBCR0_IDM;
        child->thread.regs->msr |= MSR_DE;
 
        return slot;
@@ -1159,49 +1159,49 @@ static int del_instruction_bp(struct task_struct *child, int slot)
 {
        switch (slot) {
        case 1:
-               if ((child->thread.dbcr0 & DBCR0_IAC1) == 0)
+               if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
                        return -ENOENT;
 
                if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
                        /* address range - clear slots 1 & 2 */
-                       child->thread.iac2 = 0;
+                       child->thread.debug.iac2 = 0;
                        dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
                }
-               child->thread.iac1 = 0;
-               child->thread.dbcr0 &= ~DBCR0_IAC1;
+               child->thread.debug.iac1 = 0;
+               child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
                break;
        case 2:
-               if ((child->thread.dbcr0 & DBCR0_IAC2) == 0)
+               if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
                        return -ENOENT;
 
                if (dbcr_iac_range(child) & DBCR_IAC12MODE)
                        /* used in a range */
                        return -EINVAL;
-               child->thread.iac2 = 0;
-               child->thread.dbcr0 &= ~DBCR0_IAC2;
+               child->thread.debug.iac2 = 0;
+               child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
                break;
 #if CONFIG_PPC_ADV_DEBUG_IACS > 2
        case 3:
-               if ((child->thread.dbcr0 & DBCR0_IAC3) == 0)
+               if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
                        return -ENOENT;
 
                if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
                        /* address range - clear slots 3 & 4 */
-                       child->thread.iac4 = 0;
+                       child->thread.debug.iac4 = 0;
                        dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
                }
-               child->thread.iac3 = 0;
-               child->thread.dbcr0 &= ~DBCR0_IAC3;
+               child->thread.debug.iac3 = 0;
+               child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
                break;
        case 4:
-               if ((child->thread.dbcr0 & DBCR0_IAC4) == 0)
+               if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
                        return -ENOENT;
 
                if (dbcr_iac_range(child) & DBCR_IAC34MODE)
                        /* Used in a range */
                        return -EINVAL;
-               child->thread.iac4 = 0;
-               child->thread.dbcr0 &= ~DBCR0_IAC4;
+               child->thread.debug.iac4 = 0;
+               child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
                break;
 #endif
        default:
@@ -1231,18 +1231,18 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
                        dbcr_dac(child) |= DBCR_DAC1R;
                if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
                        dbcr_dac(child) |= DBCR_DAC1W;
-               child->thread.dac1 = (unsigned long)bp_info->addr;
+               child->thread.debug.dac1 = (unsigned long)bp_info->addr;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
                if (byte_enable) {
-                       child->thread.dvc1 =
+                       child->thread.debug.dvc1 =
                                (unsigned long)bp_info->condition_value;
-                       child->thread.dbcr2 |=
+                       child->thread.debug.dbcr2 |=
                                ((byte_enable << DBCR2_DVC1BE_SHIFT) |
                                 (condition_mode << DBCR2_DVC1M_SHIFT));
                }
 #endif
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-       } else if (child->thread.dbcr2 & DBCR2_DAC12MODE) {
+       } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
                /* Both dac1 and dac2 are part of a range */
                return -ENOSPC;
 #endif
@@ -1252,19 +1252,19 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
                        dbcr_dac(child) |= DBCR_DAC2R;
                if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
                        dbcr_dac(child) |= DBCR_DAC2W;
-               child->thread.dac2 = (unsigned long)bp_info->addr;
+               child->thread.debug.dac2 = (unsigned long)bp_info->addr;
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
                if (byte_enable) {
-                       child->thread.dvc2 =
+                       child->thread.debug.dvc2 =
                                (unsigned long)bp_info->condition_value;
-                       child->thread.dbcr2 |=
+                       child->thread.debug.dbcr2 |=
                                ((byte_enable << DBCR2_DVC2BE_SHIFT) |
                                 (condition_mode << DBCR2_DVC2M_SHIFT));
                }
 #endif
        } else
                return -ENOSPC;
-       child->thread.dbcr0 |= DBCR0_IDM;
+       child->thread.debug.dbcr0 |= DBCR0_IDM;
        child->thread.regs->msr |= MSR_DE;
 
        return slot + 4;
@@ -1276,32 +1276,32 @@ static int del_dac(struct task_struct *child, int slot)
                if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
                        return -ENOENT;
 
-               child->thread.dac1 = 0;
+               child->thread.debug.dac1 = 0;
                dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-               if (child->thread.dbcr2 & DBCR2_DAC12MODE) {
-                       child->thread.dac2 = 0;
-                       child->thread.dbcr2 &= ~DBCR2_DAC12MODE;
+               if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+                       child->thread.debug.dac2 = 0;
+                       child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
                }
-               child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
+               child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
 #endif
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-               child->thread.dvc1 = 0;
+               child->thread.debug.dvc1 = 0;
 #endif
        } else if (slot == 2) {
                if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
                        return -ENOENT;
 
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-               if (child->thread.dbcr2 & DBCR2_DAC12MODE)
+               if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
                        /* Part of a range */
                        return -EINVAL;
-               child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
+               child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
 #endif
 #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
-               child->thread.dvc2 = 0;
+               child->thread.debug.dvc2 = 0;
 #endif
-               child->thread.dac2 = 0;
+               child->thread.debug.dac2 = 0;
                dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
        } else
                return -EINVAL;
@@ -1343,22 +1343,22 @@ static int set_dac_range(struct task_struct *child,
                        return -EIO;
        }
 
-       if (child->thread.dbcr0 &
+       if (child->thread.debug.dbcr0 &
            (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
                return -ENOSPC;
 
        if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
-               child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+               child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
        if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
-               child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
-       child->thread.dac1 = bp_info->addr;
-       child->thread.dac2 = bp_info->addr2;
+               child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+       child->thread.debug.dac1 = bp_info->addr;
+       child->thread.debug.dac2 = bp_info->addr2;
        if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
-               child->thread.dbcr2  |= DBCR2_DAC12M;
+               child->thread.debug.dbcr2  |= DBCR2_DAC12M;
        else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
-               child->thread.dbcr2  |= DBCR2_DAC12MX;
+               child->thread.debug.dbcr2  |= DBCR2_DAC12MX;
        else    /* PPC_BREAKPOINT_MODE_MASK */
-               child->thread.dbcr2  |= DBCR2_DAC12MM;
+               child->thread.debug.dbcr2  |= DBCR2_DAC12MM;
        child->thread.regs->msr |= MSR_DE;
 
        return 5;
@@ -1489,9 +1489,9 @@ static long ppc_del_hwdebug(struct task_struct *child, long data)
                rc = del_dac(child, (int)data - 4);
 
        if (!rc) {
-               if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0,
-                                       child->thread.dbcr1)) {
-                       child->thread.dbcr0 &= ~DBCR0_IDM;
+               if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
+                                       child->thread.debug.dbcr1)) {
+                       child->thread.debug.dbcr0 &= ~DBCR0_IDM;
                        child->thread.regs->msr &= ~MSR_DE;
                }
        }
@@ -1669,7 +1669,7 @@ long arch_ptrace(struct task_struct *child, long request,
                if (addr > 0)
                        break;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-               ret = put_user(child->thread.dac1, datalp);
+               ret = put_user(child->thread.debug.dac1, datalp);
 #else
                dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
                             (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
index f51599e..18c7c65 100644 (file)
@@ -269,7 +269,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                if (addr > 0)
                        break;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-               ret = put_user(child->thread.dac1, (u32 __user *)data);
+               ret = put_user(child->thread.debug.dac1, (u32 __user *)data);
 #else
                dabr_fake = (
                        (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
index bebdf1a..3f220d9 100644 (file)
@@ -1309,7 +1309,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
        unsigned char tmp;
        unsigned long new_msr = regs->msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-       unsigned long new_dbcr0 = current->thread.dbcr0;
+       unsigned long new_dbcr0 = current->thread.debug.dbcr0;
 #endif
 
        for (i=0; i<ndbg; i++) {
@@ -1324,7 +1324,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
                        } else {
                                new_dbcr0 &= ~DBCR0_IC;
                                if (!DBCR_ACTIVE_EVENTS(new_dbcr0,
-                                               current->thread.dbcr1)) {
+                                               current->thread.debug.dbcr1)) {
                                        new_msr &= ~MSR_DE;
                                        new_dbcr0 &= ~DBCR0_IDM;
                                }
@@ -1359,7 +1359,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
           the user is really doing something wrong. */
        regs->msr = new_msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-       current->thread.dbcr0 = new_dbcr0;
+       current->thread.debug.dbcr0 = new_dbcr0;
 #endif
 
        if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
index 27a90b9..b4e6676 100644 (file)
@@ -17,6 +17,7 @@
 #include <asm/machdep.h>
 #include <asm/smp.h>
 #include <asm/pmc.h>
+#include <asm/firmware.h>
 
 #include "cacheinfo.h"
 
@@ -179,15 +180,25 @@ SYSFS_PMCSETUP(spurr, SPRN_SPURR);
 SYSFS_PMCSETUP(dscr, SPRN_DSCR);
 SYSFS_PMCSETUP(pir, SPRN_PIR);
 
+/*
+  Lets only enable read for phyp resources and
+  enable write when needed with a separate function.
+  Lets be conservative and default to pseries.
+*/
 static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
 static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
 static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
-static DEVICE_ATTR(purr, 0600, show_purr, store_purr);
+static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
 static DEVICE_ATTR(pir, 0400, show_pir, NULL);
 
 unsigned long dscr_default = 0;
 EXPORT_SYMBOL(dscr_default);
 
+static void add_write_permission_dev_attr(struct device_attribute *attr)
+{
+       attr->attr.mode |= 0200;
+}
+
 static ssize_t show_dscr_default(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
@@ -394,8 +405,11 @@ static void register_cpu_online(unsigned int cpu)
        if (cpu_has_feature(CPU_FTR_MMCRA))
                device_create_file(s, &dev_attr_mmcra);
 
-       if (cpu_has_feature(CPU_FTR_PURR))
+       if (cpu_has_feature(CPU_FTR_PURR)) {
+               if (!firmware_has_feature(FW_FEATURE_LPAR))
+                       add_write_permission_dev_attr(&dev_attr_purr);
                device_create_file(s, &dev_attr_purr);
+       }
 
        if (cpu_has_feature(CPU_FTR_SPURR))
                device_create_file(s, &dev_attr_spurr);
index 7b60b98..cd809ea 100644 (file)
@@ -79,6 +79,11 @@ _GLOBAL(tm_abort)
        TABORT(R3)
        blr
 
+       .section        ".toc","aw"
+DSCR_DEFAULT:
+       .tc dscr_default[TC],dscr_default
+
+       .section        ".text"
 
 /* void tm_reclaim(struct thread_struct *thread,
  *                 unsigned long orig_msr,
@@ -123,6 +128,7 @@ _GLOBAL(tm_reclaim)
        mr      r15, r14
        ori     r15, r15, MSR_FP
        li      r16, MSR_RI
+       ori     r16, r16, MSR_EE /* IRQs hard off */
        andc    r15, r15, r16
        oris    r15, r15, MSR_VEC@h
 #ifdef CONFIG_VSX
@@ -187,11 +193,18 @@ dont_backup_fp:
        std     r1, PACATMSCRATCH(r13)
        ld      r1, PACAR1(r13)
 
+       /* Store the PPR in r11 and reset to decent value */
+       std     r11, GPR11(r1)                  /* Temporary stash */
+       mfspr   r11, SPRN_PPR
+       HMT_MEDIUM
+
        /* Now get some more GPRS free */
        std     r7, GPR7(r1)                    /* Temporary stash */
        std     r12, GPR12(r1)                  /* ''   ''    ''   */
        ld      r12, STACK_PARAM(0)(r1)         /* Param 0, thread_struct * */
 
+       std     r11, THREAD_TM_PPR(r12)         /* Store PPR and free r11 */
+
        addi    r7, r12, PT_CKPT_REGS           /* Thread's ckpt_regs */
 
        /* Make r7 look like an exception frame so that we
@@ -203,15 +216,19 @@ dont_backup_fp:
        SAVE_GPR(0, r7)                         /* user r0 */
        SAVE_GPR(2, r7)                 /* user r2 */
        SAVE_4GPRS(3, r7)                       /* user r3-r6 */
-       SAVE_4GPRS(8, r7)                       /* user r8-r11 */
+       SAVE_GPR(8, r7)                         /* user r8 */
+       SAVE_GPR(9, r7)                         /* user r9 */
+       SAVE_GPR(10, r7)                        /* user r10 */
        ld      r3, PACATMSCRATCH(r13)          /* user r1 */
        ld      r4, GPR7(r1)                    /* user r7 */
-       ld      r5, GPR12(r1)                   /* user r12 */
-       GET_SCRATCH0(6)                         /* user r13 */
+       ld      r5, GPR11(r1)                   /* user r11 */
+       ld      r6, GPR12(r1)                   /* user r12 */
+       GET_SCRATCH0(8)                         /* user r13 */
        std     r3, GPR1(r7)
        std     r4, GPR7(r7)
-       std     r5, GPR12(r7)
-       std     r6, GPR13(r7)
+       std     r5, GPR11(r7)
+       std     r6, GPR12(r7)
+       std     r8, GPR13(r7)
 
        SAVE_NVGPRS(r7)                         /* user r14-r31 */
 
@@ -234,14 +251,12 @@ dont_backup_fp:
        std     r6, _XER(r7)
 
 
-       /* ******************** TAR, PPR, DSCR ********** */
+       /* ******************** TAR, DSCR ********** */
        mfspr   r3, SPRN_TAR
-       mfspr   r4, SPRN_PPR
-       mfspr   r5, SPRN_DSCR
+       mfspr   r4, SPRN_DSCR
 
        std     r3, THREAD_TM_TAR(r12)
-       std     r4, THREAD_TM_PPR(r12)
-       std     r5, THREAD_TM_DSCR(r12)
+       std     r4, THREAD_TM_DSCR(r12)
 
        /* MSR and flags:  We don't change CRs, and we don't need to alter
         * MSR.
@@ -258,7 +273,7 @@ dont_backup_fp:
        std     r3, THREAD_TM_TFHAR(r12)
        std     r4, THREAD_TM_TFIAR(r12)
 
-       /* AMR and PPR are checkpointed too, but are unsupported by Linux. */
+       /* AMR is checkpointed too, but is unsupported by Linux. */
 
        /* Restore original MSR/IRQ state & clear TM mode */
        ld      r14, TM_FRAME_L0(r1)            /* Orig MSR */
@@ -274,6 +289,12 @@ dont_backup_fp:
        mtcr    r4
        mtlr    r0
        ld      r2, 40(r1)
+
+       /* Load system default DSCR */
+       ld      r4, DSCR_DEFAULT@toc(r2)
+       ld      r0, 0(r4)
+       mtspr   SPRN_DSCR, r0
+
        blr
 
 
@@ -358,25 +379,24 @@ dont_restore_fp:
 
 restore_gprs:
 
-       /* ******************** TAR, PPR, DSCR ********** */
-       ld      r4, THREAD_TM_TAR(r3)
-       ld      r5, THREAD_TM_PPR(r3)
-       ld      r6, THREAD_TM_DSCR(r3)
+       /* ******************** CR,LR,CCR,MSR ********** */
+       ld      r4, _CTR(r7)
+       ld      r5, _LINK(r7)
+       ld      r6, _CCR(r7)
+       ld      r8, _XER(r7)
 
-       mtspr   SPRN_TAR,       r4
-       mtspr   SPRN_PPR,       r5
-       mtspr   SPRN_DSCR,      r6
+       mtctr   r4
+       mtlr    r5
+       mtcr    r6
+       mtxer   r8
 
-       /* ******************** CR,LR,CCR,MSR ********** */
-       ld      r3, _CTR(r7)
-       ld      r4, _LINK(r7)
-       ld      r5, _CCR(r7)
-       ld      r6, _XER(r7)
+       /* ******************** TAR ******************** */
+       ld      r4, THREAD_TM_TAR(r3)
+       mtspr   SPRN_TAR,       r4
 
-       mtctr   r3
-       mtlr    r4
-       mtcr    r5
-       mtxer   r6
+       /* Load up the PPR and DSCR in GPRs only at this stage */
+       ld      r5, THREAD_TM_DSCR(r3)
+       ld      r6, THREAD_TM_PPR(r3)
 
        /* Clear the MSR RI since we are about to change R1.  EE is already off
         */
@@ -384,19 +404,26 @@ restore_gprs:
        mtmsrd  r4, 1
 
        REST_4GPRS(0, r7)                       /* GPR0-3 */
-       REST_GPR(4, r7)                         /* GPR4-6 */
-       REST_GPR(5, r7)
-       REST_GPR(6, r7)
+       REST_GPR(4, r7)                         /* GPR4 */
        REST_4GPRS(8, r7)                       /* GPR8-11 */
        REST_2GPRS(12, r7)                      /* GPR12-13 */
 
        REST_NVGPRS(r7)                         /* GPR14-31 */
 
-       ld      r7, GPR7(r7)                    /* GPR7 */
+       /* Load up PPR and DSCR here so we don't run with user values for long
+        */
+       mtspr   SPRN_DSCR, r5
+       mtspr   SPRN_PPR, r6
+
+       REST_GPR(5, r7)                         /* GPR5-7 */
+       REST_GPR(6, r7)
+       ld      r7, GPR7(r7)
 
        /* Commit register state as checkpointed state: */
        TRECHKPT
 
+       HMT_MEDIUM
+
        /* Our transactional state has now changed.
         *
         * Now just get out of here.  Transactional (current) state will be
@@ -419,6 +446,12 @@ restore_gprs:
        mtcr    r4
        mtlr    r0
        ld      r2, 40(r1)
+
+       /* Load system default DSCR */
+       ld      r4, DSCR_DEFAULT@toc(r2)
+       ld      r0, 0(r4)
+       mtspr   SPRN_DSCR, r0
+
        blr
 
        /* ****************************************************************** */
index f783c93..4f5df4e 100644 (file)
@@ -351,8 +351,8 @@ static inline int check_io_access(struct pt_regs *regs)
 #define REASON_TRAP            ESR_PTR
 
 /* single-step stuff */
-#define single_stepping(regs)  (current->thread.dbcr0 & DBCR0_IC)
-#define clear_single_step(regs)        (current->thread.dbcr0 &= ~DBCR0_IC)
+#define single_stepping(regs)  (current->thread.debug.dbcr0 & DBCR0_IC)
+#define clear_single_step(regs)        (current->thread.debug.dbcr0 &= ~DBCR0_IC)
 
 #else
 /* On non-4xx, the reason for the machine check or program
@@ -1486,7 +1486,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
        if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
                dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W);
 #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-               current->thread.dbcr2 &= ~DBCR2_DAC12MODE;
+               current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
 #endif
                do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT,
                             5);
@@ -1497,24 +1497,24 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
                             6);
                changed |= 0x01;
        }  else if (debug_status & DBSR_IAC1) {
-               current->thread.dbcr0 &= ~DBCR0_IAC1;
+               current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
                dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
                do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT,
                             1);
                changed |= 0x01;
        }  else if (debug_status & DBSR_IAC2) {
-               current->thread.dbcr0 &= ~DBCR0_IAC2;
+               current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
                do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT,
                             2);
                changed |= 0x01;
        }  else if (debug_status & DBSR_IAC3) {
-               current->thread.dbcr0 &= ~DBCR0_IAC3;
+               current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
                dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
                do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT,
                             3);
                changed |= 0x01;
        }  else if (debug_status & DBSR_IAC4) {
-               current->thread.dbcr0 &= ~DBCR0_IAC4;
+               current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
                do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT,
                             4);
                changed |= 0x01;
@@ -1524,19 +1524,20 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
         * Check all other debug flags and see if that bit needs to be turned
         * back on or not.
         */
-       if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1))
+       if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
+                              current->thread.debug.dbcr1))
                regs->msr |= MSR_DE;
        else
                /* Make sure the IDM flag is off */
-               current->thread.dbcr0 &= ~DBCR0_IDM;
+               current->thread.debug.dbcr0 &= ~DBCR0_IDM;
 
        if (changed & 0x01)
-               mtspr(SPRN_DBCR0, current->thread.dbcr0);
+               mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
 }
 
 void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 {
-       current->thread.dbsr = debug_status;
+       current->thread.debug.dbsr = debug_status;
 
        /* Hack alert: On BookE, Branch Taken stops on the branch itself, while
         * on server, it stops on the target of the branch. In order to simulate
@@ -1553,8 +1554,8 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
 
                /* Do the single step trick only when coming from userspace */
                if (user_mode(regs)) {
-                       current->thread.dbcr0 &= ~DBCR0_BT;
-                       current->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+                       current->thread.debug.dbcr0 &= ~DBCR0_BT;
+                       current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
                        regs->msr |= MSR_DE;
                        return;
                }
@@ -1582,13 +1583,13 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
                        return;
 
                if (user_mode(regs)) {
-                       current->thread.dbcr0 &= ~DBCR0_IC;
-                       if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0,
-                                              current->thread.dbcr1))
+                       current->thread.debug.dbcr0 &= ~DBCR0_IC;
+                       if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
+                                              current->thread.debug.dbcr1))
                                regs->msr |= MSR_DE;
                        else
                                /* Make sure the IDM bit is off */
-                               current->thread.dbcr0 &= ~DBCR0_IDM;
+                               current->thread.debug.dbcr0 &= ~DBCR0_IDM;
                }
 
                _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
index 78a3506..d38cc08 100644 (file)
@@ -1530,11 +1530,15 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
        const char *cp;
 
        dn = dev->of_node;
-       if (!dn)
-               return -ENODEV;
+       if (!dn) {
+               strcat(buf, "\n");
+               return strlen(buf);
+       }
        cp = of_get_property(dn, "compatible", NULL);
-       if (!cp)
-               return -ENODEV;
+       if (!cp) {
+               strcat(buf, "\n");
+               return strlen(buf);
+       }
 
        return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
 }
index 2f5c6b6..93221e8 100644 (file)
 #include "44x_tlb.h"
 #include "booke.h"
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu)
 {
        kvmppc_booke_vcpu_load(vcpu, cpu);
        kvmppc_44x_tlb_load(vcpu);
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu)
 {
        kvmppc_44x_tlb_put(vcpu);
        kvmppc_booke_vcpu_put(vcpu);
@@ -114,29 +114,32 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu,
+                                     struct kvm_sregs *sregs)
 {
-       kvmppc_get_sregs_ivor(vcpu, sregs);
+       return kvmppc_get_sregs_ivor(vcpu, sregs);
 }
 
-int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu,
+                                    struct kvm_sregs *sregs)
 {
        return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
-                       union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
+                                 union kvmppc_one_reg *val)
 {
        return -EINVAL;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
-                      union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id,
+                                 union kvmppc_one_reg *val)
 {
        return -EINVAL;
 }
 
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm,
+                                                   unsigned int id)
 {
        struct kvmppc_vcpu_44x *vcpu_44x;
        struct kvm_vcpu *vcpu;
@@ -167,7 +170,7 @@ out:
        return ERR_PTR(err);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 
@@ -176,28 +179,53 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
        kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
 }
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_44x(struct kvm *kvm)
 {
        return 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_44x(struct kvm *kvm)
 {
 }
 
+static struct kvmppc_ops kvm_ops_44x = {
+       .get_sregs = kvmppc_core_get_sregs_44x,
+       .set_sregs = kvmppc_core_set_sregs_44x,
+       .get_one_reg = kvmppc_get_one_reg_44x,
+       .set_one_reg = kvmppc_set_one_reg_44x,
+       .vcpu_load   = kvmppc_core_vcpu_load_44x,
+       .vcpu_put    = kvmppc_core_vcpu_put_44x,
+       .vcpu_create = kvmppc_core_vcpu_create_44x,
+       .vcpu_free   = kvmppc_core_vcpu_free_44x,
+       .mmu_destroy  = kvmppc_mmu_destroy_44x,
+       .init_vm = kvmppc_core_init_vm_44x,
+       .destroy_vm = kvmppc_core_destroy_vm_44x,
+       .emulate_op = kvmppc_core_emulate_op_44x,
+       .emulate_mtspr = kvmppc_core_emulate_mtspr_44x,
+       .emulate_mfspr = kvmppc_core_emulate_mfspr_44x,
+};
+
 static int __init kvmppc_44x_init(void)
 {
        int r;
 
        r = kvmppc_booke_init();
        if (r)
-               return r;
+               goto err_out;
+
+       r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
+       if (r)
+               goto err_out;
+       kvm_ops_44x.owner = THIS_MODULE;
+       kvmppc_pr_ops = &kvm_ops_44x;
 
-       return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
+err_out:
+       return r;
 }
 
 static void __exit kvmppc_44x_exit(void)
 {
+       kvmppc_pr_ops = NULL;
        kvmppc_booke_exit();
 }
 
index 35ec0a8..92c9ab4 100644 (file)
@@ -91,8 +91,8 @@ static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
        return EMULATE_DONE;
 }
 
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                           unsigned int inst, int *advance)
+int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                              unsigned int inst, int *advance)
 {
        int emulated = EMULATE_DONE;
        int dcrn = get_dcrn(inst);
@@ -152,7 +152,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return emulated;
 }
 
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 {
        int emulated = EMULATE_DONE;
 
@@ -172,7 +172,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
        return emulated;
 }
 
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 {
        int emulated = EMULATE_DONE;
 
index ed03854..0deef10 100644 (file)
@@ -268,7 +268,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
        trace_kvm_stlb_inval(stlb_index);
 }
 
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
        int i;
index ffaef2c..8aeeda1 100644 (file)
@@ -34,17 +34,20 @@ config KVM_BOOK3S_64_HANDLER
        bool
        select KVM_BOOK3S_HANDLER
 
-config KVM_BOOK3S_PR
+config KVM_BOOK3S_PR_POSSIBLE
        bool
        select KVM_MMIO
        select MMU_NOTIFIER
 
+config KVM_BOOK3S_HV_POSSIBLE
+       bool
+
 config KVM_BOOK3S_32
        tristate "KVM support for PowerPC book3s_32 processors"
        depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
        select KVM
        select KVM_BOOK3S_32_HANDLER
-       select KVM_BOOK3S_PR
+       select KVM_BOOK3S_PR_POSSIBLE
        ---help---
          Support running unmodified book3s_32 guest kernels
          in virtual machines on book3s_32 host processors.
@@ -59,6 +62,7 @@ config KVM_BOOK3S_64
        depends on PPC_BOOK3S_64
        select KVM_BOOK3S_64_HANDLER
        select KVM
+       select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
        ---help---
          Support running unmodified book3s_64 and book3s_32 guest kernels
          in virtual machines on book3s_64 host processors.
@@ -69,8 +73,9 @@ config KVM_BOOK3S_64
          If unsure, say N.
 
 config KVM_BOOK3S_64_HV
-       bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+       tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
        depends on KVM_BOOK3S_64
+       select KVM_BOOK3S_HV_POSSIBLE
        select MMU_NOTIFIER
        select CMA
        ---help---
@@ -89,9 +94,20 @@ config KVM_BOOK3S_64_HV
          If unsure, say N.
 
 config KVM_BOOK3S_64_PR
-       def_bool y
-       depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
-       select KVM_BOOK3S_PR
+       tristate "KVM support without using hypervisor mode in host"
+       depends on KVM_BOOK3S_64
+       select KVM_BOOK3S_PR_POSSIBLE
+       ---help---
+         Support running guest kernels in virtual machines on processors
+         without using hypervisor mode in the host, by running the
+         guest in user mode (problem state) and emulating all
+         privileged instructions and registers.
+
+         This is not as fast as using hypervisor mode, but works on
+         machines where hypervisor mode is not available or not usable,
+         and can emulate processors that are different from the host
+         processor, including emulating 32-bit processors on a 64-bit
+         host.
 
 config KVM_BOOKE_HV
        bool
index 6646c95..ce569b6 100644 (file)
@@ -53,41 +53,51 @@ kvm-e500mc-objs := \
        e500_emulate.o
 kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 
-kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
-       $(KVM)/coalesced_mmio.o \
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
+       book3s_64_vio_hv.o
+
+kvm-pr-y := \
        fpu.o \
        book3s_paired_singles.o \
        book3s_pr.o \
        book3s_pr_papr.o \
-       book3s_64_vio_hv.o \
        book3s_emulate.o \
        book3s_interrupts.o \
        book3s_mmu_hpte.o \
        book3s_64_mmu_host.o \
        book3s_64_mmu.o \
        book3s_32_mmu.o
-kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
+
+ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+kvm-book3s_64-module-objs := \
+       $(KVM)/coalesced_mmio.o
+
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
        book3s_rmhandlers.o
+endif
 
-kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+kvm-hv-y += \
        book3s_hv.o \
        book3s_hv_interrupts.o \
        book3s_64_mmu_hv.o
+
 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
        book3s_hv_rm_xics.o
-kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+
+ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
        book3s_hv_rmhandlers.o \
        book3s_hv_rm_mmu.o \
-       book3s_64_vio_hv.o \
        book3s_hv_ras.o \
        book3s_hv_builtin.o \
        book3s_hv_cma.o \
        $(kvm-book3s_64-builtin-xics-objs-y)
+endif
 
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
        book3s_xics.o
 
-kvm-book3s_64-module-objs := \
+kvm-book3s_64-module-objs += \
        $(KVM)/kvm_main.o \
        $(KVM)/eventfd.o \
        powerpc.o \
@@ -123,4 +133,7 @@ obj-$(CONFIG_KVM_E500MC) += kvm.o
 obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
 obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
 
+obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o
+obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o
+
 obj-y += $(kvm-book3s_64-builtin-objs-y)
index 700df6f..8912608 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 
+#include "book3s.h"
 #include "trace.h"
 
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
@@ -69,6 +70,50 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 {
 }
 
+static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
+{
+       if (!is_kvmppc_hv_enabled(vcpu->kvm))
+               return to_book3s(vcpu)->hior;
+       return 0;
+}
+
+static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
+                       unsigned long pending_now, unsigned long old_pending)
+{
+       if (is_kvmppc_hv_enabled(vcpu->kvm))
+               return;
+       if (pending_now)
+               vcpu->arch.shared->int_pending = 1;
+       else if (old_pending)
+               vcpu->arch.shared->int_pending = 0;
+}
+
+static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
+{
+       ulong crit_raw;
+       ulong crit_r1;
+       bool crit;
+
+       if (is_kvmppc_hv_enabled(vcpu->kvm))
+               return false;
+
+       crit_raw = vcpu->arch.shared->critical;
+       crit_r1 = kvmppc_get_gpr(vcpu, 1);
+
+       /* Truncate crit indicators in 32 bit mode */
+       if (!(vcpu->arch.shared->msr & MSR_SF)) {
+               crit_raw &= 0xffffffff;
+               crit_r1 &= 0xffffffff;
+       }
+
+       /* Critical section when crit == r1 */
+       crit = (crit_raw == crit_r1);
+       /* ... and we're in supervisor mode */
+       crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
+
+       return crit;
+}
+
 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
 {
        vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
@@ -126,28 +171,32 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
        printk(KERN_INFO "Queueing interrupt %x\n", vec);
 #endif
 }
-
+EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
 
 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
 {
        /* might as well deliver this straight away */
        kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
 
 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 {
        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec);
 
 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
 {
        return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec);
 
 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
 {
        kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
 
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
@@ -285,8 +334,10 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter);
 
-pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
+                       bool *writable)
 {
        ulong mp_pa = vcpu->arch.magic_page_pa;
 
@@ -302,20 +353,23 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
 
                pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
                get_page(pfn_to_page(pfn));
+               if (writable)
+                       *writable = true;
                return pfn;
        }
 
-       return gfn_to_pfn(vcpu->kvm, gfn);
+       return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);
 }
+EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn);
 
 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
-                        struct kvmppc_pte *pte)
+                       bool iswrite, struct kvmppc_pte *pte)
 {
        int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR));
        int r;
 
        if (relocated) {
-               r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
+               r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data, iswrite);
        } else {
                pte->eaddr = eaddr;
                pte->raddr = eaddr & KVM_PAM;
@@ -361,7 +415,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
 
        vcpu->stat.st++;
 
-       if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+       if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte))
                return -ENOENT;
 
        *eaddr = pte.raddr;
@@ -374,6 +428,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
 
        return EMULATE_DONE;
 }
+EXPORT_SYMBOL_GPL(kvmppc_st);
 
 int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
                      bool data)
@@ -383,7 +438,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
 
        vcpu->stat.ld++;
 
-       if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+       if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte))
                goto nopte;
 
        *eaddr = pte.raddr;
@@ -404,6 +459,7 @@ nopte:
 mmio:
        return EMULATE_DO_MMIO;
 }
+EXPORT_SYMBOL_GPL(kvmppc_ld);
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
@@ -419,6 +475,18 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 }
 
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+                                 struct kvm_sregs *sregs)
+{
+       return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+                                 struct kvm_sregs *sregs)
+{
+       return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
        int i;
@@ -495,8 +563,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
        if (size > sizeof(val))
                return -EINVAL;
 
-       r = kvmppc_get_one_reg(vcpu, reg->id, &val);
-
+       r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
        if (r == -EINVAL) {
                r = 0;
                switch (reg->id) {
@@ -528,6 +595,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                        }
                        val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
                        break;
+               case KVM_REG_PPC_VRSAVE:
+                       val = get_reg_val(reg->id, vcpu->arch.vrsave);
+                       break;
 #endif /* CONFIG_ALTIVEC */
                case KVM_REG_PPC_DEBUG_INST: {
                        u32 opcode = INS_TW;
@@ -572,8 +642,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
        if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
                return -EFAULT;
 
-       r = kvmppc_set_one_reg(vcpu, reg->id, &val);
-
+       r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
        if (r == -EINVAL) {
                r = 0;
                switch (reg->id) {
@@ -605,6 +674,13 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                        }
                        vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
                        break;
+               case KVM_REG_PPC_VRSAVE:
+                       if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+                               r = -ENXIO;
+                               break;
+                       }
+                       vcpu->arch.vrsave = set_reg_val(reg->id, val);
+                       break;
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_KVM_XICS
                case KVM_REG_PPC_ICP_STATE:
@@ -625,6 +701,27 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
        return r;
 }
 
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+       vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+       vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
+}
+
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+{
+       vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr);
+
+int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+       return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu);
+}
+
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                   struct kvm_translation *tr)
 {
@@ -644,3 +741,141 @@ void kvmppc_decrementer_func(unsigned long data)
        kvmppc_core_queue_dec(vcpu);
        kvm_vcpu_kick(vcpu);
 }
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+       return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+       vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+}
+
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+{
+       return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+       return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
+}
+
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+                             struct kvm_memory_slot *dont)
+{
+       kvm->arch.kvm_ops->free_memslot(free, dont);
+}
+
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                              unsigned long npages)
+{
+       return kvm->arch.kvm_ops->create_memslot(slot, npages);
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+       kvm->arch.kvm_ops->flush_memslot(kvm, memslot);
+}
+
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+                               struct kvm_memory_slot *memslot,
+                               struct kvm_userspace_memory_region *mem)
+{
+       return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
+}
+
+void kvmppc_core_commit_memory_region(struct kvm *kvm,
+                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_memory_slot *old)
+{
+       kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old);
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+       return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
+}
+EXPORT_SYMBOL_GPL(kvm_unmap_hva);
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+       return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+       return kvm->arch.kvm_ops->age_hva(kvm, hva);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+       return kvm->arch.kvm_ops->test_age_hva(kvm, hva);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+       kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte);
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+       vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+
+#ifdef CONFIG_PPC64
+       INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+       INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
+#endif
+
+       return kvm->arch.kvm_ops->init_vm(kvm);
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+       kvm->arch.kvm_ops->destroy_vm(kvm);
+
+#ifdef CONFIG_PPC64
+       kvmppc_rtas_tokens_free(kvm);
+       WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
+#endif
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+       /*
+        * We always return 0 for book3s. We check
+        * for compatability while loading the HV
+        * or PR module
+        */
+       return 0;
+}
+
+static int kvmppc_book3s_init(void)
+{
+       int r;
+
+       r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+       if (r)
+               return r;
+#ifdef CONFIG_KVM_BOOK3S_32
+       r = kvmppc_book3s_init_pr();
+#endif
+       return r;
+
+}
+
+static void kvmppc_book3s_exit(void)
+{
+#ifdef CONFIG_KVM_BOOK3S_32
+       kvmppc_book3s_exit_pr();
+#endif
+       kvm_exit();
+}
+
+module_init(kvmppc_book3s_init);
+module_exit(kvmppc_book3s_exit);
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
new file mode 100644 (file)
index 0000000..4bf956c
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+
+#ifndef __POWERPC_KVM_BOOK3S_H__
+#define __POWERPC_KVM_BOOK3S_H__
+
+extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
+                                        struct kvm_memory_slot *memslot);
+extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
+                                 unsigned long end);
+extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                                    unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
+                                       int sprn, ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
+                                       int sprn, ulong *spr_val);
+extern int kvmppc_book3s_init_pr(void);
+extern void kvmppc_book3s_exit_pr(void);
+
+#endif
index c8cefdd..76a64ce 100644 (file)
@@ -84,7 +84,8 @@ static inline bool sr_nx(u32 sr_raw)
 }
 
 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
-                                         struct kvmppc_pte *pte, bool data);
+                                         struct kvmppc_pte *pte, bool data,
+                                         bool iswrite);
 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
                                             u64 *vsid);
 
@@ -99,7 +100,7 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
        u64 vsid;
        struct kvmppc_pte pte;
 
-       if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data))
+       if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data, false))
                return pte.vpage;
 
        kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
@@ -111,10 +112,11 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
        kvmppc_set_msr(vcpu, 0);
 }
 
-static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s,
+static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
                                      u32 sre, gva_t eaddr,
                                      bool primary)
 {
+       struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
        u32 page, hash, pteg, htabmask;
        hva_t r;
 
@@ -132,7 +134,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3
                kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg,
                sr_vsid(sre));
 
-       r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+       r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
        if (kvm_is_error_hva(r))
                return r;
        return r | (pteg & ~PAGE_MASK);
@@ -145,7 +147,8 @@ static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary)
 }
 
 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
-                                         struct kvmppc_pte *pte, bool data)
+                                         struct kvmppc_pte *pte, bool data,
+                                         bool iswrite)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
        struct kvmppc_bat *bat;
@@ -186,8 +189,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
                                printk(KERN_INFO "BAT is not readable!\n");
                                continue;
                        }
-                       if (!pte->may_write) {
-                               /* let's treat r/o BATs as not-readable for now */
+                       if (iswrite && !pte->may_write) {
                                dprintk_pte("BAT is read-only!\n");
                                continue;
                        }
@@ -201,9 +203,8 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
 
 static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
                                     struct kvmppc_pte *pte, bool data,
-                                    bool primary)
+                                    bool iswrite, bool primary)
 {
-       struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
        u32 sre;
        hva_t ptegp;
        u32 pteg[16];
@@ -218,7 +219,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
 
        pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data);
 
-       ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu_book3s, sre, eaddr, primary);
+       ptegp = kvmppc_mmu_book3s_32_get_pteg(vcpu, sre, eaddr, primary);
        if (kvm_is_error_hva(ptegp)) {
                printk(KERN_INFO "KVM: Invalid PTEG!\n");
                goto no_page_found;
@@ -258,9 +259,6 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
                                        break;
                        }
 
-                       if ( !pte->may_read )
-                               continue;
-
                        dprintk_pte("MMU: Found PTE -> %x %x - %x\n",
                                    pteg[i], pteg[i+1], pp);
                        found = 1;
@@ -271,19 +269,23 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
        /* Update PTE C and A bits, so the guest's swapper knows we used the
           page */
        if (found) {
-               u32 oldpte = pteg[i+1];
-
-               if (pte->may_read)
-                       pteg[i+1] |= PTEG_FLAG_ACCESSED;
-               if (pte->may_write)
-                       pteg[i+1] |= PTEG_FLAG_DIRTY;
-               else
-                       dprintk_pte("KVM: Mapping read-only page!\n");
-
-               /* Write back into the PTEG */
-               if (pteg[i+1] != oldpte)
-                       copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
-
+               u32 pte_r = pteg[i+1];
+               char __user *addr = (char __user *) &pteg[i+1];
+
+               /*
+                * Use single-byte writes to update the HPTE, to
+                * conform to what real hardware does.
+                */
+               if (pte->may_read && !(pte_r & PTEG_FLAG_ACCESSED)) {
+                       pte_r |= PTEG_FLAG_ACCESSED;
+                       put_user(pte_r >> 8, addr + 2);
+               }
+               if (iswrite && pte->may_write && !(pte_r & PTEG_FLAG_DIRTY)) {
+                       pte_r |= PTEG_FLAG_DIRTY;
+                       put_user(pte_r, addr + 3);
+               }
+               if (!pte->may_read || (iswrite && !pte->may_write))
+                       return -EPERM;
                return 0;
        }
 
@@ -302,12 +304,14 @@ no_page_found:
 }
 
 static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
-                                     struct kvmppc_pte *pte, bool data)
+                                     struct kvmppc_pte *pte, bool data,
+                                     bool iswrite)
 {
        int r;
        ulong mp_ea = vcpu->arch.magic_page_ea;
 
        pte->eaddr = eaddr;
+       pte->page_size = MMU_PAGE_4K;
 
        /* Magic page override */
        if (unlikely(mp_ea) &&
@@ -323,11 +327,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
                return 0;
        }
 
-       r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data);
+       r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data, iswrite);
        if (r < 0)
-              r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true);
+               r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
+                                                  data, iswrite, true);
        if (r < 0)
-              r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, false);
+               r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte,
+                                                  data, iswrite, false);
 
        return r;
 }
@@ -347,7 +353,12 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
 
 static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
 {
-       kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000);
+       int i;
+       struct kvm_vcpu *v;
+
+       /* flush this VA on all cpus */
+       kvm_for_each_vcpu(i, v, vcpu->kvm)
+               kvmppc_mmu_pte_flush(v, ea, 0x0FFFF000);
 }
 
 static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
index 00e619b..3a0abd2 100644 (file)
@@ -138,7 +138,8 @@ static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,
 
 extern char etext[];
 
-int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
+                       bool iswrite)
 {
        pfn_t hpaddr;
        u64 vpn;
@@ -152,9 +153,11 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
        bool evict = false;
        struct hpte_cache *pte;
        int r = 0;
+       bool writable;
 
        /* Get host physical address for gpa */
-       hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
+       hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
+                                  iswrite, &writable);
        if (is_error_noslot_pfn(hpaddr)) {
                printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
                                 orig_pte->eaddr);
@@ -204,7 +207,7 @@ next_pteg:
                (primary ? 0 : PTE_SEC);
        pteg1 = hpaddr | PTE_M | PTE_R | PTE_C;
 
-       if (orig_pte->may_write) {
+       if (orig_pte->may_write && writable) {
                pteg1 |= PP_RWRW;
                mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
        } else {
@@ -259,6 +262,11 @@ out:
        return r;
 }
 
+void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+       kvmppc_mmu_pte_vflush(vcpu, pte->vpage, 0xfffffffffULL);
+}
+
 static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
 {
        struct kvmppc_sid_map *map;
@@ -341,7 +349,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
        svcpu_put(svcpu);
 }
 
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
 {
        int i;
 
index 7e345e0..83da1f8 100644 (file)
@@ -107,9 +107,20 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
        return kvmppc_slb_calc_vpn(slb, eaddr);
 }
 
+static int mmu_pagesize(int mmu_pg)
+{
+       switch (mmu_pg) {
+       case MMU_PAGE_64K:
+               return 16;
+       case MMU_PAGE_16M:
+               return 24;
+       }
+       return 12;
+}
+
 static int kvmppc_mmu_book3s_64_get_pagesize(struct kvmppc_slb *slbe)
 {
-       return slbe->large ? 24 : 12;
+       return mmu_pagesize(slbe->base_page_size);
 }
 
 static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
@@ -119,11 +130,11 @@ static u32 kvmppc_mmu_book3s_64_get_page(struct kvmppc_slb *slbe, gva_t eaddr)
        return ((eaddr & kvmppc_slb_offset_mask(slbe)) >> p);
 }
 
-static hva_t kvmppc_mmu_book3s_64_get_pteg(
-                               struct kvmppc_vcpu_book3s *vcpu_book3s,
+static hva_t kvmppc_mmu_book3s_64_get_pteg(struct kvm_vcpu *vcpu,
                                struct kvmppc_slb *slbe, gva_t eaddr,
                                bool second)
 {
+       struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
        u64 hash, pteg, htabsize;
        u32 ssize;
        hva_t r;
@@ -148,10 +159,10 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
 
        /* When running a PAPR guest, SDR1 contains a HVA address instead
            of a GPA */
-       if (vcpu_book3s->vcpu.arch.papr_enabled)
+       if (vcpu->arch.papr_enabled)
                r = pteg;
        else
-               r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+               r = gfn_to_hva(vcpu->kvm, pteg >> PAGE_SHIFT);
 
        if (kvm_is_error_hva(r))
                return r;
@@ -166,18 +177,38 @@ static u64 kvmppc_mmu_book3s_64_get_avpn(struct kvmppc_slb *slbe, gva_t eaddr)
        avpn = kvmppc_mmu_book3s_64_get_page(slbe, eaddr);
        avpn |= slbe->vsid << (kvmppc_slb_sid_shift(slbe) - p);
 
-       if (p < 24)
-               avpn >>= ((80 - p) - 56) - 8;
+       if (p < 16)
+               avpn >>= ((80 - p) - 56) - 8;   /* 16 - p */
        else
-               avpn <<= 8;
+               avpn <<= p - 16;
 
        return avpn;
 }
 
+/*
+ * Return page size encoded in the second word of a HPTE, or
+ * -1 for an invalid encoding for the base page size indicated by
+ * the SLB entry.  This doesn't handle mixed pagesize segments yet.
+ */
+static int decode_pagesize(struct kvmppc_slb *slbe, u64 r)
+{
+       switch (slbe->base_page_size) {
+       case MMU_PAGE_64K:
+               if ((r & 0xf000) == 0x1000)
+                       return MMU_PAGE_64K;
+               break;
+       case MMU_PAGE_16M:
+               if ((r & 0xff000) == 0)
+                       return MMU_PAGE_16M;
+               break;
+       }
+       return -1;
+}
+
 static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
-                               struct kvmppc_pte *gpte, bool data)
+                                     struct kvmppc_pte *gpte, bool data,
+                                     bool iswrite)
 {
-       struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
        struct kvmppc_slb *slbe;
        hva_t ptegp;
        u64 pteg[16];
@@ -189,6 +220,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
        u8 pp, key = 0;
        bool found = false;
        bool second = false;
+       int pgsize;
        ulong mp_ea = vcpu->arch.magic_page_ea;
 
        /* Magic page override */
@@ -202,6 +234,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
                gpte->may_execute = true;
                gpte->may_read = true;
                gpte->may_write = true;
+               gpte->page_size = MMU_PAGE_4K;
 
                return 0;
        }
@@ -222,8 +255,12 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
        v_mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_LARGE | HPTE_V_VALID |
                HPTE_V_SECONDARY;
 
+       pgsize = slbe->large ? MMU_PAGE_16M : MMU_PAGE_4K;
+
+       mutex_lock(&vcpu->kvm->arch.hpt_mutex);
+
 do_second:
-       ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu_book3s, slbe, eaddr, second);
+       ptegp = kvmppc_mmu_book3s_64_get_pteg(vcpu, slbe, eaddr, second);
        if (kvm_is_error_hva(ptegp))
                goto no_page_found;
 
@@ -240,6 +277,13 @@ do_second:
        for (i=0; i<16; i+=2) {
                /* Check all relevant fields of 1st dword */
                if ((pteg[i] & v_mask) == v_val) {
+                       /* If large page bit is set, check pgsize encoding */
+                       if (slbe->large &&
+                           (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+                               pgsize = decode_pagesize(slbe, pteg[i+1]);
+                               if (pgsize < 0)
+                                       continue;
+                       }
                        found = true;
                        break;
                }
@@ -256,13 +300,15 @@ do_second:
        v = pteg[i];
        r = pteg[i+1];
        pp = (r & HPTE_R_PP) | key;
-       eaddr_mask = 0xFFF;
+       if (r & HPTE_R_PP0)
+               pp |= 8;
 
        gpte->eaddr = eaddr;
        gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data);
-       if (slbe->large)
-               eaddr_mask = 0xFFFFFF;
+
+       eaddr_mask = (1ull << mmu_pagesize(pgsize)) - 1;
        gpte->raddr = (r & HPTE_R_RPN & ~eaddr_mask) | (eaddr & eaddr_mask);
+       gpte->page_size = pgsize;
        gpte->may_execute = ((r & HPTE_R_N) ? false : true);
        gpte->may_read = false;
        gpte->may_write = false;
@@ -277,6 +323,7 @@ do_second:
        case 3:
        case 5:
        case 7:
+       case 10:
                gpte->may_read = true;
                break;
        }
@@ -287,30 +334,37 @@ do_second:
 
        /* Update PTE R and C bits, so the guest's swapper knows we used the
         * page */
-       if (gpte->may_read) {
-               /* Set the accessed flag */
+       if (gpte->may_read && !(r & HPTE_R_R)) {
+               /*
+                * Set the accessed flag.
+                * We have to write this back with a single byte write
+                * because another vcpu may be accessing this on
+                * non-PAPR platforms such as mac99, and this is
+                * what real hardware does.
+                */
+               char __user *addr = (char __user *) &pteg[i+1];
                r |= HPTE_R_R;
+               put_user(r >> 8, addr + 6);
        }
-       if (data && gpte->may_write) {
-               /* Set the dirty flag -- XXX even if not writing */
+       if (iswrite && gpte->may_write && !(r & HPTE_R_C)) {
+               /* Set the dirty flag */
+               /* Use a single byte write */
+               char __user *addr = (char __user *) &pteg[i+1];
                r |= HPTE_R_C;
+               put_user(r, addr + 7);
        }
 
-       /* Write back into the PTEG */
-       if (pteg[i+1] != r) {
-               pteg[i+1] = r;
-               copy_to_user((void __user *)ptegp, pteg, sizeof(pteg));
-       }
+       mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
 
-       if (!gpte->may_read)
+       if (!gpte->may_read || (iswrite && !gpte->may_write))
                return -EPERM;
        return 0;
 
 no_page_found:
+       mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
        return -ENOENT;
 
 no_seg_found:
-
        dprintk("KVM MMU: Trigger segment fault\n");
        return -EINVAL;
 }
@@ -345,6 +399,21 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
        slbe->nx    = (rs & SLB_VSID_N) ? 1 : 0;
        slbe->class = (rs & SLB_VSID_C) ? 1 : 0;
 
+       slbe->base_page_size = MMU_PAGE_4K;
+       if (slbe->large) {
+               if (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE) {
+                       switch (rs & SLB_VSID_LP) {
+                       case SLB_VSID_LP_00:
+                               slbe->base_page_size = MMU_PAGE_16M;
+                               break;
+                       case SLB_VSID_LP_01:
+                               slbe->base_page_size = MMU_PAGE_64K;
+                               break;
+                       }
+               } else
+                       slbe->base_page_size = MMU_PAGE_16M;
+       }
+
        slbe->orige = rb & (ESID_MASK | SLB_ESID_V);
        slbe->origv = rs;
 
@@ -460,14 +529,45 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
                                       bool large)
 {
        u64 mask = 0xFFFFFFFFFULL;
+       long i;
+       struct kvm_vcpu *v;
 
        dprintk("KVM MMU: tlbie(0x%lx)\n", va);
 
-       if (large)
-               mask = 0xFFFFFF000ULL;
-       kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
+       /*
+        * The tlbie instruction changed behaviour starting with
+        * POWER6.  POWER6 and later don't have the large page flag
+        * in the instruction but in the RB value, along with bits
+        * indicating page and segment sizes.
+        */
+       if (vcpu->arch.hflags & BOOK3S_HFLAG_NEW_TLBIE) {
+               /* POWER6 or later */
+               if (va & 1) {           /* L bit */
+                       if ((va & 0xf000) == 0x1000)
+                               mask = 0xFFFFFFFF0ULL;  /* 64k page */
+                       else
+                               mask = 0xFFFFFF000ULL;  /* 16M page */
+               }
+       } else {
+               /* older processors, e.g. PPC970 */
+               if (large)
+                       mask = 0xFFFFFF000ULL;
+       }
+       /* flush this VA on all vcpus */
+       kvm_for_each_vcpu(i, v, vcpu->kvm)
+               kvmppc_mmu_pte_vflush(v, va >> 12, mask);
 }
 
+#ifdef CONFIG_PPC_64K_PAGES
+static int segment_contains_magic_page(struct kvm_vcpu *vcpu, ulong esid)
+{
+       ulong mp_ea = vcpu->arch.magic_page_ea;
+
+       return mp_ea && !(vcpu->arch.shared->msr & MSR_PR) &&
+               (mp_ea >> SID_SHIFT) == esid;
+}
+#endif
+
 static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
                                             u64 *vsid)
 {
@@ -475,11 +575,13 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
        struct kvmppc_slb *slb;
        u64 gvsid = esid;
        ulong mp_ea = vcpu->arch.magic_page_ea;
+       int pagesize = MMU_PAGE_64K;
 
        if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
                slb = kvmppc_mmu_book3s_64_find_slbe(vcpu, ea);
                if (slb) {
                        gvsid = slb->vsid;
+                       pagesize = slb->base_page_size;
                        if (slb->tb) {
                                gvsid <<= SID_SHIFT_1T - SID_SHIFT;
                                gvsid |= esid & ((1ul << (SID_SHIFT_1T - SID_SHIFT)) - 1);
@@ -490,28 +592,41 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 
        switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
        case 0:
-               *vsid = VSID_REAL | esid;
+               gvsid = VSID_REAL | esid;
                break;
        case MSR_IR:
-               *vsid = VSID_REAL_IR | gvsid;
+               gvsid |= VSID_REAL_IR;
                break;
        case MSR_DR:
-               *vsid = VSID_REAL_DR | gvsid;
+               gvsid |= VSID_REAL_DR;
                break;
        case MSR_DR|MSR_IR:
                if (!slb)
                        goto no_slb;
 
-               *vsid = gvsid;
                break;
        default:
                BUG();
                break;
        }
 
+#ifdef CONFIG_PPC_64K_PAGES
+       /*
+        * Mark this as a 64k segment if the host is using
+        * 64k pages, the host MMU supports 64k pages and
+        * the guest segment page size is >= 64k,
+        * but not if this segment contains the magic page.
+        */
+       if (pagesize >= MMU_PAGE_64K &&
+           mmu_psize_defs[MMU_PAGE_64K].shift &&
+           !segment_contains_magic_page(vcpu, esid))
+               gvsid |= VSID_64K;
+#endif
+
        if (vcpu->arch.shared->msr & MSR_PR)
-               *vsid |= VSID_PR;
+               gvsid |= VSID_PR;
 
+       *vsid = gvsid;
        return 0;
 
 no_slb:
index e524052..0d513af 100644 (file)
 #include <asm/machdep.h>
 #include <asm/mmu_context.h>
 #include <asm/hw_irq.h>
-#include "trace.h"
+#include "trace_pr.h"
 
 #define PTE_SIZE 12
 
 void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
        ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
-                              MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M,
+                              pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M,
                               false);
 }
 
@@ -78,7 +78,8 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
        return NULL;
 }
 
-int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
+                       bool iswrite)
 {
        unsigned long vpn;
        pfn_t hpaddr;
@@ -90,16 +91,26 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
        int attempt = 0;
        struct kvmppc_sid_map *map;
        int r = 0;
+       int hpsize = MMU_PAGE_4K;
+       bool writable;
+       unsigned long mmu_seq;
+       struct kvm *kvm = vcpu->kvm;
+       struct hpte_cache *cpte;
+       unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT;
+       unsigned long pfn;
+
+       /* used to check for invalidations in progress */
+       mmu_seq = kvm->mmu_notifier_seq;
+       smp_rmb();
 
        /* Get host physical address for gpa */
-       hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
-       if (is_error_noslot_pfn(hpaddr)) {
-               printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
+       pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable);
+       if (is_error_noslot_pfn(pfn)) {
+               printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn);
                r = -EINVAL;
                goto out;
        }
-       hpaddr <<= PAGE_SHIFT;
-       hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
+       hpaddr = pfn << PAGE_SHIFT;
 
        /* and write the mapping ea -> hpa into the pt */
        vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
@@ -117,20 +128,39 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
                goto out;
        }
 
-       vsid = map->host_vsid;
-       vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+       vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M);
 
-       if (!orig_pte->may_write)
-               rflags |= HPTE_R_PP;
-       else
-               mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
+       kvm_set_pfn_accessed(pfn);
+       if (!orig_pte->may_write || !writable)
+               rflags |= PP_RXRX;
+       else {
+               mark_page_dirty(vcpu->kvm, gfn);
+               kvm_set_pfn_dirty(pfn);
+       }
 
        if (!orig_pte->may_execute)
                rflags |= HPTE_R_N;
        else
-               kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
+               kvmppc_mmu_flush_icache(pfn);
+
+       /*
+        * Use 64K pages if possible; otherwise, on 64K page kernels,
+        * we need to transfer 4 more bits from guest real to host real addr.
+        */
+       if (vsid & VSID_64K)
+               hpsize = MMU_PAGE_64K;
+       else
+               hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK);
+
+       hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
 
-       hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M);
+       cpte = kvmppc_mmu_hpte_cache_next(vcpu);
+
+       spin_lock(&kvm->mmu_lock);
+       if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
+               r = -EAGAIN;
+               goto out_unlock;
+       }
 
 map_again:
        hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -139,11 +169,11 @@ map_again:
        if (attempt > 1)
                if (ppc_md.hpte_remove(hpteg) < 0) {
                        r = -1;
-                       goto out;
+                       goto out_unlock;
                }
 
        ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
-                                MMU_PAGE_4K, MMU_PAGE_4K, MMU_SEGSIZE_256M);
+                                hpsize, hpsize, MMU_SEGSIZE_256M);
 
        if (ret < 0) {
                /* If we couldn't map a primary PTE, try a secondary */
@@ -152,8 +182,6 @@ map_again:
                attempt++;
                goto map_again;
        } else {
-               struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
-
                trace_kvm_book3s_64_mmu_map(rflags, hpteg,
                                            vpn, hpaddr, orig_pte);
 
@@ -164,19 +192,37 @@ map_again:
                        hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
                }
 
-               pte->slot = hpteg + (ret & 7);
-               pte->host_vpn = vpn;
-               pte->pte = *orig_pte;
-               pte->pfn = hpaddr >> PAGE_SHIFT;
+               cpte->slot = hpteg + (ret & 7);
+               cpte->host_vpn = vpn;
+               cpte->pte = *orig_pte;
+               cpte->pfn = pfn;
+               cpte->pagesize = hpsize;
 
-               kvmppc_mmu_hpte_cache_map(vcpu, pte);
+               kvmppc_mmu_hpte_cache_map(vcpu, cpte);
+               cpte = NULL;
        }
-       kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+
+out_unlock:
+       spin_unlock(&kvm->mmu_lock);
+       kvm_release_pfn_clean(pfn);
+       if (cpte)
+               kvmppc_mmu_hpte_cache_free(cpte);
 
 out:
        return r;
 }
 
+void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+       u64 mask = 0xfffffffffULL;
+       u64 vsid;
+
+       vcpu->arch.mmu.esid_to_vsid(vcpu, pte->eaddr >> SID_SHIFT, &vsid);
+       if (vsid & VSID_64K)
+               mask = 0xffffffff0ULL;
+       kvmppc_mmu_pte_vflush(vcpu, pte->vpage, mask);
+}
+
 static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
 {
        struct kvmppc_sid_map *map;
@@ -291,6 +337,12 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
        slb_vsid &= ~SLB_VSID_KP;
        slb_esid |= slb_index;
 
+#ifdef CONFIG_PPC_64K_PAGES
+       /* Set host segment base page size to 64K if possible */
+       if (gvsid & VSID_64K)
+               slb_vsid |= mmu_psize_defs[MMU_PAGE_64K].sllp;
+#endif
+
        svcpu->slb[slb_index].esid = slb_esid;
        svcpu->slb[slb_index].vsid = slb_vsid;
 
@@ -326,7 +378,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
        svcpu_put(svcpu);
 }
 
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
 {
        kvmppc_mmu_hpte_destroy(vcpu);
        __destroy_context(to_book3s(vcpu)->context_id[0]);
index 043eec8..f3ff587 100644 (file)
@@ -260,10 +260,6 @@ int kvmppc_mmu_hv_init(void)
        return 0;
 }
 
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
-{
-}
-
 static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
 {
        kvmppc_set_msr(vcpu, MSR_SF | MSR_ME);
@@ -451,7 +447,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
 }
 
 static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
-                       struct kvmppc_pte *gpte, bool data)
+                       struct kvmppc_pte *gpte, bool data, bool iswrite)
 {
        struct kvm *kvm = vcpu->kvm;
        struct kvmppc_slb *slbe;
@@ -906,21 +902,22 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
        return 0;
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
 {
        if (kvm->arch.using_mmu_notifiers)
                kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
        return 0;
 }
 
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
        if (kvm->arch.using_mmu_notifiers)
                kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
        return 0;
 }
 
-void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
+                                 struct kvm_memory_slot *memslot)
 {
        unsigned long *rmapp;
        unsigned long gfn;
@@ -994,7 +991,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
        return ret;
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
 {
        if (!kvm->arch.using_mmu_notifiers)
                return 0;
@@ -1032,14 +1029,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
        return ret;
 }
 
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
 {
        if (!kvm->arch.using_mmu_notifiers)
                return 0;
        return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
 }
 
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
        if (!kvm->arch.using_mmu_notifiers)
                return;
@@ -1512,9 +1509,8 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 
                                kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
                                        (VRMA_VSID << SLB_VSID_SHIFT_1T);
-                               lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
-                               lpcr |= senc << (LPCR_VRMASD_SH - 4);
-                               kvm->arch.lpcr = lpcr;
+                               lpcr = senc << (LPCR_VRMASD_SH - 4);
+                               kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
                                rma_setup = 1;
                        }
                        ++i;
index 30c2f3b..2c25f54 100644 (file)
@@ -74,3 +74,4 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
        /* Didn't find the liobn, punt it to userspace */
        return H_TOO_HARD;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
index 360ce68..99d40f8 100644 (file)
@@ -86,8 +86,8 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
        return true;
 }
 
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                           unsigned int inst, int *advance)
+int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                             unsigned int inst, int *advance)
 {
        int emulated = EMULATE_DONE;
        int rt = get_rt(inst);
@@ -172,7 +172,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        vcpu->arch.mmu.tlbie(vcpu, addr, large);
                        break;
                }
-#ifdef CONFIG_KVM_BOOK3S_64_PR
+#ifdef CONFIG_PPC_BOOK3S_64
                case OP_31_XOP_FAKE_SC1:
                {
                        /* SC 1 papr hypercalls */
@@ -267,12 +267,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
                        r = kvmppc_st(vcpu, &addr, 32, zeros, true);
                        if ((r == -ENOENT) || (r == -EPERM)) {
-                               struct kvmppc_book3s_shadow_vcpu *svcpu;
-
-                               svcpu = svcpu_get(vcpu);
                                *advance = 0;
                                vcpu->arch.shared->dar = vaddr;
-                               svcpu->fault_dar = vaddr;
+                               vcpu->arch.fault_dar = vaddr;
 
                                dsisr = DSISR_ISSTORE;
                                if (r == -ENOENT)
@@ -281,8 +278,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                        dsisr |= DSISR_PROTFAULT;
 
                                vcpu->arch.shared->dsisr = dsisr;
-                               svcpu->fault_dsisr = dsisr;
-                               svcpu_put(svcpu);
+                               vcpu->arch.fault_dsisr = dsisr;
 
                                kvmppc_book3s_queue_irqprio(vcpu,
                                        BOOK3S_INTERRUPT_DATA_STORAGE);
@@ -349,7 +345,7 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn)
        return bat;
 }
 
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 {
        int emulated = EMULATE_DONE;
 
@@ -472,7 +468,7 @@ unprivileged:
        return emulated;
 }
 
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 {
        int emulated = EMULATE_DONE;
 
index 7057a02..852989a 100644 (file)
 #include <linux/export.h>
 #include <asm/kvm_book3s.h>
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
-#else
+#endif
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
 EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
 #ifdef CONFIG_ALTIVEC
index 62a2b5a..072287f 100644 (file)
@@ -52,6 +52,9 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
+#include <linux/module.h>
+
+#include "book3s.h"
 
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
@@ -66,7 +69,7 @@
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
-void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
        int me;
        int cpu = vcpu->cpu;
@@ -125,7 +128,7 @@ void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
  * purely defensive; they should never fail.)
  */
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 {
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
@@ -143,7 +146,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        spin_unlock(&vcpu->arch.tbacct_lock);
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
@@ -155,17 +158,46 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
        spin_unlock(&vcpu->arch.tbacct_lock);
 }
 
-void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
 {
        vcpu->arch.shregs.msr = msr;
        kvmppc_end_cede(vcpu);
 }
 
-void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
 {
        vcpu->arch.pvr = pvr;
 }
 
+int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+{
+       unsigned long pcr = 0;
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+       if (arch_compat) {
+               if (!cpu_has_feature(CPU_FTR_ARCH_206))
+                       return -EINVAL; /* 970 has no compat mode support */
+
+               switch (arch_compat) {
+               case PVR_ARCH_205:
+                       pcr = PCR_ARCH_205;
+                       break;
+               case PVR_ARCH_206:
+               case PVR_ARCH_206p:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       spin_lock(&vc->lock);
+       vc->arch_compat = arch_compat;
+       vc->pcr = pcr;
+       spin_unlock(&vc->lock);
+
+       return 0;
+}
+
 void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
 {
        int r;
@@ -195,7 +227,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
                pr_err("  ESID = %.16llx VSID = %.16llx\n",
                       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
        pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
-              vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
+              vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
               vcpu->arch.last_inst);
 }
 
@@ -489,7 +521,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
        memset(dt, 0, sizeof(struct dtl_entry));
        dt->dispatch_reason = 7;
        dt->processor_id = vc->pcpu + vcpu->arch.ptid;
-       dt->timebase = now;
+       dt->timebase = now + vc->tb_offset;
        dt->enqueue_to_dispatch_time = stolen;
        dt->srr0 = kvmppc_get_pc(vcpu);
        dt->srr1 = vcpu->arch.shregs.msr;
@@ -538,6 +570,15 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
                }
                break;
        case H_CONFER:
+               target = kvmppc_get_gpr(vcpu, 4);
+               if (target == -1)
+                       break;
+               tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+               if (!tvcpu) {
+                       ret = H_PARAMETER;
+                       break;
+               }
+               kvm_vcpu_yield_to(tvcpu);
                break;
        case H_REGISTER_VPA:
                ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -576,8 +617,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
        return RESUME_GUEST;
 }
 
-static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                             struct task_struct *tsk)
+static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                                struct task_struct *tsk)
 {
        int r = RESUME_HOST;
 
@@ -671,16 +712,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
                        vcpu->arch.trap, kvmppc_get_pc(vcpu),
                        vcpu->arch.shregs.msr);
+               run->hw.hardware_exit_reason = vcpu->arch.trap;
                r = RESUME_HOST;
-               BUG();
                break;
        }
 
        return r;
 }
 
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-                                 struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
+                                           struct kvm_sregs *sregs)
 {
        int i;
 
@@ -694,12 +735,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-                                 struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
+                                           struct kvm_sregs *sregs)
 {
        int i, j;
 
-       kvmppc_set_pvr(vcpu, sregs->pvr);
+       kvmppc_set_pvr_hv(vcpu, sregs->pvr);
 
        j = 0;
        for (i = 0; i < vcpu->arch.slb_nr; i++) {
@@ -714,7 +755,23 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr)
+{
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+       u64 mask;
+
+       spin_lock(&vc->lock);
+       /*
+        * Userspace can only modify DPFD (default prefetch depth),
+        * ILE (interrupt little-endian) and TC (translation control).
+        */
+       mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
+       vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
+       spin_unlock(&vc->lock);
+}
+
+static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+                                union kvmppc_one_reg *val)
 {
        int r = 0;
        long int i;
@@ -749,6 +806,12 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
                i = id - KVM_REG_PPC_PMC1;
                *val = get_reg_val(id, vcpu->arch.pmc[i]);
                break;
+       case KVM_REG_PPC_SIAR:
+               *val = get_reg_val(id, vcpu->arch.siar);
+               break;
+       case KVM_REG_PPC_SDAR:
+               *val = get_reg_val(id, vcpu->arch.sdar);
+               break;
 #ifdef CONFIG_VSX
        case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
                if (cpu_has_feature(CPU_FTR_VSX)) {
@@ -787,6 +850,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
                val->vpaval.length = vcpu->arch.dtl.len;
                spin_unlock(&vcpu->arch.vpa_update_lock);
                break;
+       case KVM_REG_PPC_TB_OFFSET:
+               *val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
+               break;
+       case KVM_REG_PPC_LPCR:
+               *val = get_reg_val(id, vcpu->arch.vcore->lpcr);
+               break;
+       case KVM_REG_PPC_PPR:
+               *val = get_reg_val(id, vcpu->arch.ppr);
+               break;
+       case KVM_REG_PPC_ARCH_COMPAT:
+               *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
+               break;
        default:
                r = -EINVAL;
                break;
@@ -795,7 +870,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
        return r;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
+                                union kvmppc_one_reg *val)
 {
        int r = 0;
        long int i;
@@ -833,6 +909,12 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
                i = id - KVM_REG_PPC_PMC1;
                vcpu->arch.pmc[i] = set_reg_val(id, *val);
                break;
+       case KVM_REG_PPC_SIAR:
+               vcpu->arch.siar = set_reg_val(id, *val);
+               break;
+       case KVM_REG_PPC_SDAR:
+               vcpu->arch.sdar = set_reg_val(id, *val);
+               break;
 #ifdef CONFIG_VSX
        case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
                if (cpu_has_feature(CPU_FTR_VSX)) {
@@ -880,6 +962,20 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
                len -= len % sizeof(struct dtl_entry);
                r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
                break;
+       case KVM_REG_PPC_TB_OFFSET:
+               /* round up to multiple of 2^24 */
+               vcpu->arch.vcore->tb_offset =
+                       ALIGN(set_reg_val(id, *val), 1UL << 24);
+               break;
+       case KVM_REG_PPC_LPCR:
+               kvmppc_set_lpcr(vcpu, set_reg_val(id, *val));
+               break;
+       case KVM_REG_PPC_PPR:
+               vcpu->arch.ppr = set_reg_val(id, *val);
+               break;
+       case KVM_REG_PPC_ARCH_COMPAT:
+               r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
+               break;
        default:
                r = -EINVAL;
                break;
@@ -888,14 +984,8 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
        return r;
 }
 
-int kvmppc_core_check_processor_compat(void)
-{
-       if (cpu_has_feature(CPU_FTR_HVMODE))
-               return 0;
-       return -EIO;
-}
-
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
+                                                  unsigned int id)
 {
        struct kvm_vcpu *vcpu;
        int err = -EINVAL;
@@ -919,8 +1009,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
        vcpu->arch.mmcr[0] = MMCR0_FC;
        vcpu->arch.ctrl = CTRL_RUNLATCH;
        /* default to host PVR, since we can't spoof it */
-       vcpu->arch.pvr = mfspr(SPRN_PVR);
-       kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+       kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
        spin_lock_init(&vcpu->arch.vpa_update_lock);
        spin_lock_init(&vcpu->arch.tbacct_lock);
        vcpu->arch.busy_preempt = TB_NIL;
@@ -940,6 +1029,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
                        spin_lock_init(&vcore->lock);
                        init_waitqueue_head(&vcore->wq);
                        vcore->preempt_tb = TB_NIL;
+                       vcore->lpcr = kvm->arch.lpcr;
                }
                kvm->arch.vcores[core] = vcore;
                kvm->arch.online_vcores++;
@@ -972,7 +1062,7 @@ static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
                                        vpa->dirty);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
 {
        spin_lock(&vcpu->arch.vpa_update_lock);
        unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
@@ -983,6 +1073,12 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
        kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
+static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
+{
+       /* Indicate we want to get back into the guest */
+       return 1;
+}
+
 static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
 {
        unsigned long dec_nsec, now;
@@ -1264,8 +1360,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 
                ret = RESUME_GUEST;
                if (vcpu->arch.trap)
-                       ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
-                                                vcpu->arch.run_task);
+                       ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
+                                                   vcpu->arch.run_task);
 
                vcpu->arch.ret = ret;
                vcpu->arch.trap = 0;
@@ -1424,7 +1520,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        return vcpu->arch.ret;
 }
 
-int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
        int r;
        int srcu_idx;
@@ -1546,7 +1642,8 @@ static const struct file_operations kvm_rma_fops = {
        .release        = kvm_rma_release,
 };
 
-long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
+static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
+                                     struct kvm_allocate_rma *ret)
 {
        long fd;
        struct kvm_rma_info *ri;
@@ -1592,7 +1689,8 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
        (*sps)++;
 }
 
-int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
+                                        struct kvm_ppc_smmu_info *info)
 {
        struct kvm_ppc_one_seg_page_size *sps;
 
@@ -1613,7 +1711,8 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
+                                        struct kvm_dirty_log *log)
 {
        struct kvm_memory_slot *memslot;
        int r;
@@ -1667,8 +1766,8 @@ static void unpin_slot(struct kvm_memory_slot *memslot)
        }
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
-                             struct kvm_memory_slot *dont)
+static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
+                                       struct kvm_memory_slot *dont)
 {
        if (!dont || free->arch.rmap != dont->arch.rmap) {
                vfree(free->arch.rmap);
@@ -1681,8 +1780,8 @@ void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
        }
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
-                              unsigned long npages)
+static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
+                                        unsigned long npages)
 {
        slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
        if (!slot->arch.rmap)
@@ -1692,9 +1791,9 @@ int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
        return 0;
 }
 
-int kvmppc_core_prepare_memory_region(struct kvm *kvm,
-                                     struct kvm_memory_slot *memslot,
-                                     struct kvm_userspace_memory_region *mem)
+static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
+                                       struct kvm_memory_slot *memslot,
+                                       struct kvm_userspace_memory_region *mem)
 {
        unsigned long *phys;
 
@@ -1710,9 +1809,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
        return 0;
 }
 
-void kvmppc_core_commit_memory_region(struct kvm *kvm,
-                                     struct kvm_userspace_memory_region *mem,
-                                     const struct kvm_memory_slot *old)
+static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
+                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_memory_slot *old)
 {
        unsigned long npages = mem->memory_size >> PAGE_SHIFT;
        struct kvm_memory_slot *memslot;
@@ -1729,6 +1828,37 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
        }
 }
 
+/*
+ * Update LPCR values in kvm->arch and in vcores.
+ * Caller must hold kvm->lock.
+ */
+void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
+{
+       long int i;
+       u32 cores_done = 0;
+
+       if ((kvm->arch.lpcr & mask) == lpcr)
+               return;
+
+       kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;
+
+       for (i = 0; i < KVM_MAX_VCORES; ++i) {
+               struct kvmppc_vcore *vc = kvm->arch.vcores[i];
+               if (!vc)
+                       continue;
+               spin_lock(&vc->lock);
+               vc->lpcr = (vc->lpcr & ~mask) | lpcr;
+               spin_unlock(&vc->lock);
+               if (++cores_done >= kvm->arch.online_vcores)
+                       break;
+       }
+}
+
+static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
+{
+       return;
+}
+
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
        int err = 0;
@@ -1737,7 +1867,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
        unsigned long hva;
        struct kvm_memory_slot *memslot;
        struct vm_area_struct *vma;
-       unsigned long lpcr, senc;
+       unsigned long lpcr = 0, senc;
+       unsigned long lpcr_mask = 0;
        unsigned long psize, porder;
        unsigned long rma_size;
        unsigned long rmls;
@@ -1802,9 +1933,9 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
                senc = slb_pgsize_encoding(psize);
                kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
                        (VRMA_VSID << SLB_VSID_SHIFT_1T);
-               lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
-               lpcr |= senc << (LPCR_VRMASD_SH - 4);
-               kvm->arch.lpcr = lpcr;
+               lpcr_mask = LPCR_VRMASD;
+               /* the -4 is to account for senc values starting at 0x10 */
+               lpcr = senc << (LPCR_VRMASD_SH - 4);
 
                /* Create HPTEs in the hash page table for the VRMA */
                kvmppc_map_vrma(vcpu, memslot, porder);
@@ -1825,23 +1956,21 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
                kvm->arch.rma = ri;
 
                /* Update LPCR and RMOR */
-               lpcr = kvm->arch.lpcr;
                if (cpu_has_feature(CPU_FTR_ARCH_201)) {
                        /* PPC970; insert RMLS value (split field) in HID4 */
-                       lpcr &= ~((1ul << HID4_RMLS0_SH) |
-                                 (3ul << HID4_RMLS2_SH));
-                       lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
+                       lpcr_mask = (1ul << HID4_RMLS0_SH) |
+                               (3ul << HID4_RMLS2_SH) | HID4_RMOR;
+                       lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
                                ((rmls & 3) << HID4_RMLS2_SH);
                        /* RMOR is also in HID4 */
                        lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
                                << HID4_RMOR_SH;
                } else {
                        /* POWER7 */
-                       lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
-                       lpcr |= rmls << LPCR_RMLS_SH;
+                       lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
+                       lpcr = rmls << LPCR_RMLS_SH;
                        kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
                }
-               kvm->arch.lpcr = lpcr;
                pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
                        ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
 
@@ -1860,6 +1989,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
                }
        }
 
+       kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
        /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
        smp_wmb();
        kvm->arch.rma_setup_done = 1;
@@ -1875,7 +2006,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
        goto out_srcu;
 }
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 {
        unsigned long lpcr, lpid;
 
@@ -1893,9 +2024,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
         */
        cpumask_setall(&kvm->arch.need_tlb_flush);
 
-       INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
-       INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
-
        kvm->arch.rma = NULL;
 
        kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
@@ -1931,61 +2059,162 @@ int kvmppc_core_init_vm(struct kvm *kvm)
        return 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_free_vcores(struct kvm *kvm)
+{
+       long int i;
+
+       for (i = 0; i < KVM_MAX_VCORES; ++i)
+               kfree(kvm->arch.vcores[i]);
+       kvm->arch.online_vcores = 0;
+}
+
+static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 {
        uninhibit_secondary_onlining();
 
+       kvmppc_free_vcores(kvm);
        if (kvm->arch.rma) {
                kvm_release_rma(kvm->arch.rma);
                kvm->arch.rma = NULL;
        }
 
-       kvmppc_rtas_tokens_free(kvm);
-
        kvmppc_free_hpt(kvm);
-       WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 }
 
-/* These are stubs for now */
-void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
+/* We don't need to emulate any privileged instructions or dcbz */
+static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                                    unsigned int inst, int *advance)
 {
+       return EMULATE_FAIL;
 }
 
-/* We don't need to emulate any privileged instructions or dcbz */
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                           unsigned int inst, int *advance)
+static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
+                                       ulong spr_val)
 {
        return EMULATE_FAIL;
 }
 
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
+                                       ulong *spr_val)
 {
        return EMULATE_FAIL;
 }
 
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+static int kvmppc_core_check_processor_compat_hv(void)
 {
-       return EMULATE_FAIL;
+       if (!cpu_has_feature(CPU_FTR_HVMODE))
+               return -EIO;
+       return 0;
 }
 
-static int kvmppc_book3s_hv_init(void)
+static long kvm_arch_vm_ioctl_hv(struct file *filp,
+                                unsigned int ioctl, unsigned long arg)
 {
-       int r;
+       struct kvm *kvm __maybe_unused = filp->private_data;
+       void __user *argp = (void __user *)arg;
+       long r;
 
-       r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+       switch (ioctl) {
 
-       if (r)
+       case KVM_ALLOCATE_RMA: {
+               struct kvm_allocate_rma rma;
+               struct kvm *kvm = filp->private_data;
+
+               r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
+               if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
+                       r = -EFAULT;
+               break;
+       }
+
+       case KVM_PPC_ALLOCATE_HTAB: {
+               u32 htab_order;
+
+               r = -EFAULT;
+               if (get_user(htab_order, (u32 __user *)argp))
+                       break;
+               r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+               if (r)
+                       break;
+               r = -EFAULT;
+               if (put_user(htab_order, (u32 __user *)argp))
+                       break;
+               r = 0;
+               break;
+       }
+
+       case KVM_PPC_GET_HTAB_FD: {
+               struct kvm_get_htab_fd ghf;
+
+               r = -EFAULT;
+               if (copy_from_user(&ghf, argp, sizeof(ghf)))
+                       break;
+               r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
+               break;
+       }
+
+       default:
+               r = -ENOTTY;
+       }
+
+       return r;
+}
+
+static struct kvmppc_ops kvm_ops_hv = {
+       .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
+       .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
+       .get_one_reg = kvmppc_get_one_reg_hv,
+       .set_one_reg = kvmppc_set_one_reg_hv,
+       .vcpu_load   = kvmppc_core_vcpu_load_hv,
+       .vcpu_put    = kvmppc_core_vcpu_put_hv,
+       .set_msr     = kvmppc_set_msr_hv,
+       .vcpu_run    = kvmppc_vcpu_run_hv,
+       .vcpu_create = kvmppc_core_vcpu_create_hv,
+       .vcpu_free   = kvmppc_core_vcpu_free_hv,
+       .check_requests = kvmppc_core_check_requests_hv,
+       .get_dirty_log  = kvm_vm_ioctl_get_dirty_log_hv,
+       .flush_memslot  = kvmppc_core_flush_memslot_hv,
+       .prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
+       .commit_memory_region  = kvmppc_core_commit_memory_region_hv,
+       .unmap_hva = kvm_unmap_hva_hv,
+       .unmap_hva_range = kvm_unmap_hva_range_hv,
+       .age_hva  = kvm_age_hva_hv,
+       .test_age_hva = kvm_test_age_hva_hv,
+       .set_spte_hva = kvm_set_spte_hva_hv,
+       .mmu_destroy  = kvmppc_mmu_destroy_hv,
+       .free_memslot = kvmppc_core_free_memslot_hv,
+       .create_memslot = kvmppc_core_create_memslot_hv,
+       .init_vm =  kvmppc_core_init_vm_hv,
+       .destroy_vm = kvmppc_core_destroy_vm_hv,
+       .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
+       .emulate_op = kvmppc_core_emulate_op_hv,
+       .emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
+       .emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
+       .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
+       .arch_vm_ioctl  = kvm_arch_vm_ioctl_hv,
+};
+
+static int kvmppc_book3s_init_hv(void)
+{
+       int r;
+       /*
+        * FIXME!! Do we need to check on all cpus ?
+        */
+       r = kvmppc_core_check_processor_compat_hv();
+       if (r < 0)
                return r;
 
-       r = kvmppc_mmu_hv_init();
+       kvm_ops_hv.owner = THIS_MODULE;
+       kvmppc_hv_ops = &kvm_ops_hv;
 
+       r = kvmppc_mmu_hv_init();
        return r;
 }
 
-static void kvmppc_book3s_hv_exit(void)
+static void kvmppc_book3s_exit_hv(void)
 {
-       kvm_exit();
+       kvmppc_hv_ops = NULL;
 }
 
-module_init(kvmppc_book3s_hv_init);
-module_exit(kvmppc_book3s_hv_exit);
+module_init(kvmppc_book3s_init_hv);
+module_exit(kvmppc_book3s_exit_hv);
+MODULE_LICENSE("GPL");
index 37f1cc4..928142c 100644 (file)
@@ -158,9 +158,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
  * Interrupts are enabled again at this point.
  */
 
-.global kvmppc_handler_highmem
-kvmppc_handler_highmem:
-
        /*
         * Register usage at this point:
         *
index 294b7af..bc8de75 100644 (file)
 #error Need to fix lppaca and SLB shadow accesses in little endian mode
 #endif
 
-/*****************************************************************************
- *                                                                           *
- *        Real Mode handlers that need to be in the linear mapping           *
- *                                                                           *
- ****************************************************************************/
-
-       .globl  kvmppc_skip_interrupt
-kvmppc_skip_interrupt:
-       mfspr   r13,SPRN_SRR0
-       addi    r13,r13,4
-       mtspr   SPRN_SRR0,r13
-       GET_SCRATCH0(r13)
-       rfid
-       b       .
-
-       .globl  kvmppc_skip_Hinterrupt
-kvmppc_skip_Hinterrupt:
-       mfspr   r13,SPRN_HSRR0
-       addi    r13,r13,4
-       mtspr   SPRN_HSRR0,r13
-       GET_SCRATCH0(r13)
-       hrfid
-       b       .
-
 /*
  * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
@@ -66,8 +42,11 @@ kvmppc_skip_Hinterrupt:
  * LR = return address to continue at after eventually re-enabling MMU
  */
 _GLOBAL(kvmppc_hv_entry_trampoline)
+       mflr    r0
+       std     r0, PPC_LR_STKOFF(r1)
+       stdu    r1, -112(r1)
        mfmsr   r10
-       LOAD_REG_ADDR(r5, kvmppc_hv_entry)
+       LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
        li      r0,MSR_RI
        andc    r0,r10,r0
        li      r6,MSR_IR | MSR_DR
@@ -77,11 +56,103 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
        mtsrr1  r6
        RFI
 
-/******************************************************************************
- *                                                                            *
- *                               Entry code                                   *
- *                                                                            *
- *****************************************************************************/
+kvmppc_call_hv_entry:
+       bl      kvmppc_hv_entry
+
+       /* Back from guest - restore host state and return to caller */
+
+       /* Restore host DABR and DABRX */
+       ld      r5,HSTATE_DABR(r13)
+       li      r6,7
+       mtspr   SPRN_DABR,r5
+       mtspr   SPRN_DABRX,r6
+
+       /* Restore SPRG3 */
+       ld      r3,PACA_SPRG3(r13)
+       mtspr   SPRN_SPRG3,r3
+
+       /*
+        * Reload DEC.  HDEC interrupts were disabled when
+        * we reloaded the host's LPCR value.
+        */
+       ld      r3, HSTATE_DECEXP(r13)
+       mftb    r4
+       subf    r4, r4, r3
+       mtspr   SPRN_DEC, r4
+
+       /* Reload the host's PMU registers */
+       ld      r3, PACALPPACAPTR(r13)  /* is the host using the PMU? */
+       lbz     r4, LPPACA_PMCINUSE(r3)
+       cmpwi   r4, 0
+       beq     23f                     /* skip if not */
+       lwz     r3, HSTATE_PMC(r13)
+       lwz     r4, HSTATE_PMC + 4(r13)
+       lwz     r5, HSTATE_PMC + 8(r13)
+       lwz     r6, HSTATE_PMC + 12(r13)
+       lwz     r8, HSTATE_PMC + 16(r13)
+       lwz     r9, HSTATE_PMC + 20(r13)
+BEGIN_FTR_SECTION
+       lwz     r10, HSTATE_PMC + 24(r13)
+       lwz     r11, HSTATE_PMC + 28(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+       mtspr   SPRN_PMC1, r3
+       mtspr   SPRN_PMC2, r4
+       mtspr   SPRN_PMC3, r5
+       mtspr   SPRN_PMC4, r6
+       mtspr   SPRN_PMC5, r8
+       mtspr   SPRN_PMC6, r9
+BEGIN_FTR_SECTION
+       mtspr   SPRN_PMC7, r10
+       mtspr   SPRN_PMC8, r11
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+       ld      r3, HSTATE_MMCR(r13)
+       ld      r4, HSTATE_MMCR + 8(r13)
+       ld      r5, HSTATE_MMCR + 16(r13)
+       mtspr   SPRN_MMCR1, r4
+       mtspr   SPRN_MMCRA, r5
+       mtspr   SPRN_MMCR0, r3
+       isync
+23:
+
+       /*
+        * For external and machine check interrupts, we need
+        * to call the Linux handler to process the interrupt.
+        * We do that by jumping to absolute address 0x500 for
+        * external interrupts, or the machine_check_fwnmi label
+        * for machine checks (since firmware might have patched
+        * the vector area at 0x200).  The [h]rfid at the end of the
+        * handler will return to the book3s_hv_interrupts.S code.
+        * For other interrupts we do the rfid to get back
+        * to the book3s_hv_interrupts.S code here.
+        */
+       ld      r8, 112+PPC_LR_STKOFF(r1)
+       addi    r1, r1, 112
+       ld      r7, HSTATE_HOST_MSR(r13)
+
+       cmpwi   cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+       cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
+BEGIN_FTR_SECTION
+       beq     11f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+       /* RFI into the highmem handler, or branch to interrupt handler */
+       mfmsr   r6
+       li      r0, MSR_RI
+       andc    r6, r6, r0
+       mtmsrd  r6, 1                   /* Clear RI in MSR */
+       mtsrr0  r8
+       mtsrr1  r7
+       beqa    0x500                   /* external interrupt (PPC970) */
+       beq     cr1, 13f                /* machine check */
+       RFI
+
+       /* On POWER7, we have external interrupts set to use HSRR0/1 */
+11:    mtspr   SPRN_HSRR0, r8
+       mtspr   SPRN_HSRR1, r7
+       ba      0x500
+
+13:    b       machine_check_fwnmi
+
 
 /*
  * We come in here when wakened from nap mode on a secondary hw thread.
@@ -137,7 +208,7 @@ kvm_start_guest:
        cmpdi   r4,0
        /* if we have no vcpu to run, go back to sleep */
        beq     kvm_no_guest
-       b       kvmppc_hv_entry
+       b       30f
 
 27:    /* XXX should handle hypervisor maintenance interrupts etc. here */
        b       kvm_no_guest
@@ -147,6 +218,57 @@ kvm_start_guest:
        stw     r8,HSTATE_SAVED_XIRR(r13)
        b       kvm_no_guest
 
+30:    bl      kvmppc_hv_entry
+
+       /* Back from the guest, go back to nap */
+       /* Clear our vcpu pointer so we don't come back in early */
+       li      r0, 0
+       std     r0, HSTATE_KVM_VCPU(r13)
+       lwsync
+       /* Clear any pending IPI - we're an offline thread */
+       ld      r5, HSTATE_XICS_PHYS(r13)
+       li      r7, XICS_XIRR
+       lwzcix  r3, r5, r7              /* ack any pending interrupt */
+       rlwinm. r0, r3, 0, 0xffffff     /* any pending? */
+       beq     37f
+       sync
+       li      r0, 0xff
+       li      r6, XICS_MFRR
+       stbcix  r0, r5, r6              /* clear the IPI */
+       stwcix  r3, r5, r7              /* EOI it */
+37:    sync
+
+       /* increment the nap count and then go to nap mode */
+       ld      r4, HSTATE_KVM_VCORE(r13)
+       addi    r4, r4, VCORE_NAP_COUNT
+       lwsync                          /* make previous updates visible */
+51:    lwarx   r3, 0, r4
+       addi    r3, r3, 1
+       stwcx.  r3, 0, r4
+       bne     51b
+
+kvm_no_guest:
+       li      r0, KVM_HWTHREAD_IN_NAP
+       stb     r0, HSTATE_HWTHREAD_STATE(r13)
+       li      r3, LPCR_PECE0
+       mfspr   r4, SPRN_LPCR
+       rlwimi  r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
+       mtspr   SPRN_LPCR, r4
+       isync
+       std     r0, HSTATE_SCRATCH0(r13)
+       ptesync
+       ld      r0, HSTATE_SCRATCH0(r13)
+1:     cmpd    r0, r0
+       bne     1b
+       nap
+       b       .
+
+/******************************************************************************
+ *                                                                            *
+ *                               Entry code                                   *
+ *                                                                            *
+ *****************************************************************************/
+
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
 
@@ -159,7 +281,8 @@ kvmppc_hv_entry:
         * all other volatile GPRS = free
         */
        mflr    r0
-       std     r0, HSTATE_VMHANDLER(r13)
+       std     r0, PPC_LR_STKOFF(r1)
+       stdu    r1, -112(r1)
 
        /* Set partition DABR */
        /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
@@ -200,8 +323,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        ld      r3, VCPU_MMCR(r4)
        ld      r5, VCPU_MMCR + 8(r4)
        ld      r6, VCPU_MMCR + 16(r4)
+       ld      r7, VCPU_SIAR(r4)
+       ld      r8, VCPU_SDAR(r4)
        mtspr   SPRN_MMCR1, r5
        mtspr   SPRN_MMCRA, r6
+       mtspr   SPRN_SIAR, r7
+       mtspr   SPRN_SDAR, r8
        mtspr   SPRN_MMCR0, r3
        isync
 
@@ -254,22 +381,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        /* Save R1 in the PACA */
        std     r1, HSTATE_HOST_R1(r13)
 
-       /* Increment yield count if they have a VPA */
-       ld      r3, VCPU_VPA(r4)
-       cmpdi   r3, 0
-       beq     25f
-       lwz     r5, LPPACA_YIELDCOUNT(r3)
-       addi    r5, r5, 1
-       stw     r5, LPPACA_YIELDCOUNT(r3)
-       li      r6, 1
-       stb     r6, VCPU_VPA_DIRTY(r4)
-25:
        /* Load up DAR and DSISR */
        ld      r5, VCPU_DAR(r4)
        lwz     r6, VCPU_DSISR(r4)
        mtspr   SPRN_DAR, r5
        mtspr   SPRN_DSISR, r6
 
+       li      r6, KVM_GUEST_MODE_HOST_HV
+       stb     r6, HSTATE_IN_GUEST(r13)
+
 BEGIN_FTR_SECTION
        /* Restore AMR and UAMOR, set AMOR to all 1s */
        ld      r5,VCPU_AMR(r4)
@@ -343,7 +463,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        bdnz    28b
        ptesync
 
-22:    li      r0,1
+       /* Add timebase offset onto timebase */
+22:    ld      r8,VCORE_TB_OFFSET(r5)
+       cmpdi   r8,0
+       beq     37f
+       mftb    r6              /* current host timebase */
+       add     r8,r8,r6
+       mtspr   SPRN_TBU40,r8   /* update upper 40 bits */
+       mftb    r7              /* check if lower 24 bits overflowed */
+       clrldi  r6,r6,40
+       clrldi  r7,r7,40
+       cmpld   r7,r6
+       bge     37f
+       addis   r8,r8,0x100     /* if so, increment upper 40 bits */
+       mtspr   SPRN_TBU40,r8
+
+       /* Load guest PCR value to select appropriate compat mode */
+37:    ld      r7, VCORE_PCR(r5)
+       cmpdi   r7, 0
+       beq     38f
+       mtspr   SPRN_PCR, r7
+38:
+       li      r0,1
        stb     r0,VCORE_IN_GUEST(r5)   /* signal secondaries to continue */
        b       10f
 
@@ -353,12 +494,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        beq     20b
 
        /* Set LPCR and RMOR. */
-10:    ld      r8,KVM_LPCR(r9)
+10:    ld      r8,VCORE_LPCR(r5)
        mtspr   SPRN_LPCR,r8
        ld      r8,KVM_RMOR(r9)
        mtspr   SPRN_RMOR,r8
        isync
 
+       /* Increment yield count if they have a VPA */
+       ld      r3, VCPU_VPA(r4)
+       cmpdi   r3, 0
+       beq     25f
+       lwz     r5, LPPACA_YIELDCOUNT(r3)
+       addi    r5, r5, 1
+       stw     r5, LPPACA_YIELDCOUNT(r3)
+       li      r6, 1
+       stb     r6, VCPU_VPA_DIRTY(r4)
+25:
        /* Check if HDEC expires soon */
        mfspr   r3,SPRN_HDEC
        cmpwi   r3,10
@@ -405,7 +556,8 @@ toc_tlbie_lock:
        bne     24b
        isync
 
-       ld      r7,KVM_LPCR(r9)         /* use kvm->arch.lpcr to store HID4 */
+       ld      r5,HSTATE_KVM_VCORE(r13)
+       ld      r7,VCORE_LPCR(r5)       /* use vcore->lpcr to store HID4 */
        li      r0,0x18f
        rotldi  r0,r0,HID4_LPID5_SH     /* all lpid bits in HID4 = 1 */
        or      r0,r7,r0
@@ -541,7 +693,7 @@ fast_guest_return:
        mtspr   SPRN_HSRR1,r11
 
        /* Activate guest mode, so faults get handled by KVM */
-       li      r9, KVM_GUEST_MODE_GUEST
+       li      r9, KVM_GUEST_MODE_GUEST_HV
        stb     r9, HSTATE_IN_GUEST(r13)
 
        /* Enter guest */
@@ -550,13 +702,15 @@ BEGIN_FTR_SECTION
        ld      r5, VCPU_CFAR(r4)
        mtspr   SPRN_CFAR, r5
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+       ld      r0, VCPU_PPR(r4)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
        ld      r5, VCPU_LR(r4)
        lwz     r6, VCPU_CR(r4)
        mtlr    r5
        mtcr    r6
 
-       ld      r0, VCPU_GPR(R0)(r4)
        ld      r1, VCPU_GPR(R1)(r4)
        ld      r2, VCPU_GPR(R2)(r4)
        ld      r3, VCPU_GPR(R3)(r4)
@@ -570,6 +724,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
        ld      r12, VCPU_GPR(R12)(r4)
        ld      r13, VCPU_GPR(R13)(r4)
 
+BEGIN_FTR_SECTION
+       mtspr   SPRN_PPR, r0
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+       ld      r0, VCPU_GPR(R0)(r4)
        ld      r4, VCPU_GPR(R4)(r4)
 
        hrfid
@@ -584,8 +742,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 /*
  * We come here from the first-level interrupt handlers.
  */
-       .globl  kvmppc_interrupt
-kvmppc_interrupt:
+       .globl  kvmppc_interrupt_hv
+kvmppc_interrupt_hv:
        /*
         * Register contents:
         * R12          = interrupt vector
@@ -595,6 +753,19 @@ kvmppc_interrupt:
         */
        /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
        std     r9, HSTATE_HOST_R2(r13)
+
+       lbz     r9, HSTATE_IN_GUEST(r13)
+       cmpwi   r9, KVM_GUEST_MODE_HOST_HV
+       beq     kvmppc_bad_host_intr
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+       cmpwi   r9, KVM_GUEST_MODE_GUEST
+       ld      r9, HSTATE_HOST_R2(r13)
+       beq     kvmppc_interrupt_pr
+#endif
+       /* We're now back in the host but in guest MMU context */
+       li      r9, KVM_GUEST_MODE_HOST_HV
+       stb     r9, HSTATE_IN_GUEST(r13)
+
        ld      r9, HSTATE_KVM_VCPU(r13)
 
        /* Save registers */
@@ -620,6 +791,10 @@ BEGIN_FTR_SECTION
        ld      r3, HSTATE_CFAR(r13)
        std     r3, VCPU_CFAR(r9)
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+       ld      r4, HSTATE_PPR(r13)
+       std     r4, VCPU_PPR(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
        /* Restore R1/R2 so we can handle faults */
        ld      r1, HSTATE_HOST_R1(r13)
@@ -642,10 +817,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
        std     r3, VCPU_GPR(R13)(r9)
        std     r4, VCPU_LR(r9)
 
-       /* Unset guest mode */
-       li      r0, KVM_GUEST_MODE_NONE
-       stb     r0, HSTATE_IN_GUEST(r13)
-
        stw     r12,VCPU_TRAP(r9)
 
        /* Save HEIR (HV emulation assist reg) in last_inst
@@ -696,46 +867,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
         * set, we know the host wants us out so let's do it now
         */
 do_ext_interrupt:
-       lbz     r0, HSTATE_HOST_IPI(r13)
-       cmpwi   r0, 0
-       bne     ext_interrupt_to_host
-
-       /* Now read the interrupt from the ICP */
-       ld      r5, HSTATE_XICS_PHYS(r13)
-       li      r7, XICS_XIRR
-       cmpdi   r5, 0
-       beq-    ext_interrupt_to_host
-       lwzcix  r3, r5, r7
-       rlwinm. r0, r3, 0, 0xffffff
-       sync
-       beq     3f              /* if nothing pending in the ICP */
-
-       /* We found something in the ICP...
-        *
-        * If it's not an IPI, stash it in the PACA and return to
-        * the host, we don't (yet) handle directing real external
-        * interrupts directly to the guest
-        */
-       cmpwi   r0, XICS_IPI
-       bne     ext_stash_for_host
-
-       /* It's an IPI, clear the MFRR and EOI it */
-       li      r0, 0xff
-       li      r6, XICS_MFRR
-       stbcix  r0, r5, r6              /* clear the IPI */
-       stwcix  r3, r5, r7              /* EOI it */
-       sync
-
-       /* We need to re-check host IPI now in case it got set in the
-        * meantime. If it's clear, we bounce the interrupt to the
-        * guest
-        */
-       lbz     r0, HSTATE_HOST_IPI(r13)
-       cmpwi   r0, 0
-       bne-    1f
+       bl      kvmppc_read_intr
+       cmpdi   r3, 0
+       bgt     ext_interrupt_to_host
 
        /* Allright, looks like an IPI for the guest, we need to set MER */
-3:
        /* Check if any CPU is heading out to the host, if so head out too */
        ld      r5, HSTATE_KVM_VCORE(r13)
        lwz     r0, VCORE_ENTRY_EXIT(r5)
@@ -764,27 +900,9 @@ do_ext_interrupt:
        mtspr   SPRN_LPCR, r8
        b       fast_guest_return
 
-       /* We raced with the host, we need to resend that IPI, bummer */
-1:     li      r0, IPI_PRIORITY
-       stbcix  r0, r5, r6              /* set the IPI */
-       sync
-       b       ext_interrupt_to_host
-
-ext_stash_for_host:
-       /* It's not an IPI and it's for the host, stash it in the PACA
-        * before exit, it will be picked up by the host ICP driver
-        */
-       stw     r3, HSTATE_SAVED_XIRR(r13)
 ext_interrupt_to_host:
 
 guest_exit_cont:               /* r9 = vcpu, r12 = trap, r13 = paca */
-       /* Save DEC */
-       mfspr   r5,SPRN_DEC
-       mftb    r6
-       extsw   r5,r5
-       add     r5,r5,r6
-       std     r5,VCPU_DEC_EXPIRES(r9)
-
        /* Save more register state  */
        mfdar   r6
        mfdsisr r7
@@ -954,7 +1072,30 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        mtspr   SPRN_SDR1,r6            /* switch to partition page table */
        mtspr   SPRN_LPID,r7
        isync
-       li      r0,0
+
+       /* Subtract timebase offset from timebase */
+       ld      r8,VCORE_TB_OFFSET(r5)
+       cmpdi   r8,0
+       beq     17f
+       mftb    r6                      /* current host timebase */
+       subf    r8,r8,r6
+       mtspr   SPRN_TBU40,r8           /* update upper 40 bits */
+       mftb    r7                      /* check if lower 24 bits overflowed */
+       clrldi  r6,r6,40
+       clrldi  r7,r7,40
+       cmpld   r7,r6
+       bge     17f
+       addis   r8,r8,0x100             /* if so, increment upper 40 bits */
+       mtspr   SPRN_TBU40,r8
+
+       /* Reset PCR */
+17:    ld      r0, VCORE_PCR(r5)
+       cmpdi   r0, 0
+       beq     18f
+       li      r0, 0
+       mtspr   SPRN_PCR, r0
+18:
+       /* Signal secondary CPUs to continue */
        stb     r0,VCORE_IN_GUEST(r5)
        lis     r8,0x7fff               /* MAX_INT@h */
        mtspr   SPRN_HDEC,r8
@@ -1052,6 +1193,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 1:     addi    r8,r8,16
        .endr
 
+       /* Save DEC */
+       mfspr   r5,SPRN_DEC
+       mftb    r6
+       extsw   r5,r5
+       add     r5,r5,r6
+       std     r5,VCPU_DEC_EXPIRES(r9)
+
        /* Save and reset AMR and UAMOR before turning on the MMU */
 BEGIN_FTR_SECTION
        mfspr   r5,SPRN_AMR
@@ -1062,11 +1210,15 @@ BEGIN_FTR_SECTION
        mtspr   SPRN_AMR,r6
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
+       /* Unset guest mode */
+       li      r0, KVM_GUEST_MODE_NONE
+       stb     r0, HSTATE_IN_GUEST(r13)
+
        /* Switch DSCR back to host value */
 BEGIN_FTR_SECTION
        mfspr   r8, SPRN_DSCR
        ld      r7, HSTATE_DSCR(r13)
-       std     r8, VCPU_DSCR(r7)
+       std     r8, VCPU_DSCR(r9)
        mtspr   SPRN_DSCR, r7
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
@@ -1134,9 +1286,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        std     r3, VCPU_MMCR(r9)       /* if not, set saved MMCR0 to FC */
        b       22f
 21:    mfspr   r5, SPRN_MMCR1
+       mfspr   r7, SPRN_SIAR
+       mfspr   r8, SPRN_SDAR
        std     r4, VCPU_MMCR(r9)
        std     r5, VCPU_MMCR + 8(r9)
        std     r6, VCPU_MMCR + 16(r9)
+       std     r7, VCPU_SIAR(r9)
+       std     r8, VCPU_SDAR(r9)
        mfspr   r3, SPRN_PMC1
        mfspr   r4, SPRN_PMC2
        mfspr   r5, SPRN_PMC3
@@ -1158,103 +1314,30 @@ BEGIN_FTR_SECTION
        stw     r11, VCPU_PMC + 28(r9)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 22:
+       ld      r0, 112+PPC_LR_STKOFF(r1)
+       addi    r1, r1, 112
+       mtlr    r0
+       blr
+secondary_too_late:
+       ld      r5,HSTATE_KVM_VCORE(r13)
+       HMT_LOW
+13:    lbz     r3,VCORE_IN_GUEST(r5)
+       cmpwi   r3,0
+       bne     13b
+       HMT_MEDIUM
+       li      r0, KVM_GUEST_MODE_NONE
+       stb     r0, HSTATE_IN_GUEST(r13)
+       ld      r11,PACA_SLBSHADOWPTR(r13)
 
-       /* Secondary threads go off to take a nap on POWER7 */
-BEGIN_FTR_SECTION
-       lwz     r0,VCPU_PTID(r9)
-       cmpwi   r0,0
-       bne     secondary_nap
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
-       /* Restore host DABR and DABRX */
-       ld      r5,HSTATE_DABR(r13)
-       li      r6,7
-       mtspr   SPRN_DABR,r5
-       mtspr   SPRN_DABRX,r6
-
-       /* Restore SPRG3 */
-       ld      r3,PACA_SPRG3(r13)
-       mtspr   SPRN_SPRG3,r3
-
-       /*
-        * Reload DEC.  HDEC interrupts were disabled when
-        * we reloaded the host's LPCR value.
-        */
-       ld      r3, HSTATE_DECEXP(r13)
-       mftb    r4
-       subf    r4, r4, r3
-       mtspr   SPRN_DEC, r4
-
-       /* Reload the host's PMU registers */
-       ld      r3, PACALPPACAPTR(r13)  /* is the host using the PMU? */
-       lbz     r4, LPPACA_PMCINUSE(r3)
-       cmpwi   r4, 0
-       beq     23f                     /* skip if not */
-       lwz     r3, HSTATE_PMC(r13)
-       lwz     r4, HSTATE_PMC + 4(r13)
-       lwz     r5, HSTATE_PMC + 8(r13)
-       lwz     r6, HSTATE_PMC + 12(r13)
-       lwz     r8, HSTATE_PMC + 16(r13)
-       lwz     r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-       lwz     r10, HSTATE_PMC + 24(r13)
-       lwz     r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-       mtspr   SPRN_PMC1, r3
-       mtspr   SPRN_PMC2, r4
-       mtspr   SPRN_PMC3, r5
-       mtspr   SPRN_PMC4, r6
-       mtspr   SPRN_PMC5, r8
-       mtspr   SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-       mtspr   SPRN_PMC7, r10
-       mtspr   SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-       ld      r3, HSTATE_MMCR(r13)
-       ld      r4, HSTATE_MMCR + 8(r13)
-       ld      r5, HSTATE_MMCR + 16(r13)
-       mtspr   SPRN_MMCR1, r4
-       mtspr   SPRN_MMCRA, r5
-       mtspr   SPRN_MMCR0, r3
-       isync
-23:
-       /*
-        * For external and machine check interrupts, we need
-        * to call the Linux handler to process the interrupt.
-        * We do that by jumping to absolute address 0x500 for
-        * external interrupts, or the machine_check_fwnmi label
-        * for machine checks (since firmware might have patched
-        * the vector area at 0x200).  The [h]rfid at the end of the
-        * handler will return to the book3s_hv_interrupts.S code.
-        * For other interrupts we do the rfid to get back
-        * to the book3s_hv_interrupts.S code here.
-        */
-       ld      r8, HSTATE_VMHANDLER(r13)
-       ld      r7, HSTATE_HOST_MSR(r13)
-
-       cmpwi   cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
-       cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
-BEGIN_FTR_SECTION
-       beq     11f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-
-       /* RFI into the highmem handler, or branch to interrupt handler */
-       mfmsr   r6
-       li      r0, MSR_RI
-       andc    r6, r6, r0
-       mtmsrd  r6, 1                   /* Clear RI in MSR */
-       mtsrr0  r8
-       mtsrr1  r7
-       beqa    0x500                   /* external interrupt (PPC970) */
-       beq     cr1, 13f                /* machine check */
-       RFI
-
-       /* On POWER7, we have external interrupts set to use HSRR0/1 */
-11:    mtspr   SPRN_HSRR0, r8
-       mtspr   SPRN_HSRR1, r7
-       ba      0x500
-
-13:    b       machine_check_fwnmi
+       .rept   SLB_NUM_BOLTED
+       ld      r5,SLBSHADOW_SAVEAREA(r11)
+       ld      r6,SLBSHADOW_SAVEAREA+8(r11)
+       andis.  r7,r5,SLB_ESID_V@h
+       beq     1f
+       slbmte  r6,r5
+1:     addi    r11,r11,16
+       .endr
+       b       22b
 
 /*
  * Check whether an HDSI is an HPTE not found fault or something else.
@@ -1333,7 +1416,7 @@ fast_interrupt_c_return:
        stw     r8, VCPU_LAST_INST(r9)
 
        /* Unset guest mode. */
-       li      r0, KVM_GUEST_MODE_NONE
+       li      r0, KVM_GUEST_MODE_HOST_HV
        stb     r0, HSTATE_IN_GUEST(r13)
        b       guest_exit_cont
 
@@ -1701,67 +1784,70 @@ machine_check_realmode:
        rotldi  r11, r11, 63
        b       fast_interrupt_c_return
 
-secondary_too_late:
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       HMT_LOW
-13:    lbz     r3,VCORE_IN_GUEST(r5)
-       cmpwi   r3,0
-       bne     13b
-       HMT_MEDIUM
-       ld      r11,PACA_SLBSHADOWPTR(r13)
-
-       .rept   SLB_NUM_BOLTED
-       ld      r5,SLBSHADOW_SAVEAREA(r11)
-       ld      r6,SLBSHADOW_SAVEAREA+8(r11)
-       andis.  r7,r5,SLB_ESID_V@h
-       beq     1f
-       slbmte  r6,r5
-1:     addi    r11,r11,16
-       .endr
+/*
+ * Determine what sort of external interrupt is pending (if any).
+ * Returns:
+ *     0 if no interrupt is pending
+ *     1 if an interrupt is pending that needs to be handled by the host
+ *     -1 if there was a guest wakeup IPI (which has now been cleared)
+ */
+kvmppc_read_intr:
+       /* see if a host IPI is pending */
+       li      r3, 1
+       lbz     r0, HSTATE_HOST_IPI(r13)
+       cmpwi   r0, 0
+       bne     1f
 
-secondary_nap:
-       /* Clear our vcpu pointer so we don't come back in early */
-       li      r0, 0
-       std     r0, HSTATE_KVM_VCPU(r13)
-       lwsync
-       /* Clear any pending IPI - assume we're a secondary thread */
-       ld      r5, HSTATE_XICS_PHYS(r13)
+       /* Now read the interrupt from the ICP */
+       ld      r6, HSTATE_XICS_PHYS(r13)
        li      r7, XICS_XIRR
-       lwzcix  r3, r5, r7              /* ack any pending interrupt */
-       rlwinm. r0, r3, 0, 0xffffff     /* any pending? */
-       beq     37f
+       cmpdi   r6, 0
+       beq-    1f
+       lwzcix  r0, r6, r7
+       rlwinm. r3, r0, 0, 0xffffff
        sync
-       li      r0, 0xff
-       li      r6, XICS_MFRR
-       stbcix  r0, r5, r6              /* clear the IPI */
-       stwcix  r3, r5, r7              /* EOI it */
-37:    sync
+       beq     1f                      /* if nothing pending in the ICP */
 
-       /* increment the nap count and then go to nap mode */
-       ld      r4, HSTATE_KVM_VCORE(r13)
-       addi    r4, r4, VCORE_NAP_COUNT
-       lwsync                          /* make previous updates visible */
-51:    lwarx   r3, 0, r4
-       addi    r3, r3, 1
-       stwcx.  r3, 0, r4
-       bne     51b
+       /* We found something in the ICP...
+        *
+        * If it's not an IPI, stash it in the PACA and return to
+        * the host, we don't (yet) handle directing real external
+        * interrupts directly to the guest
+        */
+       cmpwi   r3, XICS_IPI            /* if there is, is it an IPI? */
+       li      r3, 1
+       bne     42f
 
-kvm_no_guest:
-       li      r0, KVM_HWTHREAD_IN_NAP
-       stb     r0, HSTATE_HWTHREAD_STATE(r13)
+       /* It's an IPI, clear the MFRR and EOI it */
+       li      r3, 0xff
+       li      r8, XICS_MFRR
+       stbcix  r3, r6, r8              /* clear the IPI */
+       stwcix  r0, r6, r7              /* EOI it */
+       sync
 
-       li      r3, LPCR_PECE0
-       mfspr   r4, SPRN_LPCR
-       rlwimi  r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
-       mtspr   SPRN_LPCR, r4
-       isync
-       std     r0, HSTATE_SCRATCH0(r13)
-       ptesync
-       ld      r0, HSTATE_SCRATCH0(r13)
-1:     cmpd    r0, r0
-       bne     1b
-       nap
-       b       .
+       /* We need to re-check host IPI now in case it got set in the
+        * meantime. If it's clear, we bounce the interrupt to the
+        * guest
+        */
+       lbz     r0, HSTATE_HOST_IPI(r13)
+       cmpwi   r0, 0
+       bne-    43f
+
+       /* OK, it's an IPI for us */
+       li      r3, -1
+1:     blr
+
+42:    /* It's not an IPI and it's for the host, stash it in the PACA
+        * before exit, it will be picked up by the host ICP driver
+        */
+       stw     r0, HSTATE_SAVED_XIRR(r13)
+       b       1b
+
+43:    /* We raced with the host, we need to resend that IPI, bummer */
+       li      r0, IPI_PRIORITY
+       stbcix  r0, r6, r8              /* set the IPI */
+       sync
+       b       1b
 
 /*
  * Save away FP, VMX and VSX registers.
@@ -1879,3 +1965,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
        lwz     r7,VCPU_VRSAVE(r4)
        mtspr   SPRN_VRSAVE,r7
        blr
+
+/*
+ * We come here if we get any exception or interrupt while we are
+ * executing host real mode code while in guest MMU context.
+ * For now just spin, but we should do something better.
+ */
+kvmppc_bad_host_intr:
+       b       .
index 17cfae5..f4dd041 100644 (file)
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 #define FUNC(name)             GLUE(.,name)
+#define GET_SHADOW_VCPU(reg)    addi   reg, r13, PACA_SVCPU
+
 #elif defined(CONFIG_PPC_BOOK3S_32)
 #define FUNC(name)             name
+#define GET_SHADOW_VCPU(reg)   lwz     reg, (THREAD + THREAD_KVM_SVCPU)(r2)
+
 #endif /* CONFIG_PPC_BOOK3S_XX */
 
 #define VCPU_LOAD_NVGPRS(vcpu) \
@@ -87,8 +91,14 @@ kvm_start_entry:
        VCPU_LOAD_NVGPRS(r4)
 
 kvm_start_lightweight:
+       /* Copy registers into shadow vcpu so we can access them in real mode */
+       GET_SHADOW_VCPU(r3)
+       bl      FUNC(kvmppc_copy_to_svcpu)
+       nop
+       REST_GPR(4, r1)
 
 #ifdef CONFIG_PPC_BOOK3S_64
+       /* Get the dcbz32 flag */
        PPC_LL  r3, VCPU_HFLAGS(r4)
        rldicl  r3, r3, 0, 63           /* r3 &= 1 */
        stb     r3, HSTATE_RESTORE_HID5(r13)
@@ -111,9 +121,6 @@ kvm_start_lightweight:
  *
  */
 
-.global kvmppc_handler_highmem
-kvmppc_handler_highmem:
-
        /*
         * Register usage at this point:
         *
@@ -125,18 +132,31 @@ kvmppc_handler_highmem:
         *
         */
 
-       /* R7 = vcpu */
-       PPC_LL  r7, GPR4(r1)
+       /* Transfer reg values from shadow vcpu back to vcpu struct */
+       /* On 64-bit, interrupts are still off at this point */
+       PPC_LL  r3, GPR4(r1)            /* vcpu pointer */
+       GET_SHADOW_VCPU(r4)
+       bl      FUNC(kvmppc_copy_from_svcpu)
+       nop
 
 #ifdef CONFIG_PPC_BOOK3S_64
+       /* Re-enable interrupts */
+       ld      r3, HSTATE_HOST_MSR(r13)
+       ori     r3, r3, MSR_EE
+       MTMSR_EERI(r3)
+
        /*
         * Reload kernel SPRG3 value.
         * No need to save guest value as usermode can't modify SPRG3.
         */
        ld      r3, PACA_SPRG3(r13)
        mtspr   SPRN_SPRG3, r3
+
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
+       /* R7 = vcpu */
+       PPC_LL  r7, GPR4(r1)
+
        PPC_STL r14, VCPU_GPR(R14)(r7)
        PPC_STL r15, VCPU_GPR(R15)(r7)
        PPC_STL r16, VCPU_GPR(R16)(r7)
@@ -161,7 +181,7 @@ kvmppc_handler_highmem:
 
        /* Restore r3 (kvm_run) and r4 (vcpu) */
        REST_2GPRS(3, r1)
-       bl      FUNC(kvmppc_handle_exit)
+       bl      FUNC(kvmppc_handle_exit_pr)
 
        /* If RESUME_GUEST, get back in the loop */
        cmpwi   r3, RESUME_GUEST
index da8b13c..5a1ab12 100644 (file)
@@ -28,7 +28,7 @@
 #include <asm/mmu_context.h>
 #include <asm/hw_irq.h>
 
-#include "trace.h"
+#include "trace_pr.h"
 
 #define PTE_SIZE       12
 
@@ -56,6 +56,14 @@ static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
                       HPTEG_HASH_BITS_VPTE_LONG);
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline u64 kvmppc_mmu_hash_vpte_64k(u64 vpage)
+{
+       return hash_64((vpage & 0xffffffff0ULL) >> 4,
+                      HPTEG_HASH_BITS_VPTE_64K);
+}
+#endif
+
 void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
        u64 index;
@@ -83,6 +91,15 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
        hlist_add_head_rcu(&pte->list_vpte_long,
                           &vcpu3s->hpte_hash_vpte_long[index]);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+       /* Add to vPTE_64k list */
+       index = kvmppc_mmu_hash_vpte_64k(pte->pte.vpage);
+       hlist_add_head_rcu(&pte->list_vpte_64k,
+                          &vcpu3s->hpte_hash_vpte_64k[index]);
+#endif
+
+       vcpu3s->hpte_cache_count++;
+
        spin_unlock(&vcpu3s->mmu_lock);
 }
 
@@ -113,10 +130,13 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
        hlist_del_init_rcu(&pte->list_pte_long);
        hlist_del_init_rcu(&pte->list_vpte);
        hlist_del_init_rcu(&pte->list_vpte_long);
+#ifdef CONFIG_PPC_BOOK3S_64
+       hlist_del_init_rcu(&pte->list_vpte_64k);
+#endif
+       vcpu3s->hpte_cache_count--;
 
        spin_unlock(&vcpu3s->mmu_lock);
 
-       vcpu3s->hpte_cache_count--;
        call_rcu(&pte->rcu_head, free_pte_rcu);
 }
 
@@ -219,6 +239,29 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
        rcu_read_unlock();
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Flush with mask 0xffffffff0 */
+static void kvmppc_mmu_pte_vflush_64k(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+       struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
+       struct hlist_head *list;
+       struct hpte_cache *pte;
+       u64 vp_mask = 0xffffffff0ULL;
+
+       list = &vcpu3s->hpte_hash_vpte_64k[
+               kvmppc_mmu_hash_vpte_64k(guest_vp)];
+
+       rcu_read_lock();
+
+       /* Check the list for matching entries and invalidate */
+       hlist_for_each_entry_rcu(pte, list, list_vpte_64k)
+               if ((pte->pte.vpage & vp_mask) == guest_vp)
+                       invalidate_pte(vcpu, pte);
+
+       rcu_read_unlock();
+}
+#endif
+
 /* Flush with mask 0xffffff000 */
 static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
 {
@@ -249,6 +292,11 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
        case 0xfffffffffULL:
                kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);
                break;
+#ifdef CONFIG_PPC_BOOK3S_64
+       case 0xffffffff0ULL:
+               kvmppc_mmu_pte_vflush_64k(vcpu, guest_vp);
+               break;
+#endif
        case 0xffffff000ULL:
                kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);
                break;
@@ -285,15 +333,19 @@ struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
        struct hpte_cache *pte;
 
-       pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
-       vcpu3s->hpte_cache_count++;
-
        if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
                kvmppc_mmu_pte_flush_all(vcpu);
 
+       pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
+
        return pte;
 }
 
+void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte)
+{
+       kmem_cache_free(hpte_cache, pte);
+}
+
 void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
 {
        kvmppc_mmu_pte_flush(vcpu, 0, 0);
@@ -320,6 +372,10 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
                                  ARRAY_SIZE(vcpu3s->hpte_hash_vpte));
        kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_long,
                                  ARRAY_SIZE(vcpu3s->hpte_hash_vpte_long));
+#ifdef CONFIG_PPC_BOOK3S_64
+       kvmppc_mmu_hpte_init_hash(vcpu3s->hpte_hash_vpte_64k,
+                                 ARRAY_SIZE(vcpu3s->hpte_hash_vpte_64k));
+#endif
 
        spin_lock_init(&vcpu3s->mmu_lock);
 
index 27db1e6..df36cf2 100644 (file)
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/module.h>
 
-#include "trace.h"
+#include "book3s.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_pr.h"
 
 /* #define EXIT_DEBUG */
 /* #define DEBUG_EXT */
@@ -56,29 +60,25 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #define HW_PAGE_SIZE PAGE_SIZE
 #endif
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
        struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
        memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
-       memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
-              sizeof(get_paca()->shadow_vcpu));
        svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
        svcpu_put(svcpu);
 #endif
        vcpu->cpu = smp_processor_id();
 #ifdef CONFIG_PPC_BOOK3S_32
-       current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
+       current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
 #endif
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
        struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
        memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
-       memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
-              sizeof(get_paca()->shadow_vcpu));
        to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
        svcpu_put(svcpu);
 #endif
@@ -87,7 +87,61 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
        vcpu->cpu = -1;
 }
 
-int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+/* Copy data needed by real-mode code from vcpu to shadow vcpu */
+void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
+                         struct kvm_vcpu *vcpu)
+{
+       svcpu->gpr[0] = vcpu->arch.gpr[0];
+       svcpu->gpr[1] = vcpu->arch.gpr[1];
+       svcpu->gpr[2] = vcpu->arch.gpr[2];
+       svcpu->gpr[3] = vcpu->arch.gpr[3];
+       svcpu->gpr[4] = vcpu->arch.gpr[4];
+       svcpu->gpr[5] = vcpu->arch.gpr[5];
+       svcpu->gpr[6] = vcpu->arch.gpr[6];
+       svcpu->gpr[7] = vcpu->arch.gpr[7];
+       svcpu->gpr[8] = vcpu->arch.gpr[8];
+       svcpu->gpr[9] = vcpu->arch.gpr[9];
+       svcpu->gpr[10] = vcpu->arch.gpr[10];
+       svcpu->gpr[11] = vcpu->arch.gpr[11];
+       svcpu->gpr[12] = vcpu->arch.gpr[12];
+       svcpu->gpr[13] = vcpu->arch.gpr[13];
+       svcpu->cr  = vcpu->arch.cr;
+       svcpu->xer = vcpu->arch.xer;
+       svcpu->ctr = vcpu->arch.ctr;
+       svcpu->lr  = vcpu->arch.lr;
+       svcpu->pc  = vcpu->arch.pc;
+}
+
+/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
+void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
+                           struct kvmppc_book3s_shadow_vcpu *svcpu)
+{
+       vcpu->arch.gpr[0] = svcpu->gpr[0];
+       vcpu->arch.gpr[1] = svcpu->gpr[1];
+       vcpu->arch.gpr[2] = svcpu->gpr[2];
+       vcpu->arch.gpr[3] = svcpu->gpr[3];
+       vcpu->arch.gpr[4] = svcpu->gpr[4];
+       vcpu->arch.gpr[5] = svcpu->gpr[5];
+       vcpu->arch.gpr[6] = svcpu->gpr[6];
+       vcpu->arch.gpr[7] = svcpu->gpr[7];
+       vcpu->arch.gpr[8] = svcpu->gpr[8];
+       vcpu->arch.gpr[9] = svcpu->gpr[9];
+       vcpu->arch.gpr[10] = svcpu->gpr[10];
+       vcpu->arch.gpr[11] = svcpu->gpr[11];
+       vcpu->arch.gpr[12] = svcpu->gpr[12];
+       vcpu->arch.gpr[13] = svcpu->gpr[13];
+       vcpu->arch.cr  = svcpu->cr;
+       vcpu->arch.xer = svcpu->xer;
+       vcpu->arch.ctr = svcpu->ctr;
+       vcpu->arch.lr  = svcpu->lr;
+       vcpu->arch.pc  = svcpu->pc;
+       vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
+       vcpu->arch.fault_dar   = svcpu->fault_dar;
+       vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
+       vcpu->arch.last_inst   = svcpu->last_inst;
+}
+
+static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
 {
        int r = 1; /* Indicate we want to get back into the guest */
 
@@ -100,44 +154,69 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 }
 
 /************* MMU Notifiers *************/
+static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
+                            unsigned long end)
+{
+       long i;
+       struct kvm_vcpu *vcpu;
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+
+       slots = kvm_memslots(kvm);
+       kvm_for_each_memslot(memslot, slots) {
+               unsigned long hva_start, hva_end;
+               gfn_t gfn, gfn_end;
+
+               hva_start = max(start, memslot->userspace_addr);
+               hva_end = min(end, memslot->userspace_addr +
+                                       (memslot->npages << PAGE_SHIFT));
+               if (hva_start >= hva_end)
+                       continue;
+               /*
+                * {gfn(page) | page intersects with [hva_start, hva_end)} =
+                * {gfn, gfn+1, ..., gfn_end-1}.
+                */
+               gfn = hva_to_gfn_memslot(hva_start, memslot);
+               gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+               kvm_for_each_vcpu(i, vcpu, kvm)
+                       kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT,
+                                             gfn_end << PAGE_SHIFT);
+       }
+}
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
 {
        trace_kvm_unmap_hva(hva);
 
-       /*
-        * Flush all shadow tlb entries everywhere. This is slow, but
-        * we are 100% sure that we catch the to be unmapped page
-        */
-       kvm_flush_remote_tlbs(kvm);
+       do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
 
        return 0;
 }
 
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
+                                 unsigned long end)
 {
-       /* kvm_unmap_hva flushes everything anyways */
-       kvm_unmap_hva(kvm, start);
+       do_kvm_unmap_hva(kvm, start, end);
 
        return 0;
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
 {
        /* XXX could be more clever ;) */
        return 0;
 }
 
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
 {
        /* XXX could be more clever ;) */
        return 0;
 }
 
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
        /* The page will get remapped properly on its next fault */
-       kvm_unmap_hva(kvm, hva);
+       do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
 }
 
 /*****************************************/
@@ -159,7 +238,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
        vcpu->arch.shadow_msr = smsr;
 }
 
-void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 {
        ulong old_msr = vcpu->arch.shared->msr;
 
@@ -219,7 +298,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
                kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 }
 
-void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
 {
        u32 host_pvr;
 
@@ -256,6 +335,23 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
        if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
                to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
 
+       /*
+        * If they're asking for POWER6 or later, set the flag
+        * indicating that we can do multiple large page sizes
+        * and 1TB segments.
+        * Also set the flag that indicates that tlbie has the large
+        * page bit in the RB operand instead of the instruction.
+        */
+       switch (PVR_VER(pvr)) {
+       case PVR_POWER6:
+       case PVR_POWER7:
+       case PVR_POWER7p:
+       case PVR_POWER8:
+               vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
+                       BOOK3S_HFLAG_NEW_TLBIE;
+               break;
+       }
+
 #ifdef CONFIG_PPC_BOOK3S_32
        /* 32 bit Book3S always has 32 byte dcbz */
        vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
@@ -334,6 +430,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                            ulong eaddr, int vec)
 {
        bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
+       bool iswrite = false;
        int r = RESUME_GUEST;
        int relocated;
        int page_found = 0;
@@ -344,10 +441,12 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        u64 vsid;
 
        relocated = data ? dr : ir;
+       if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
+               iswrite = true;
 
        /* Resolve real address if translation turned on */
        if (relocated) {
-               page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
+               page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
        } else {
                pte.may_execute = true;
                pte.may_read = true;
@@ -355,6 +454,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                pte.raddr = eaddr & KVM_PAM;
                pte.eaddr = eaddr;
                pte.vpage = eaddr >> 12;
+               pte.page_size = MMU_PAGE_64K;
        }
 
        switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
@@ -388,22 +488,18 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        if (page_found == -ENOENT) {
                /* Page not found in guest PTE entries */
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
                vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-               vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
+               vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr;
                vcpu->arch.shared->msr |=
-                       (svcpu->shadow_srr1 & 0x00000000f8000000ULL);
-               svcpu_put(svcpu);
+                       vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EPERM) {
                /* Storage protection */
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
                vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-               vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
+               vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
                vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
                vcpu->arch.shared->msr |=
-                       svcpu->shadow_srr1 & 0x00000000f8000000ULL;
-               svcpu_put(svcpu);
+                       vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EINVAL) {
                /* Page not found in guest SLB */
@@ -411,12 +507,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
        } else if (!is_mmio &&
                   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+               if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
+                       /*
+                        * There is already a host HPTE there, presumably
+                        * a read-only one for a page the guest thinks
+                        * is writable, so get rid of it first.
+                        */
+                       kvmppc_mmu_unmap_page(vcpu, &pte);
+               }
                /* The guest's PTE is not mapped yet. Map on the host */
-               kvmppc_mmu_map_page(vcpu, &pte);
+               kvmppc_mmu_map_page(vcpu, &pte, iswrite);
                if (data)
                        vcpu->stat.sp_storage++;
                else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
-                       (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
+                        (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
                        kvmppc_patch_dcbz(vcpu, &pte);
        } else {
                /* MMIO */
@@ -619,13 +723,15 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
 
        if (lost_ext & MSR_FP)
                kvmppc_load_up_fpu();
+#ifdef CONFIG_ALTIVEC
        if (lost_ext & MSR_VEC)
                kvmppc_load_up_altivec();
+#endif
        current->thread.regs->msr |= lost_ext;
 }
 
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                       unsigned int exit_nr)
+int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                         unsigned int exit_nr)
 {
        int r = RESUME_HOST;
        int s;
@@ -643,25 +749,32 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        switch (exit_nr) {
        case BOOK3S_INTERRUPT_INST_STORAGE:
        {
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               ulong shadow_srr1 = svcpu->shadow_srr1;
+               ulong shadow_srr1 = vcpu->arch.shadow_srr1;
                vcpu->stat.pf_instruc++;
 
 #ifdef CONFIG_PPC_BOOK3S_32
                /* We set segments as unused segments when invalidating them. So
                 * treat the respective fault as segment fault. */
-               if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
-                       kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
-                       r = RESUME_GUEST;
+               {
+                       struct kvmppc_book3s_shadow_vcpu *svcpu;
+                       u32 sr;
+
+                       svcpu = svcpu_get(vcpu);
+                       sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
                        svcpu_put(svcpu);
-                       break;
+                       if (sr == SR_INVALID) {
+                               kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+                               r = RESUME_GUEST;
+                               break;
+                       }
                }
 #endif
-               svcpu_put(svcpu);
 
                /* only care about PTEG not found errors, but leave NX alone */
                if (shadow_srr1 & 0x40000000) {
+                       int idx = srcu_read_lock(&vcpu->kvm->srcu);
                        r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+                       srcu_read_unlock(&vcpu->kvm->srcu, idx);
                        vcpu->stat.sp_instruc++;
                } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
                          (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
@@ -682,25 +795,36 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        case BOOK3S_INTERRUPT_DATA_STORAGE:
        {
                ulong dar = kvmppc_get_fault_dar(vcpu);
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               u32 fault_dsisr = svcpu->fault_dsisr;
+               u32 fault_dsisr = vcpu->arch.fault_dsisr;
                vcpu->stat.pf_storage++;
 
 #ifdef CONFIG_PPC_BOOK3S_32
                /* We set segments as unused segments when invalidating them. So
                 * treat the respective fault as segment fault. */
-               if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
-                       kvmppc_mmu_map_segment(vcpu, dar);
-                       r = RESUME_GUEST;
+               {
+                       struct kvmppc_book3s_shadow_vcpu *svcpu;
+                       u32 sr;
+
+                       svcpu = svcpu_get(vcpu);
+                       sr = svcpu->sr[dar >> SID_SHIFT];
                        svcpu_put(svcpu);
-                       break;
+                       if (sr == SR_INVALID) {
+                               kvmppc_mmu_map_segment(vcpu, dar);
+                               r = RESUME_GUEST;
+                               break;
+                       }
                }
 #endif
-               svcpu_put(svcpu);
 
-               /* The only case we need to handle is missing shadow PTEs */
-               if (fault_dsisr & DSISR_NOHPTE) {
+               /*
+                * We need to handle missing shadow PTEs, and
+                * protection faults due to us mapping a page read-only
+                * when the guest thinks it is writable.
+                */
+               if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
+                       int idx = srcu_read_lock(&vcpu->kvm->srcu);
                        r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+                       srcu_read_unlock(&vcpu->kvm->srcu, idx);
                } else {
                        vcpu->arch.shared->dar = dar;
                        vcpu->arch.shared->dsisr = fault_dsisr;
@@ -743,13 +867,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
        {
                enum emulation_result er;
-               struct kvmppc_book3s_shadow_vcpu *svcpu;
                ulong flags;
 
 program_interrupt:
-               svcpu = svcpu_get(vcpu);
-               flags = svcpu->shadow_srr1 & 0x1f0000ull;
-               svcpu_put(svcpu);
+               flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
 
                if (vcpu->arch.shared->msr & MSR_PR) {
 #ifdef EXIT_DEBUG
@@ -798,7 +919,7 @@ program_interrupt:
                        ulong cmd = kvmppc_get_gpr(vcpu, 3);
                        int i;
 
-#ifdef CONFIG_KVM_BOOK3S_64_PR
+#ifdef CONFIG_PPC_BOOK3S_64
                        if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
                                r = RESUME_GUEST;
                                break;
@@ -881,9 +1002,7 @@ program_interrupt:
                break;
        default:
        {
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               ulong shadow_srr1 = svcpu->shadow_srr1;
-               svcpu_put(svcpu);
+               ulong shadow_srr1 = vcpu->arch.shadow_srr1;
                /* Ugh - bork here! What did we get? */
                printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
                        exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
@@ -920,8 +1039,8 @@ program_interrupt:
        return r;
 }
 
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
+                                           struct kvm_sregs *sregs)
 {
        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
        int i;
@@ -947,13 +1066,13 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
+                                           struct kvm_sregs *sregs)
 {
        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
        int i;
 
-       kvmppc_set_pvr(vcpu, sregs->pvr);
+       kvmppc_set_pvr_pr(vcpu, sregs->pvr);
 
        vcpu3s->sdr1 = sregs->u.s.sdr1;
        if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
@@ -983,7 +1102,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+                                union kvmppc_one_reg *val)
 {
        int r = 0;
 
@@ -1012,7 +1132,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
        return r;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+                                union kvmppc_one_reg *val)
 {
        int r = 0;
 
@@ -1042,28 +1163,30 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
        return r;
 }
 
-int kvmppc_core_check_processor_compat(void)
-{
-       return 0;
-}
-
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
+                                                  unsigned int id)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s;
        struct kvm_vcpu *vcpu;
        int err = -ENOMEM;
        unsigned long p;
 
-       vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
-       if (!vcpu_book3s)
+       vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+       if (!vcpu)
                goto out;
 
-       vcpu_book3s->shadow_vcpu =
-               kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
-       if (!vcpu_book3s->shadow_vcpu)
+       vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
+       if (!vcpu_book3s)
                goto free_vcpu;
+       vcpu->arch.book3s = vcpu_book3s;
+
+#ifdef CONFIG_KVM_BOOK3S_32
+       vcpu->arch.shadow_vcpu =
+               kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
+       if (!vcpu->arch.shadow_vcpu)
+               goto free_vcpu3s;
+#endif
 
-       vcpu = &vcpu_book3s->vcpu;
        err = kvm_vcpu_init(vcpu, kvm, id);
        if (err)
                goto free_shadow_vcpu;
@@ -1076,13 +1199,19 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
        vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
 
 #ifdef CONFIG_PPC_BOOK3S_64
-       /* default to book3s_64 (970fx) */
+       /*
+        * Default to the same as the host if we're on sufficiently
+        * recent machine that we have 1TB segments;
+        * otherwise default to PPC970FX.
+        */
        vcpu->arch.pvr = 0x3C0301;
+       if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+               vcpu->arch.pvr = mfspr(SPRN_PVR);
 #else
        /* default to book3s_32 (750) */
        vcpu->arch.pvr = 0x84202;
 #endif
-       kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+       kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
        vcpu->arch.slb_nr = 64;
 
        vcpu->arch.shadow_msr = MSR_USER64;
@@ -1096,24 +1225,31 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 uninit_vcpu:
        kvm_vcpu_uninit(vcpu);
 free_shadow_vcpu:
-       kfree(vcpu_book3s->shadow_vcpu);
-free_vcpu:
+#ifdef CONFIG_KVM_BOOK3S_32
+       kfree(vcpu->arch.shadow_vcpu);
+free_vcpu3s:
+#endif
        vfree(vcpu_book3s);
+free_vcpu:
+       kmem_cache_free(kvm_vcpu_cache, vcpu);
 out:
        return ERR_PTR(err);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 
        free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
        kvm_vcpu_uninit(vcpu);
-       kfree(vcpu_book3s->shadow_vcpu);
+#ifdef CONFIG_KVM_BOOK3S_32
+       kfree(vcpu->arch.shadow_vcpu);
+#endif
        vfree(vcpu_book3s);
+       kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
-int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
        int ret;
        double fpr[32][TS_FPRWIDTH];
@@ -1222,8 +1358,8 @@ out:
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-                                     struct kvm_dirty_log *log)
+static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
+                                        struct kvm_dirty_log *log)
 {
        struct kvm_memory_slot *memslot;
        struct kvm_vcpu *vcpu;
@@ -1258,67 +1394,100 @@ out:
        return r;
 }
 
-#ifdef CONFIG_PPC64
-int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
+                                        struct kvm_memory_slot *memslot)
 {
-       info->flags = KVM_PPC_1T_SEGMENTS;
-
-       /* SLB is always 64 entries */
-       info->slb_size = 64;
-
-       /* Standard 4k base page size segment */
-       info->sps[0].page_shift = 12;
-       info->sps[0].slb_enc = 0;
-       info->sps[0].enc[0].page_shift = 12;
-       info->sps[0].enc[0].pte_enc = 0;
-
-       /* Standard 16M large page size segment */
-       info->sps[1].page_shift = 24;
-       info->sps[1].slb_enc = SLB_VSID_L;
-       info->sps[1].enc[0].page_shift = 24;
-       info->sps[1].enc[0].pte_enc = 0;
+       return;
+}
 
+static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
+                                       struct kvm_memory_slot *memslot,
+                                       struct kvm_userspace_memory_region *mem)
+{
        return 0;
 }
-#endif /* CONFIG_PPC64 */
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
-                             struct kvm_memory_slot *dont)
+static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
+                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_memory_slot *old)
 {
+       return;
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
-                              unsigned long npages)
+static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
+                                       struct kvm_memory_slot *dont)
 {
-       return 0;
+       return;
 }
 
-int kvmppc_core_prepare_memory_region(struct kvm *kvm,
-                                     struct kvm_memory_slot *memslot,
-                                     struct kvm_userspace_memory_region *mem)
+static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
+                                        unsigned long npages)
 {
        return 0;
 }
 
-void kvmppc_core_commit_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
-                               const struct kvm_memory_slot *old)
+
+#ifdef CONFIG_PPC64
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+                                        struct kvm_ppc_smmu_info *info)
 {
-}
+       long int i;
+       struct kvm_vcpu *vcpu;
+
+       info->flags = 0;
+
+       /* SLB is always 64 entries */
+       info->slb_size = 64;
+
+       /* Standard 4k base page size segment */
+       info->sps[0].page_shift = 12;
+       info->sps[0].slb_enc = 0;
+       info->sps[0].enc[0].page_shift = 12;
+       info->sps[0].enc[0].pte_enc = 0;
+
+       /*
+        * 64k large page size.
+        * We only want to put this in if the CPUs we're emulating
+        * support it, but unfortunately we don't have a vcpu easily
+        * to hand here to test.  Just pick the first vcpu, and if
+        * that doesn't exist yet, report the minimum capability,
+        * i.e., no 64k pages.
+        * 1T segment support goes along with 64k pages.
+        */
+       i = 1;
+       vcpu = kvm_get_vcpu(kvm, 0);
+       if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+               info->flags = KVM_PPC_1T_SEGMENTS;
+               info->sps[i].page_shift = 16;
+               info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
+               info->sps[i].enc[0].page_shift = 16;
+               info->sps[i].enc[0].pte_enc = 1;
+               ++i;
+       }
+
+       /* Standard 16M large page size segment */
+       info->sps[i].page_shift = 24;
+       info->sps[i].slb_enc = SLB_VSID_L;
+       info->sps[i].enc[0].page_shift = 24;
+       info->sps[i].enc[0].pte_enc = 0;
 
-void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+       return 0;
+}
+#else
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+                                        struct kvm_ppc_smmu_info *info)
 {
+       /* We should not get called */
+       BUG();
 }
+#endif /* CONFIG_PPC64 */
 
 static unsigned int kvm_global_user_count = 0;
 static DEFINE_SPINLOCK(kvm_global_user_count_lock);
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_pr(struct kvm *kvm)
 {
-#ifdef CONFIG_PPC64
-       INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
-       INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
-#endif
+       mutex_init(&kvm->arch.hpt_mutex);
 
        if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
                spin_lock(&kvm_global_user_count_lock);
@@ -1329,7 +1498,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
        return 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
 {
 #ifdef CONFIG_PPC64
        WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
@@ -1344,26 +1513,81 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
        }
 }
 
-static int kvmppc_book3s_init(void)
+static int kvmppc_core_check_processor_compat_pr(void)
 {
-       int r;
+       /* we are always compatible */
+       return 0;
+}
 
-       r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
-                    THIS_MODULE);
+static long kvm_arch_vm_ioctl_pr(struct file *filp,
+                                unsigned int ioctl, unsigned long arg)
+{
+       return -ENOTTY;
+}
 
-       if (r)
+static struct kvmppc_ops kvm_ops_pr = {
+       .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
+       .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
+       .get_one_reg = kvmppc_get_one_reg_pr,
+       .set_one_reg = kvmppc_set_one_reg_pr,
+       .vcpu_load   = kvmppc_core_vcpu_load_pr,
+       .vcpu_put    = kvmppc_core_vcpu_put_pr,
+       .set_msr     = kvmppc_set_msr_pr,
+       .vcpu_run    = kvmppc_vcpu_run_pr,
+       .vcpu_create = kvmppc_core_vcpu_create_pr,
+       .vcpu_free   = kvmppc_core_vcpu_free_pr,
+       .check_requests = kvmppc_core_check_requests_pr,
+       .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
+       .flush_memslot = kvmppc_core_flush_memslot_pr,
+       .prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
+       .commit_memory_region = kvmppc_core_commit_memory_region_pr,
+       .unmap_hva = kvm_unmap_hva_pr,
+       .unmap_hva_range = kvm_unmap_hva_range_pr,
+       .age_hva  = kvm_age_hva_pr,
+       .test_age_hva = kvm_test_age_hva_pr,
+       .set_spte_hva = kvm_set_spte_hva_pr,
+       .mmu_destroy  = kvmppc_mmu_destroy_pr,
+       .free_memslot = kvmppc_core_free_memslot_pr,
+       .create_memslot = kvmppc_core_create_memslot_pr,
+       .init_vm = kvmppc_core_init_vm_pr,
+       .destroy_vm = kvmppc_core_destroy_vm_pr,
+       .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
+       .emulate_op = kvmppc_core_emulate_op_pr,
+       .emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
+       .emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
+       .fast_vcpu_kick = kvm_vcpu_kick,
+       .arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
+};
+
+
+int kvmppc_book3s_init_pr(void)
+{
+       int r;
+
+       r = kvmppc_core_check_processor_compat_pr();
+       if (r < 0)
                return r;
 
-       r = kvmppc_mmu_hpte_sysinit();
+       kvm_ops_pr.owner = THIS_MODULE;
+       kvmppc_pr_ops = &kvm_ops_pr;
 
+       r = kvmppc_mmu_hpte_sysinit();
        return r;
 }
 
-static void kvmppc_book3s_exit(void)
+void kvmppc_book3s_exit_pr(void)
 {
+       kvmppc_pr_ops = NULL;
        kvmppc_mmu_hpte_sysexit();
-       kvm_exit();
 }
 
-module_init(kvmppc_book3s_init);
-module_exit(kvmppc_book3s_exit);
+/*
+ * We only support separate modules for book3s 64
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+
+module_init(kvmppc_book3s_init_pr);
+module_exit(kvmppc_book3s_exit_pr);
+
+MODULE_LICENSE("GPL");
+#endif
index da0e0bc..5efa97b 100644 (file)
@@ -21,6 +21,8 @@
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
+#define HPTE_SIZE      16              /* bytes per HPT entry */
+
 static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
@@ -40,32 +42,41 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
        long pte_index = kvmppc_get_gpr(vcpu, 5);
        unsigned long pteg[2 * 8];
        unsigned long pteg_addr, i, *hpte;
+       long int ret;
 
+       i = pte_index & 7;
        pte_index &= ~7UL;
        pteg_addr = get_pteg_addr(vcpu, pte_index);
 
+       mutex_lock(&vcpu->kvm->arch.hpt_mutex);
        copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
        hpte = pteg;
 
+       ret = H_PTEG_FULL;
        if (likely((flags & H_EXACT) == 0)) {
-               pte_index &= ~7UL;
                for (i = 0; ; ++i) {
                        if (i == 8)
-                               return H_PTEG_FULL;
+                               goto done;
                        if ((*hpte & HPTE_V_VALID) == 0)
                                break;
                        hpte += 2;
                }
        } else {
-               i = kvmppc_get_gpr(vcpu, 5) & 7UL;
                hpte += i * 2;
+               if (*hpte & HPTE_V_VALID)
+                       goto done;
        }
 
        hpte[0] = kvmppc_get_gpr(vcpu, 6);
        hpte[1] = kvmppc_get_gpr(vcpu, 7);
-       copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg));
-       kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+       pteg_addr += i * HPTE_SIZE;
+       copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
        kvmppc_set_gpr(vcpu, 4, pte_index | i);
+       ret = H_SUCCESS;
+
+ done:
+       mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+       kvmppc_set_gpr(vcpu, 3, ret);
 
        return EMULATE_DONE;
 }
@@ -77,26 +88,31 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
        unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
        unsigned long v = 0, pteg, rb;
        unsigned long pte[2];
+       long int ret;
 
        pteg = get_pteg_addr(vcpu, pte_index);
+       mutex_lock(&vcpu->kvm->arch.hpt_mutex);
        copy_from_user(pte, (void __user *)pteg, sizeof(pte));
 
+       ret = H_NOT_FOUND;
        if ((pte[0] & HPTE_V_VALID) == 0 ||
            ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
-           ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) {
-               kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
-               return EMULATE_DONE;
-       }
+           ((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
+               goto done;
 
        copy_to_user((void __user *)pteg, &v, sizeof(v));
 
        rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
        vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
 
-       kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+       ret = H_SUCCESS;
        kvmppc_set_gpr(vcpu, 4, pte[0]);
        kvmppc_set_gpr(vcpu, 5, pte[1]);
 
+ done:
+       mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+       kvmppc_set_gpr(vcpu, 3, ret);
+
        return EMULATE_DONE;
 }
 
@@ -124,6 +140,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
        int paramnr = 4;
        int ret = H_SUCCESS;
 
+       mutex_lock(&vcpu->kvm->arch.hpt_mutex);
        for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
                unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i));
                unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1);
@@ -172,6 +189,7 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
                }
                kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh);
        }
+       mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
        kvmppc_set_gpr(vcpu, 3, ret);
 
        return EMULATE_DONE;
@@ -184,15 +202,16 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
        unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
        unsigned long rb, pteg, r, v;
        unsigned long pte[2];
+       long int ret;
 
        pteg = get_pteg_addr(vcpu, pte_index);
+       mutex_lock(&vcpu->kvm->arch.hpt_mutex);
        copy_from_user(pte, (void __user *)pteg, sizeof(pte));
 
+       ret = H_NOT_FOUND;
        if ((pte[0] & HPTE_V_VALID) == 0 ||
-           ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) {
-               kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
-               return EMULATE_DONE;
-       }
+           ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn))
+               goto done;
 
        v = pte[0];
        r = pte[1];
@@ -207,8 +226,11 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
        rb = compute_tlbie_rb(v, r, pte_index);
        vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
        copy_to_user((void __user *)pteg, pte, sizeof(pte));
+       ret = H_SUCCESS;
 
-       kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ done:
+       mutex_unlock(&vcpu->kvm->arch.hpt_mutex);
+       kvmppc_set_gpr(vcpu, 3, ret);
 
        return EMULATE_DONE;
 }
index 8f7633e..a38c4c9 100644 (file)
 
 #define FUNC(name)             GLUE(.,name)
 
-       .globl  kvmppc_skip_interrupt
-kvmppc_skip_interrupt:
-       /*
-        * Here all GPRs are unchanged from when the interrupt happened
-        * except for r13, which is saved in SPRG_SCRATCH0.
-        */
-       mfspr   r13, SPRN_SRR0
-       addi    r13, r13, 4
-       mtspr   SPRN_SRR0, r13
-       GET_SCRATCH0(r13)
-       rfid
-       b       .
-
-       .globl  kvmppc_skip_Hinterrupt
-kvmppc_skip_Hinterrupt:
-       /*
-        * Here all GPRs are unchanged from when the interrupt happened
-        * except for r13, which is saved in SPRG_SCRATCH0.
-        */
-       mfspr   r13, SPRN_HSRR0
-       addi    r13, r13, 4
-       mtspr   SPRN_HSRR0, r13
-       GET_SCRATCH0(r13)
-       hrfid
-       b       .
-
 #elif defined(CONFIG_PPC_BOOK3S_32)
 
 #define FUNC(name)             name
@@ -179,11 +153,15 @@ _GLOBAL(kvmppc_entry_trampoline)
 
        li      r6, MSR_IR | MSR_DR
        andc    r6, r5, r6      /* Clear DR and IR in MSR value */
+#ifdef CONFIG_PPC_BOOK3S_32
        /*
         * Set EE in HOST_MSR so that it's enabled when we get into our
-        * C exit handler function
+        * C exit handler function.  On 64-bit we delay enabling
+        * interrupts until we have finished transferring stuff
+        * to or from the PACA.
         */
        ori     r5, r5, MSR_EE
+#endif
        mtsrr0  r7
        mtsrr1  r6
        RFI
index 3219ba8..cf95cde 100644 (file)
@@ -260,6 +260,7 @@ fail:
         */
        return rc;
 }
+EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall);
 
 void kvmppc_rtas_tokens_free(struct kvm *kvm)
 {
index 1abe478..bc50c97 100644 (file)
@@ -161,8 +161,8 @@ kvmppc_handler_trampoline_enter_end:
 .global kvmppc_handler_trampoline_exit
 kvmppc_handler_trampoline_exit:
 
-.global kvmppc_interrupt
-kvmppc_interrupt:
+.global kvmppc_interrupt_pr
+kvmppc_interrupt_pr:
 
        /* Register usage at this point:
         *
index a3a5cb8..02a17dc 100644 (file)
@@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
        }
 
        /* Check for real mode returning too hard */
-       if (xics->real_mode)
+       if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm))
                return kvmppc_xics_rm_complete(vcpu, req);
 
        switch (req) {
@@ -840,6 +840,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
 
        return rc;
 }
+EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
 
 
 /* -- Initialisation code etc. -- */
@@ -1250,13 +1251,13 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
 
        xics_debugfs_init(xics);
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        if (cpu_has_feature(CPU_FTR_ARCH_206)) {
                /* Enable real mode support */
                xics->real_mode = ENABLE_REALMODE;
                xics->real_mode_dbg = DEBUG_REALMODE;
        }
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
        return 0;
 }
index 17722d8..15d0149 100644 (file)
@@ -40,7 +40,9 @@
 
 #include "timing.h"
 #include "booke.h"
-#include "trace.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_booke.h"
 
 unsigned long kvmppc_booke_handlers;
 
@@ -133,6 +135,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+       /* Synchronize guest's desire to get debug interrupts into shadow MSR */
+#ifndef CONFIG_KVM_BOOKE_HV
+       vcpu->arch.shadow_msr &= ~MSR_DE;
+       vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
+#endif
+
+       /* Force enable debug interrupts when user space wants to debug */
+       if (vcpu->guest_debug) {
+#ifdef CONFIG_KVM_BOOKE_HV
+               /*
+                * Since there is no shadow MSR, sync MSR_DE into the guest
+                * visible MSR.
+                */
+               vcpu->arch.shared->msr |= MSR_DE;
+#else
+               vcpu->arch.shadow_msr |= MSR_DE;
+               vcpu->arch.shared->msr &= ~MSR_DE;
+#endif
+       }
+}
+
 /*
  * Helper function for "full" MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
@@ -150,6 +175,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
        kvmppc_mmu_msr_notify(vcpu, old_msr);
        kvmppc_vcpu_sync_spe(vcpu);
        kvmppc_vcpu_sync_fpu(vcpu);
+       kvmppc_vcpu_sync_debug(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
@@ -655,6 +681,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
        int ret, s;
+       struct thread_struct thread;
 #ifdef CONFIG_PPC_FPU
        unsigned int fpscr;
        int fpexc_mode;
@@ -696,6 +723,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        kvmppc_load_guest_fp(vcpu);
 #endif
 
+       /* Switch to guest debug context */
+       thread.debug = vcpu->arch.shadow_dbg_reg;
+       switch_booke_debug_regs(&thread);
+       thread.debug = current->thread.debug;
+       current->thread.debug = vcpu->arch.shadow_dbg_reg;
+
        kvmppc_fix_ee_before_entry();
 
        ret = __kvmppc_vcpu_run(kvm_run, vcpu);
@@ -703,6 +736,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        /* No need for kvm_guest_exit. It's done in handle_exit.
           We also get here with interrupts enabled. */
 
+       /* Switch back to user space debug context */
+       switch_booke_debug_regs(&thread);
+       current->thread.debug = thread.debug;
+
 #ifdef CONFIG_PPC_FPU
        kvmppc_save_guest_fp(vcpu);
 
@@ -758,6 +795,30 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
        }
 }
 
+static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+       struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+       u32 dbsr = vcpu->arch.dbsr;
+
+       run->debug.arch.status = 0;
+       run->debug.arch.address = vcpu->arch.pc;
+
+       if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
+               run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
+       } else {
+               if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
+                       run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
+               else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
+                       run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
+               if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
+                       run->debug.arch.address = dbg_reg->dac1;
+               else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
+                       run->debug.arch.address = dbg_reg->dac2;
+       }
+
+       return RESUME_HOST;
+}
+
 static void kvmppc_fill_pt_regs(struct pt_regs *regs)
 {
        ulong r1, ip, msr, lr;
@@ -818,6 +879,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
        case BOOKE_INTERRUPT_CRITICAL:
                unknown_exception(&regs);
                break;
+       case BOOKE_INTERRUPT_DEBUG:
+               /* Save DBSR before preemption is enabled */
+               vcpu->arch.dbsr = mfspr(SPRN_DBSR);
+               kvmppc_clear_dbsr();
+               break;
        }
 }
 
@@ -1135,18 +1201,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        }
 
        case BOOKE_INTERRUPT_DEBUG: {
-               u32 dbsr;
-
-               vcpu->arch.pc = mfspr(SPRN_CSRR0);
-
-               /* clear IAC events in DBSR register */
-               dbsr = mfspr(SPRN_DBSR);
-               dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
-               mtspr(SPRN_DBSR, dbsr);
-
-               run->exit_reason = KVM_EXIT_DEBUG;
+               r = kvmppc_handle_debug(run, vcpu);
+               if (r == RESUME_HOST)
+                       run->exit_reason = KVM_EXIT_DEBUG;
                kvmppc_account_exit(vcpu, DEBUG_EXITS);
-               r = RESUME_HOST;
                break;
        }
 
@@ -1197,7 +1255,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        kvmppc_set_msr(vcpu, 0);
 
 #ifndef CONFIG_KVM_BOOKE_HV
-       vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
+       vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
        vcpu->arch.shadow_pid = 1;
        vcpu->arch.shared->msr = 0;
 #endif
@@ -1359,7 +1417,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
        sregs->u.e.features |= KVM_SREGS_E_IVOR;
 
@@ -1379,6 +1437,7 @@ void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
        sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
        sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+       return 0;
 }
 
 int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
@@ -1413,8 +1472,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
        get_sregs_base(vcpu, sregs);
        get_sregs_arch206(vcpu, sregs);
-       kvmppc_core_get_sregs(vcpu, sregs);
-       return 0;
+       return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
@@ -1433,7 +1491,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        if (ret < 0)
                return ret;
 
-       return kvmppc_core_set_sregs(vcpu, sregs);
+       return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
 }
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
@@ -1441,7 +1499,6 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
        int r = 0;
        union kvmppc_one_reg val;
        int size;
-       long int i;
 
        size = one_reg_size(reg->id);
        if (size > sizeof(val))
@@ -1449,16 +1506,24 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 
        switch (reg->id) {
        case KVM_REG_PPC_IAC1:
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1);
+               break;
        case KVM_REG_PPC_IAC2:
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2);
+               break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
        case KVM_REG_PPC_IAC3:
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3);
+               break;
        case KVM_REG_PPC_IAC4:
-               i = reg->id - KVM_REG_PPC_IAC1;
-               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4);
                break;
+#endif
        case KVM_REG_PPC_DAC1:
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1);
+               break;
        case KVM_REG_PPC_DAC2:
-               i = reg->id - KVM_REG_PPC_DAC1;
-               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);
                break;
        case KVM_REG_PPC_EPR: {
                u32 epr = get_guest_epr(vcpu);
@@ -1477,10 +1542,13 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                val = get_reg_val(reg->id, vcpu->arch.tsr);
                break;
        case KVM_REG_PPC_DEBUG_INST:
-               val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
+               val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG);
+               break;
+       case KVM_REG_PPC_VRSAVE:
+               val = get_reg_val(reg->id, vcpu->arch.vrsave);
                break;
        default:
-               r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+               r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
                break;
        }
 
@@ -1498,7 +1566,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
        int r = 0;
        union kvmppc_one_reg val;
        int size;
-       long int i;
 
        size = one_reg_size(reg->id);
        if (size > sizeof(val))
@@ -1509,16 +1576,24 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 
        switch (reg->id) {
        case KVM_REG_PPC_IAC1:
+               vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val);
+               break;
        case KVM_REG_PPC_IAC2:
+               vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val);
+               break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
        case KVM_REG_PPC_IAC3:
+               vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val);
+               break;
        case KVM_REG_PPC_IAC4:
-               i = reg->id - KVM_REG_PPC_IAC1;
-               vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
+               vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val);
                break;
+#endif
        case KVM_REG_PPC_DAC1:
+               vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val);
+               break;
        case KVM_REG_PPC_DAC2:
-               i = reg->id - KVM_REG_PPC_DAC1;
-               vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
+               vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val);
                break;
        case KVM_REG_PPC_EPR: {
                u32 new_epr = set_reg_val(reg->id, val);
@@ -1552,20 +1627,17 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
                kvmppc_set_tcr(vcpu, tcr);
                break;
        }
+       case KVM_REG_PPC_VRSAVE:
+               vcpu->arch.vrsave = set_reg_val(reg->id, val);
+               break;
        default:
-               r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+               r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
                break;
        }
 
        return r;
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-                                        struct kvm_guest_debug *dbg)
-{
-       return -EINVAL;
-}
-
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        return -ENOTSUPP;
@@ -1590,12 +1662,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
        return -ENOTSUPP;
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                              struct kvm_memory_slot *dont)
 {
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                               unsigned long npages)
 {
        return 0;
@@ -1671,6 +1743,157 @@ void kvmppc_decrementer_func(unsigned long data)
        kvmppc_set_tsr_bits(vcpu, TSR_DIS);
 }
 
+static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg,
+                                      uint64_t addr, int index)
+{
+       switch (index) {
+       case 0:
+               dbg_reg->dbcr0 |= DBCR0_IAC1;
+               dbg_reg->iac1 = addr;
+               break;
+       case 1:
+               dbg_reg->dbcr0 |= DBCR0_IAC2;
+               dbg_reg->iac2 = addr;
+               break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+       case 2:
+               dbg_reg->dbcr0 |= DBCR0_IAC3;
+               dbg_reg->iac3 = addr;
+               break;
+       case 3:
+               dbg_reg->dbcr0 |= DBCR0_IAC4;
+               dbg_reg->iac4 = addr;
+               break;
+#endif
+       default:
+               return -EINVAL;
+       }
+
+       dbg_reg->dbcr0 |= DBCR0_IDM;
+       return 0;
+}
+
+static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr,
+                                      int type, int index)
+{
+       switch (index) {
+       case 0:
+               if (type & KVMPPC_DEBUG_WATCH_READ)
+                       dbg_reg->dbcr0 |= DBCR0_DAC1R;
+               if (type & KVMPPC_DEBUG_WATCH_WRITE)
+                       dbg_reg->dbcr0 |= DBCR0_DAC1W;
+               dbg_reg->dac1 = addr;
+               break;
+       case 1:
+               if (type & KVMPPC_DEBUG_WATCH_READ)
+                       dbg_reg->dbcr0 |= DBCR0_DAC2R;
+               if (type & KVMPPC_DEBUG_WATCH_WRITE)
+                       dbg_reg->dbcr0 |= DBCR0_DAC2W;
+               dbg_reg->dac2 = addr;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       dbg_reg->dbcr0 |= DBCR0_IDM;
+       return 0;
+}
+void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set)
+{
+       /* XXX: Add similar MSR protection for BookE-PR */
+#ifdef CONFIG_KVM_BOOKE_HV
+       BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP));
+       if (set) {
+               if (prot_bitmap & MSR_UCLE)
+                       vcpu->arch.shadow_msrp |= MSRP_UCLEP;
+               if (prot_bitmap & MSR_DE)
+                       vcpu->arch.shadow_msrp |= MSRP_DEP;
+               if (prot_bitmap & MSR_PMM)
+                       vcpu->arch.shadow_msrp |= MSRP_PMMP;
+       } else {
+               if (prot_bitmap & MSR_UCLE)
+                       vcpu->arch.shadow_msrp &= ~MSRP_UCLEP;
+               if (prot_bitmap & MSR_DE)
+                       vcpu->arch.shadow_msrp &= ~MSRP_DEP;
+               if (prot_bitmap & MSR_PMM)
+                       vcpu->arch.shadow_msrp &= ~MSRP_PMMP;
+       }
+#endif
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+                                        struct kvm_guest_debug *dbg)
+{
+       struct debug_reg *dbg_reg;
+       int n, b = 0, w = 0;
+
+       if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
+               vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+               vcpu->guest_debug = 0;
+               kvm_guest_protect_msr(vcpu, MSR_DE, false);
+               return 0;
+       }
+
+       kvm_guest_protect_msr(vcpu, MSR_DE, true);
+       vcpu->guest_debug = dbg->control;
+       vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+       /* Set DBCR0_EDM in guest visible DBCR0 register. */
+       vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
+
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+
+       /* Code below handles only HW breakpoints */
+       dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+
+#ifdef CONFIG_KVM_BOOKE_HV
+       /*
+        * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1
+        * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0
+        */
+       dbg_reg->dbcr1 = 0;
+       dbg_reg->dbcr2 = 0;
+#else
+       /*
+        * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1
+        * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR
+        * is set.
+        */
+       dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
+                         DBCR1_IAC4US;
+       dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#endif
+
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+               return 0;
+
+       for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
+               uint64_t addr = dbg->arch.bp[n].addr;
+               uint32_t type = dbg->arch.bp[n].type;
+
+               if (type == KVMPPC_DEBUG_NONE)
+                       continue;
+
+               if (type & !(KVMPPC_DEBUG_WATCH_READ |
+                            KVMPPC_DEBUG_WATCH_WRITE |
+                            KVMPPC_DEBUG_BREAKPOINT))
+                       return -EINVAL;
+
+               if (type & KVMPPC_DEBUG_BREAKPOINT) {
+                       /* Setting H/W breakpoint */
+                       if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
+                               return -EINVAL;
+               } else {
+                       /* Setting H/W watchpoint */
+                       if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
+                                                       type, w++))
+                               return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
 void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        vcpu->cpu = smp_processor_id();
@@ -1681,6 +1904,44 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
 {
        current->thread.kvm_vcpu = NULL;
        vcpu->cpu = -1;
+
+       /* Clear pending debug event in DBSR */
+       kvmppc_clear_dbsr();
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+       vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
+}
+
+int kvmppc_core_init_vm(struct kvm *kvm)
+{
+       return kvm->arch.kvm_ops->init_vm(kvm);
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+       return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+       vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+}
+
+void kvmppc_core_destroy_vm(struct kvm *kvm)
+{
+       kvm->arch.kvm_ops->destroy_vm(kvm);
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+       vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+       vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
 }
 
 int __init kvmppc_booke_init(void)
index 5fd1ba6..09bfd9b 100644 (file)
@@ -99,6 +99,30 @@ enum int_class {
 
 void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
 
+extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                                     unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn,
+                                        ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn,
+                                        ulong *spr_val);
+extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
+                                      struct kvm_vcpu *vcpu,
+                                      unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
+                                         ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
+                                         ulong *spr_val);
+extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
+                                      struct kvm_vcpu *vcpu,
+                                      unsigned int inst, int *advance);
+extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
+                                         ulong spr_val);
+extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
+                                         ulong *spr_val);
+
 /*
  * Load up guest vcpu FP state if it's needed.
  * It also set the MSR_FP in thread so that host know
@@ -129,4 +153,9 @@ static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
                giveup_fpu(current);
 #endif
 }
+
+static inline void kvmppc_clear_dbsr(void)
+{
+       mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
+}
 #endif /* __KVM_BOOKE_H__ */
index ce6b73c..497b142 100644 (file)
@@ -305,7 +305,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 {
 }
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
 {
        kvmppc_booke_vcpu_load(vcpu, cpu);
 
@@ -313,7 +313,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SPE
        if (vcpu->arch.shadow_msr & MSR_SPE)
@@ -367,7 +367,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_get_sregs_e500(struct kvm_vcpu *vcpu,
+                                     struct kvm_sregs *sregs)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
@@ -388,9 +389,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 
        kvmppc_get_sregs_ivor(vcpu, sregs);
        kvmppc_get_sregs_e500_tlb(vcpu, sregs);
+       return 0;
 }
 
-int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_set_sregs_e500(struct kvm_vcpu *vcpu,
+                                     struct kvm_sregs *sregs)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
        int ret;
@@ -425,21 +428,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
-                       union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
+                                  union kvmppc_one_reg *val)
 {
        int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
        return r;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
-                      union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
+                                  union kvmppc_one_reg *val)
 {
        int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
        return r;
 }
 
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
+                                                    unsigned int id)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500;
        struct kvm_vcpu *vcpu;
@@ -481,7 +485,7 @@ out:
        return ERR_PTR(err);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
@@ -492,15 +496,32 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
        kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
 }
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_e500(struct kvm *kvm)
 {
        return 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_e500(struct kvm *kvm)
 {
 }
 
+static struct kvmppc_ops kvm_ops_e500 = {
+       .get_sregs = kvmppc_core_get_sregs_e500,
+       .set_sregs = kvmppc_core_set_sregs_e500,
+       .get_one_reg = kvmppc_get_one_reg_e500,
+       .set_one_reg = kvmppc_set_one_reg_e500,
+       .vcpu_load   = kvmppc_core_vcpu_load_e500,
+       .vcpu_put    = kvmppc_core_vcpu_put_e500,
+       .vcpu_create = kvmppc_core_vcpu_create_e500,
+       .vcpu_free   = kvmppc_core_vcpu_free_e500,
+       .mmu_destroy  = kvmppc_mmu_destroy_e500,
+       .init_vm = kvmppc_core_init_vm_e500,
+       .destroy_vm = kvmppc_core_destroy_vm_e500,
+       .emulate_op = kvmppc_core_emulate_op_e500,
+       .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
+       .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+};
+
 static int __init kvmppc_e500_init(void)
 {
        int r, i;
@@ -512,11 +533,11 @@ static int __init kvmppc_e500_init(void)
 
        r = kvmppc_core_check_processor_compat();
        if (r)
-               return r;
+               goto err_out;
 
        r = kvmppc_booke_init();
        if (r)
-               return r;
+               goto err_out;
 
        /* copy extra E500 exception handlers */
        ivor[0] = mfspr(SPRN_IVOR32);
@@ -534,11 +555,19 @@ static int __init kvmppc_e500_init(void)
        flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
                           ivor[max_ivor] + handler_len);
 
-       return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+       r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+       if (r)
+               goto err_out;
+       kvm_ops_e500.owner = THIS_MODULE;
+       kvmppc_pr_ops = &kvm_ops_e500;
+
+err_out:
+       return r;
 }
 
 static void __exit kvmppc_e500_exit(void)
 {
+       kvmppc_pr_ops = NULL;
        kvmppc_booke_exit();
 }
 
index c2e5e98..4fd9650 100644 (file)
@@ -117,7 +117,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
 #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
 #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
 #define MAS2_ATTRIB_MASK \
-         (MAS2_X0 | MAS2_X1)
+         (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G)
 #define MAS3_ATTRIB_MASK \
          (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
           | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
index b10a012..89b7f82 100644 (file)
@@ -26,6 +26,7 @@
 #define XOP_TLBRE   946
 #define XOP_TLBWE   978
 #define XOP_TLBILX  18
+#define XOP_EHPRIV  270
 
 #ifdef CONFIG_KVM_E500MC
 static int dbell2prio(ulong param)
@@ -82,8 +83,28 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
 }
 #endif
 
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                           unsigned int inst, int *advance)
+static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                                  unsigned int inst, int *advance)
+{
+       int emulated = EMULATE_DONE;
+
+       switch (get_oc(inst)) {
+       case EHPRIV_OC_DEBUG:
+               run->exit_reason = KVM_EXIT_DEBUG;
+               run->debug.arch.address = vcpu->arch.pc;
+               run->debug.arch.status = 0;
+               kvmppc_account_exit(vcpu, DEBUG_EXITS);
+               emulated = EMULATE_EXIT_USER;
+               *advance = 0;
+               break;
+       default:
+               emulated = EMULATE_FAIL;
+       }
+       return emulated;
+}
+
+int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                               unsigned int inst, int *advance)
 {
        int emulated = EMULATE_DONE;
        int ra = get_ra(inst);
@@ -130,6 +151,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
                        break;
 
+               case XOP_EHPRIV:
+                       emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
+                                                          advance);
+                       break;
+
                default:
                        emulated = EMULATE_FAIL;
                }
@@ -146,7 +172,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return emulated;
 }
 
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
+int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
        int emulated = EMULATE_DONE;
@@ -237,7 +263,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
        return emulated;
 }
 
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
+int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
        int emulated = EMULATE_DONE;
index 6d6f153..ebca6b8 100644 (file)
@@ -32,7 +32,7 @@
 #include <asm/kvm_ppc.h>
 
 #include "e500.h"
-#include "trace.h"
+#include "trace_booke.h"
 #include "timing.h"
 #include "e500_mmu_host.h"
 
@@ -536,7 +536,7 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
        return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
 }
 
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu)
 {
 }
 
index 1c6a9d7..ecf2247 100644 (file)
 #include <asm/kvm_ppc.h>
 
 #include "e500.h"
-#include "trace.h"
 #include "timing.h"
 #include "e500_mmu_host.h"
 
+#include "trace_booke.h"
+
 #define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1)
 
 static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM];
@@ -253,6 +254,9 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
        ref->pfn = pfn;
        ref->flags |= E500_TLB_VALID;
 
+       /* Mark the page accessed */
+       kvm_set_pfn_accessed(pfn);
+
        if (tlbe_is_writable(gtlbe))
                kvm_set_pfn_dirty(pfn);
 }
@@ -332,6 +336,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
        unsigned long hva;
        int pfnmap = 0;
        int tsize = BOOK3E_PAGESZ_4K;
+       int ret = 0;
+       unsigned long mmu_seq;
+       struct kvm *kvm = vcpu_e500->vcpu.kvm;
+
+       /* used to check for invalidations in progress */
+       mmu_seq = kvm->mmu_notifier_seq;
+       smp_rmb();
 
        /*
         * Translate guest physical to true physical, acquiring
@@ -449,6 +460,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
                gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
        }
 
+       spin_lock(&kvm->mmu_lock);
+       if (mmu_notifier_retry(kvm, mmu_seq)) {
+               ret = -EAGAIN;
+               goto out;
+       }
+
        kvmppc_e500_ref_setup(ref, gtlbe, pfn);
 
        kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
@@ -457,10 +474,13 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
        /* Clear i-cache for new pages */
        kvmppc_mmu_flush_icache(pfn);
 
+out:
+       spin_unlock(&kvm->mmu_lock);
+
        /* Drop refcount on page, so that mmu notifiers can clear it */
        kvm_release_pfn_clean(pfn);
 
-       return 0;
+       return ret;
 }
 
 /* XXX only map the one-one case, for now use TLB0 */
index 19c8379..4132cd2 100644 (file)
@@ -110,7 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
 
 static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu);
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
@@ -147,7 +147,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        kvmppc_load_guest_fp(vcpu);
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
 {
        vcpu->arch.eplc = mfspr(SPRN_EPLC);
        vcpu->arch.epsc = mfspr(SPRN_EPSC);
@@ -204,7 +204,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_get_sregs_e500mc(struct kvm_vcpu *vcpu,
+                                       struct kvm_sregs *sregs)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
@@ -224,10 +225,11 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL];
        sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT];
 
-       kvmppc_get_sregs_ivor(vcpu, sregs);
+       return kvmppc_get_sregs_ivor(vcpu, sregs);
 }
 
-int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu,
+                                       struct kvm_sregs *sregs)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
        int ret;
@@ -260,21 +262,22 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
-                       union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
+                             union kvmppc_one_reg *val)
 {
        int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
        return r;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
-                      union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
+                             union kvmppc_one_reg *val)
 {
        int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
        return r;
 }
 
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
+                                                      unsigned int id)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500;
        struct kvm_vcpu *vcpu;
@@ -315,7 +318,7 @@ out:
        return ERR_PTR(err);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
@@ -325,7 +328,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
        kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
 }
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
 {
        int lpid;
 
@@ -337,27 +340,52 @@ int kvmppc_core_init_vm(struct kvm *kvm)
        return 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)
 {
        kvmppc_free_lpid(kvm->arch.lpid);
 }
 
+static struct kvmppc_ops kvm_ops_e500mc = {
+       .get_sregs = kvmppc_core_get_sregs_e500mc,
+       .set_sregs = kvmppc_core_set_sregs_e500mc,
+       .get_one_reg = kvmppc_get_one_reg_e500mc,
+       .set_one_reg = kvmppc_set_one_reg_e500mc,
+       .vcpu_load   = kvmppc_core_vcpu_load_e500mc,
+       .vcpu_put    = kvmppc_core_vcpu_put_e500mc,
+       .vcpu_create = kvmppc_core_vcpu_create_e500mc,
+       .vcpu_free   = kvmppc_core_vcpu_free_e500mc,
+       .mmu_destroy  = kvmppc_mmu_destroy_e500,
+       .init_vm = kvmppc_core_init_vm_e500mc,
+       .destroy_vm = kvmppc_core_destroy_vm_e500mc,
+       .emulate_op = kvmppc_core_emulate_op_e500,
+       .emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
+       .emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+};
+
 static int __init kvmppc_e500mc_init(void)
 {
        int r;
 
        r = kvmppc_booke_init();
        if (r)
-               return r;
+               goto err_out;
 
        kvmppc_init_lpid(64);
        kvmppc_claim_lpid(0); /* host */
 
-       return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+       r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+       if (r)
+               goto err_out;
+       kvm_ops_e500mc.owner = THIS_MODULE;
+       kvmppc_pr_ops = &kvm_ops_e500mc;
+
+err_out:
+       return r;
 }
 
 static void __exit kvmppc_e500mc_exit(void)
 {
+       kvmppc_pr_ops = NULL;
        kvmppc_booke_exit();
 }
 
index 751cd45..2f9a087 100644 (file)
@@ -130,8 +130,8 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
        case SPRN_PIR: break;
 
        default:
-               emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
-                                                    spr_val);
+               emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn,
+                                                                 spr_val);
                if (emulated == EMULATE_FAIL)
                        printk(KERN_INFO "mtspr: unknown spr "
                                "0x%x\n", sprn);
@@ -191,8 +191,8 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
                spr_val = kvmppc_get_dec(vcpu, get_tb());
                break;
        default:
-               emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
-                                                    &spr_val);
+               emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn,
+                                                                 &spr_val);
                if (unlikely(emulated == EMULATE_FAIL)) {
                        printk(KERN_INFO "mfspr: unknown spr "
                                "0x%x\n", sprn);
@@ -464,7 +464,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
        }
 
        if (emulated == EMULATE_FAIL) {
-               emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
+               emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst,
+                                                              &advance);
                if (emulated == EMULATE_AGAIN) {
                        advance = 0;
                } else if (emulated == EMULATE_FAIL) {
@@ -483,3 +484,4 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
        return emulated;
 }
+EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
index 07c0106..9ae9768 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/module.h>
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
+struct kvmppc_ops *kvmppc_hv_ops;
+EXPORT_SYMBOL_GPL(kvmppc_hv_ops);
+struct kvmppc_ops *kvmppc_pr_ops;
+EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
+
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
        return !!(v->arch.pending_exceptions) ||
@@ -50,7 +57,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-#ifndef CONFIG_KVM_BOOK3S_64_HV
 /*
  * Common checks before entering the guest world.  Call with interrupts
  * disabled.
@@ -125,7 +131,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 
        return r;
 }
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
+EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
 
 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 {
@@ -179,6 +185,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 
        return r;
 }
+EXPORT_SYMBOL_GPL(kvmppc_kvm_pv);
 
 int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
 {
@@ -192,11 +199,9 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
        if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
                goto out;
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
        /* HV KVM can only do PAPR mode for now */
-       if (!vcpu->arch.papr_enabled)
+       if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm))
                goto out;
-#endif
 
 #ifdef CONFIG_KVM_BOOKE_HV
        if (!cpu_has_feature(CPU_FTR_EMB_HV))
@@ -209,6 +214,7 @@ out:
        vcpu->arch.sane = r;
        return r ? 0 : -EINVAL;
 }
+EXPORT_SYMBOL_GPL(kvmppc_sanity_check);
 
 int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
@@ -243,6 +249,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
        return r;
 }
+EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio);
 
 int kvm_arch_hardware_enable(void *garbage)
 {
@@ -269,10 +276,35 @@ void kvm_arch_check_processor_compat(void *rtn)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-       if (type)
-               return -EINVAL;
-
+       struct kvmppc_ops *kvm_ops = NULL;
+       /*
+        * if we have both HV and PR enabled, default is HV
+        */
+       if (type == 0) {
+               if (kvmppc_hv_ops)
+                       kvm_ops = kvmppc_hv_ops;
+               else
+                       kvm_ops = kvmppc_pr_ops;
+               if (!kvm_ops)
+                       goto err_out;
+       } else  if (type == KVM_VM_PPC_HV) {
+               if (!kvmppc_hv_ops)
+                       goto err_out;
+               kvm_ops = kvmppc_hv_ops;
+       } else if (type == KVM_VM_PPC_PR) {
+               if (!kvmppc_pr_ops)
+                       goto err_out;
+               kvm_ops = kvmppc_pr_ops;
+       } else
+               goto err_out;
+
+       if (kvm_ops->owner && !try_module_get(kvm_ops->owner))
+               return -ENOENT;
+
+       kvm->arch.kvm_ops = kvm_ops;
        return kvmppc_core_init_vm(kvm);
+err_out:
+       return -EINVAL;
 }
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
@@ -292,6 +324,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        kvmppc_core_destroy_vm(kvm);
 
        mutex_unlock(&kvm->lock);
+
+       /* drop the module reference */
+       module_put(kvm->arch.kvm_ops->owner);
 }
 
 void kvm_arch_sync_events(struct kvm *kvm)
@@ -301,6 +336,10 @@ void kvm_arch_sync_events(struct kvm *kvm)
 int kvm_dev_ioctl_check_extension(long ext)
 {
        int r;
+       /* FIXME!!
+        * Should some of this be vm ioctl ? is it possible now ?
+        */
+       int hv_enabled = kvmppc_hv_ops ? 1 : 0;
 
        switch (ext) {
 #ifdef CONFIG_BOOKE
@@ -320,22 +359,26 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_DEVICE_CTRL:
                r = 1;
                break;
-#ifndef CONFIG_KVM_BOOK3S_64_HV
        case KVM_CAP_PPC_PAIRED_SINGLES:
        case KVM_CAP_PPC_OSI:
        case KVM_CAP_PPC_GET_PVINFO:
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
        case KVM_CAP_SW_TLB:
 #endif
-#ifdef CONFIG_KVM_MPIC
-       case KVM_CAP_IRQ_MPIC:
-#endif
-               r = 1;
+               /* We support this only for PR */
+               r = !hv_enabled;
                break;
+#ifdef CONFIG_KVM_MMIO
        case KVM_CAP_COALESCED_MMIO:
                r = KVM_COALESCED_MMIO_PAGE_OFFSET;
                break;
 #endif
+#ifdef CONFIG_KVM_MPIC
+       case KVM_CAP_IRQ_MPIC:
+               r = 1;
+               break;
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
        case KVM_CAP_SPAPR_TCE:
        case KVM_CAP_PPC_ALLOC_HTAB:
@@ -346,32 +389,37 @@ int kvm_dev_ioctl_check_extension(long ext)
                r = 1;
                break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        case KVM_CAP_PPC_SMT:
-               r = threads_per_core;
+               if (hv_enabled)
+                       r = threads_per_core;
+               else
+                       r = 0;
                break;
        case KVM_CAP_PPC_RMA:
-               r = 1;
+               r = hv_enabled;
                /* PPC970 requires an RMA */
-               if (cpu_has_feature(CPU_FTR_ARCH_201))
+               if (r && cpu_has_feature(CPU_FTR_ARCH_201))
                        r = 2;
                break;
 #endif
        case KVM_CAP_SYNC_MMU:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
-               r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+               if (hv_enabled)
+                       r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+               else
+                       r = 0;
 #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
                r = 1;
 #else
                r = 0;
-               break;
 #endif
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+               break;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        case KVM_CAP_PPC_HTAB_FD:
-               r = 1;
+               r = hv_enabled;
                break;
 #endif
-               break;
        case KVM_CAP_NR_VCPUS:
                /*
                 * Recommending a number of CPUs is somewhat arbitrary; we
@@ -379,11 +427,10 @@ int kvm_dev_ioctl_check_extension(long ext)
                 * will have secondary threads "offline"), and for other KVM
                 * implementations just count online CPUs.
                 */
-#ifdef CONFIG_KVM_BOOK3S_64_HV
-               r = num_present_cpus();
-#else
-               r = num_online_cpus();
-#endif
+               if (hv_enabled)
+                       r = num_present_cpus();
+               else
+                       r = num_online_cpus();
                break;
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
@@ -407,15 +454,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
        return -EINVAL;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
 {
-       kvmppc_core_free_memslot(free, dont);
+       kvmppc_core_free_memslot(kvm, free, dont);
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages)
 {
-       return kvmppc_core_create_memslot(slot, npages);
+       return kvmppc_core_create_memslot(kvm, slot, npages);
 }
 
 void kvm_arch_memslots_updated(struct kvm *kvm)
@@ -659,6 +707,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        return EMULATE_DO_MMIO;
 }
+EXPORT_SYMBOL_GPL(kvmppc_handle_load);
 
 /* Same as above, but sign extends */
 int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -720,6 +769,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        return EMULATE_DO_MMIO;
 }
+EXPORT_SYMBOL_GPL(kvmppc_handle_store);
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
@@ -1024,52 +1074,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
                goto out;
        }
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-#ifdef CONFIG_KVM_BOOK3S_64_HV
-       case KVM_ALLOCATE_RMA: {
-               struct kvm_allocate_rma rma;
-               struct kvm *kvm = filp->private_data;
-
-               r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
-               if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
-                       r = -EFAULT;
-               break;
-       }
-
-       case KVM_PPC_ALLOCATE_HTAB: {
-               u32 htab_order;
-
-               r = -EFAULT;
-               if (get_user(htab_order, (u32 __user *)argp))
-                       break;
-               r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
-               if (r)
-                       break;
-               r = -EFAULT;
-               if (put_user(htab_order, (u32 __user *)argp))
-                       break;
-               r = 0;
-               break;
-       }
-
-       case KVM_PPC_GET_HTAB_FD: {
-               struct kvm_get_htab_fd ghf;
-
-               r = -EFAULT;
-               if (copy_from_user(&ghf, argp, sizeof(ghf)))
-                       break;
-               r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
-               break;
-       }
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
-
-#ifdef CONFIG_PPC_BOOK3S_64
        case KVM_PPC_GET_SMMU_INFO: {
                struct kvm_ppc_smmu_info info;
+               struct kvm *kvm = filp->private_data;
 
                memset(&info, 0, sizeof(info));
-               r = kvm_vm_ioctl_get_smmu_info(kvm, &info);
+               r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info);
                if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
                        r = -EFAULT;
                break;
@@ -1080,11 +1090,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
                break;
        }
-#endif /* CONFIG_PPC_BOOK3S_64 */
+       default: {
+               struct kvm *kvm = filp->private_data;
+               r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
+       }
+#else /* CONFIG_PPC_BOOK3S_64 */
        default:
                r = -ENOTTY;
+#endif
        }
-
 out:
        return r;
 }
@@ -1106,22 +1120,26 @@ long kvmppc_alloc_lpid(void)
 
        return lpid;
 }
+EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);
 
 void kvmppc_claim_lpid(long lpid)
 {
        set_bit(lpid, lpid_inuse);
 }
+EXPORT_SYMBOL_GPL(kvmppc_claim_lpid);
 
 void kvmppc_free_lpid(long lpid)
 {
        clear_bit(lpid, lpid_inuse);
 }
+EXPORT_SYMBOL_GPL(kvmppc_free_lpid);
 
 void kvmppc_init_lpid(unsigned long nr_lpids_param)
 {
        nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
        memset(lpid_inuse, 0, sizeof(lpid_inuse));
 }
+EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
 
 int kvm_arch_init(void *opaque)
 {
@@ -1130,4 +1148,5 @@ int kvm_arch_init(void *opaque)
 
 void kvm_arch_exit(void)
 {
+
 }
index e326489..2e0e67e 100644 (file)
@@ -31,126 +31,6 @@ TRACE_EVENT(kvm_ppc_instr,
                  __entry->inst, __entry->pc, __entry->emulate)
 );
 
-#ifdef CONFIG_PPC_BOOK3S
-#define kvm_trace_symbol_exit \
-       {0x100, "SYSTEM_RESET"}, \
-       {0x200, "MACHINE_CHECK"}, \
-       {0x300, "DATA_STORAGE"}, \
-       {0x380, "DATA_SEGMENT"}, \
-       {0x400, "INST_STORAGE"}, \
-       {0x480, "INST_SEGMENT"}, \
-       {0x500, "EXTERNAL"}, \
-       {0x501, "EXTERNAL_LEVEL"}, \
-       {0x502, "EXTERNAL_HV"}, \
-       {0x600, "ALIGNMENT"}, \
-       {0x700, "PROGRAM"}, \
-       {0x800, "FP_UNAVAIL"}, \
-       {0x900, "DECREMENTER"}, \
-       {0x980, "HV_DECREMENTER"}, \
-       {0xc00, "SYSCALL"}, \
-       {0xd00, "TRACE"}, \
-       {0xe00, "H_DATA_STORAGE"}, \
-       {0xe20, "H_INST_STORAGE"}, \
-       {0xe40, "H_EMUL_ASSIST"}, \
-       {0xf00, "PERFMON"}, \
-       {0xf20, "ALTIVEC"}, \
-       {0xf40, "VSX"}
-#else
-#define kvm_trace_symbol_exit \
-       {0, "CRITICAL"}, \
-       {1, "MACHINE_CHECK"}, \
-       {2, "DATA_STORAGE"}, \
-       {3, "INST_STORAGE"}, \
-       {4, "EXTERNAL"}, \
-       {5, "ALIGNMENT"}, \
-       {6, "PROGRAM"}, \
-       {7, "FP_UNAVAIL"}, \
-       {8, "SYSCALL"}, \
-       {9, "AP_UNAVAIL"}, \
-       {10, "DECREMENTER"}, \
-       {11, "FIT"}, \
-       {12, "WATCHDOG"}, \
-       {13, "DTLB_MISS"}, \
-       {14, "ITLB_MISS"}, \
-       {15, "DEBUG"}, \
-       {32, "SPE_UNAVAIL"}, \
-       {33, "SPE_FP_DATA"}, \
-       {34, "SPE_FP_ROUND"}, \
-       {35, "PERFORMANCE_MONITOR"}, \
-       {36, "DOORBELL"}, \
-       {37, "DOORBELL_CRITICAL"}, \
-       {38, "GUEST_DBELL"}, \
-       {39, "GUEST_DBELL_CRIT"}, \
-       {40, "HV_SYSCALL"}, \
-       {41, "HV_PRIV"}
-#endif
-
-TRACE_EVENT(kvm_exit,
-       TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
-       TP_ARGS(exit_nr, vcpu),
-
-       TP_STRUCT__entry(
-               __field(        unsigned int,   exit_nr         )
-               __field(        unsigned long,  pc              )
-               __field(        unsigned long,  msr             )
-               __field(        unsigned long,  dar             )
-#ifdef CONFIG_KVM_BOOK3S_PR
-               __field(        unsigned long,  srr1            )
-#endif
-               __field(        unsigned long,  last_inst       )
-       ),
-
-       TP_fast_assign(
-#ifdef CONFIG_KVM_BOOK3S_PR
-               struct kvmppc_book3s_shadow_vcpu *svcpu;
-#endif
-               __entry->exit_nr        = exit_nr;
-               __entry->pc             = kvmppc_get_pc(vcpu);
-               __entry->dar            = kvmppc_get_fault_dar(vcpu);
-               __entry->msr            = vcpu->arch.shared->msr;
-#ifdef CONFIG_KVM_BOOK3S_PR
-               svcpu = svcpu_get(vcpu);
-               __entry->srr1           = svcpu->shadow_srr1;
-               svcpu_put(svcpu);
-#endif
-               __entry->last_inst      = vcpu->arch.last_inst;
-       ),
-
-       TP_printk("exit=%s"
-               " | pc=0x%lx"
-               " | msr=0x%lx"
-               " | dar=0x%lx"
-#ifdef CONFIG_KVM_BOOK3S_PR
-               " | srr1=0x%lx"
-#endif
-               " | last_inst=0x%lx"
-               ,
-               __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
-               __entry->pc,
-               __entry->msr,
-               __entry->dar,
-#ifdef CONFIG_KVM_BOOK3S_PR
-               __entry->srr1,
-#endif
-               __entry->last_inst
-               )
-);
-
-TRACE_EVENT(kvm_unmap_hva,
-       TP_PROTO(unsigned long hva),
-       TP_ARGS(hva),
-
-       TP_STRUCT__entry(
-               __field(        unsigned long,  hva             )
-       ),
-
-       TP_fast_assign(
-               __entry->hva            = hva;
-       ),
-
-       TP_printk("unmap hva 0x%lx\n", __entry->hva)
-);
-
 TRACE_EVENT(kvm_stlb_inval,
        TP_PROTO(unsigned int stlb_index),
        TP_ARGS(stlb_index),
@@ -236,315 +116,6 @@ TRACE_EVENT(kvm_check_requests,
                __entry->cpu_nr, __entry->requests)
 );
 
-
-/*************************************************************************
- *                         Book3S trace points                           *
- *************************************************************************/
-
-#ifdef CONFIG_KVM_BOOK3S_PR
-
-TRACE_EVENT(kvm_book3s_reenter,
-       TP_PROTO(int r, struct kvm_vcpu *vcpu),
-       TP_ARGS(r, vcpu),
-
-       TP_STRUCT__entry(
-               __field(        unsigned int,   r               )
-               __field(        unsigned long,  pc              )
-       ),
-
-       TP_fast_assign(
-               __entry->r              = r;
-               __entry->pc             = kvmppc_get_pc(vcpu);
-       ),
-
-       TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
-);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-
-TRACE_EVENT(kvm_book3s_64_mmu_map,
-       TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
-                struct kvmppc_pte *orig_pte),
-       TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
-
-       TP_STRUCT__entry(
-               __field(        unsigned char,          flag_w          )
-               __field(        unsigned char,          flag_x          )
-               __field(        unsigned long,          eaddr           )
-               __field(        unsigned long,          hpteg           )
-               __field(        unsigned long,          va              )
-               __field(        unsigned long long,     vpage           )
-               __field(        unsigned long,          hpaddr          )
-       ),
-
-       TP_fast_assign(
-               __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
-               __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
-               __entry->eaddr  = orig_pte->eaddr;
-               __entry->hpteg  = hpteg;
-               __entry->va     = va;
-               __entry->vpage  = orig_pte->vpage;
-               __entry->hpaddr = hpaddr;
-       ),
-
-       TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
-                 __entry->flag_w, __entry->flag_x, __entry->eaddr,
-                 __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
-);
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-TRACE_EVENT(kvm_book3s_mmu_map,
-       TP_PROTO(struct hpte_cache *pte),
-       TP_ARGS(pte),
-
-       TP_STRUCT__entry(
-               __field(        u64,            host_vpn        )
-               __field(        u64,            pfn             )
-               __field(        ulong,          eaddr           )
-               __field(        u64,            vpage           )
-               __field(        ulong,          raddr           )
-               __field(        int,            flags           )
-       ),
-
-       TP_fast_assign(
-               __entry->host_vpn       = pte->host_vpn;
-               __entry->pfn            = pte->pfn;
-               __entry->eaddr          = pte->pte.eaddr;
-               __entry->vpage          = pte->pte.vpage;
-               __entry->raddr          = pte->pte.raddr;
-               __entry->flags          = (pte->pte.may_read ? 0x4 : 0) |
-                                         (pte->pte.may_write ? 0x2 : 0) |
-                                         (pte->pte.may_execute ? 0x1 : 0);
-       ),
-
-       TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
-                 __entry->host_vpn, __entry->pfn, __entry->eaddr,
-                 __entry->vpage, __entry->raddr, __entry->flags)
-);
-
-TRACE_EVENT(kvm_book3s_mmu_invalidate,
-       TP_PROTO(struct hpte_cache *pte),
-       TP_ARGS(pte),
-
-       TP_STRUCT__entry(
-               __field(        u64,            host_vpn        )
-               __field(        u64,            pfn             )
-               __field(        ulong,          eaddr           )
-               __field(        u64,            vpage           )
-               __field(        ulong,          raddr           )
-               __field(        int,            flags           )
-       ),
-
-       TP_fast_assign(
-               __entry->host_vpn       = pte->host_vpn;
-               __entry->pfn            = pte->pfn;
-               __entry->eaddr          = pte->pte.eaddr;
-               __entry->vpage          = pte->pte.vpage;
-               __entry->raddr          = pte->pte.raddr;
-               __entry->flags          = (pte->pte.may_read ? 0x4 : 0) |
-                                         (pte->pte.may_write ? 0x2 : 0) |
-                                         (pte->pte.may_execute ? 0x1 : 0);
-       ),
-
-       TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
-                 __entry->host_vpn, __entry->pfn, __entry->eaddr,
-                 __entry->vpage, __entry->raddr, __entry->flags)
-);
-
-TRACE_EVENT(kvm_book3s_mmu_flush,
-       TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
-                unsigned long long p2),
-       TP_ARGS(type, vcpu, p1, p2),
-
-       TP_STRUCT__entry(
-               __field(        int,                    count           )
-               __field(        unsigned long long,     p1              )
-               __field(        unsigned long long,     p2              )
-               __field(        const char *,           type            )
-       ),
-
-       TP_fast_assign(
-               __entry->count          = to_book3s(vcpu)->hpte_cache_count;
-               __entry->p1             = p1;
-               __entry->p2             = p2;
-               __entry->type           = type;
-       ),
-
-       TP_printk("Flush %d %sPTEs: %llx - %llx",
-                 __entry->count, __entry->type, __entry->p1, __entry->p2)
-);
-
-TRACE_EVENT(kvm_book3s_slb_found,
-       TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
-       TP_ARGS(gvsid, hvsid),
-
-       TP_STRUCT__entry(
-               __field(        unsigned long long,     gvsid           )
-               __field(        unsigned long long,     hvsid           )
-       ),
-
-       TP_fast_assign(
-               __entry->gvsid          = gvsid;
-               __entry->hvsid          = hvsid;
-       ),
-
-       TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
-);
-
-TRACE_EVENT(kvm_book3s_slb_fail,
-       TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
-       TP_ARGS(sid_map_mask, gvsid),
-
-       TP_STRUCT__entry(
-               __field(        unsigned short,         sid_map_mask    )
-               __field(        unsigned long long,     gvsid           )
-       ),
-
-       TP_fast_assign(
-               __entry->sid_map_mask   = sid_map_mask;
-               __entry->gvsid          = gvsid;
-       ),
-
-       TP_printk("%x/%x: %llx", __entry->sid_map_mask,
-                 SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
-);
-
-TRACE_EVENT(kvm_book3s_slb_map,
-       TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
-                unsigned long long hvsid),
-       TP_ARGS(sid_map_mask, gvsid, hvsid),
-
-       TP_STRUCT__entry(
-               __field(        unsigned short,         sid_map_mask    )
-               __field(        unsigned long long,     guest_vsid      )
-               __field(        unsigned long long,     host_vsid       )
-       ),
-
-       TP_fast_assign(
-               __entry->sid_map_mask   = sid_map_mask;
-               __entry->guest_vsid     = gvsid;
-               __entry->host_vsid      = hvsid;
-       ),
-
-       TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
-                 __entry->guest_vsid, __entry->host_vsid)
-);
-
-TRACE_EVENT(kvm_book3s_slbmte,
-       TP_PROTO(u64 slb_vsid, u64 slb_esid),
-       TP_ARGS(slb_vsid, slb_esid),
-
-       TP_STRUCT__entry(
-               __field(        u64,    slb_vsid        )
-               __field(        u64,    slb_esid        )
-       ),
-
-       TP_fast_assign(
-               __entry->slb_vsid       = slb_vsid;
-               __entry->slb_esid       = slb_esid;
-       ),
-
-       TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
-);
-
-#endif /* CONFIG_PPC_BOOK3S */
-
-
-/*************************************************************************
- *                         Book3E trace points                           *
- *************************************************************************/
-
-#ifdef CONFIG_BOOKE
-
-TRACE_EVENT(kvm_booke206_stlb_write,
-       TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
-       TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
-
-       TP_STRUCT__entry(
-               __field(        __u32,  mas0            )
-               __field(        __u32,  mas8            )
-               __field(        __u32,  mas1            )
-               __field(        __u64,  mas2            )
-               __field(        __u64,  mas7_3          )
-       ),
-
-       TP_fast_assign(
-               __entry->mas0           = mas0;
-               __entry->mas8           = mas8;
-               __entry->mas1           = mas1;
-               __entry->mas2           = mas2;
-               __entry->mas7_3         = mas7_3;
-       ),
-
-       TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
-               __entry->mas0, __entry->mas8, __entry->mas1,
-               __entry->mas2, __entry->mas7_3)
-);
-
-TRACE_EVENT(kvm_booke206_gtlb_write,
-       TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
-       TP_ARGS(mas0, mas1, mas2, mas7_3),
-
-       TP_STRUCT__entry(
-               __field(        __u32,  mas0            )
-               __field(        __u32,  mas1            )
-               __field(        __u64,  mas2            )
-               __field(        __u64,  mas7_3          )
-       ),
-
-       TP_fast_assign(
-               __entry->mas0           = mas0;
-               __entry->mas1           = mas1;
-               __entry->mas2           = mas2;
-               __entry->mas7_3         = mas7_3;
-       ),
-
-       TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
-               __entry->mas0, __entry->mas1,
-               __entry->mas2, __entry->mas7_3)
-);
-
-TRACE_EVENT(kvm_booke206_ref_release,
-       TP_PROTO(__u64 pfn, __u32 flags),
-       TP_ARGS(pfn, flags),
-
-       TP_STRUCT__entry(
-               __field(        __u64,  pfn             )
-               __field(        __u32,  flags           )
-       ),
-
-       TP_fast_assign(
-               __entry->pfn            = pfn;
-               __entry->flags          = flags;
-       ),
-
-       TP_printk("pfn=%llx flags=%x",
-               __entry->pfn, __entry->flags)
-);
-
-TRACE_EVENT(kvm_booke_queue_irqprio,
-       TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
-       TP_ARGS(vcpu, priority),
-
-       TP_STRUCT__entry(
-               __field(        __u32,  cpu_nr          )
-               __field(        __u32,  priority                )
-               __field(        unsigned long,  pending         )
-       ),
-
-       TP_fast_assign(
-               __entry->cpu_nr         = vcpu->vcpu_id;
-               __entry->priority       = priority;
-               __entry->pending        = vcpu->arch.pending_exceptions;
-       ),
-
-       TP_printk("vcpu=%x prio=%x pending=%lx",
-               __entry->cpu_nr, __entry->priority, __entry->pending)
-);
-
-#endif
-
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
new file mode 100644 (file)
index 0000000..f7537cf
--- /dev/null
@@ -0,0 +1,177 @@
+#if !defined(_TRACE_KVM_BOOKE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_BOOKE_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_booke
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_booke
+
+#define kvm_trace_symbol_exit \
+       {0, "CRITICAL"}, \
+       {1, "MACHINE_CHECK"}, \
+       {2, "DATA_STORAGE"}, \
+       {3, "INST_STORAGE"}, \
+       {4, "EXTERNAL"}, \
+       {5, "ALIGNMENT"}, \
+       {6, "PROGRAM"}, \
+       {7, "FP_UNAVAIL"}, \
+       {8, "SYSCALL"}, \
+       {9, "AP_UNAVAIL"}, \
+       {10, "DECREMENTER"}, \
+       {11, "FIT"}, \
+       {12, "WATCHDOG"}, \
+       {13, "DTLB_MISS"}, \
+       {14, "ITLB_MISS"}, \
+       {15, "DEBUG"}, \
+       {32, "SPE_UNAVAIL"}, \
+       {33, "SPE_FP_DATA"}, \
+       {34, "SPE_FP_ROUND"}, \
+       {35, "PERFORMANCE_MONITOR"}, \
+       {36, "DOORBELL"}, \
+       {37, "DOORBELL_CRITICAL"}, \
+       {38, "GUEST_DBELL"}, \
+       {39, "GUEST_DBELL_CRIT"}, \
+       {40, "HV_SYSCALL"}, \
+       {41, "HV_PRIV"}
+
+TRACE_EVENT(kvm_exit,
+       TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+       TP_ARGS(exit_nr, vcpu),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   exit_nr         )
+               __field(        unsigned long,  pc              )
+               __field(        unsigned long,  msr             )
+               __field(        unsigned long,  dar             )
+               __field(        unsigned long,  last_inst       )
+       ),
+
+       TP_fast_assign(
+               __entry->exit_nr        = exit_nr;
+               __entry->pc             = kvmppc_get_pc(vcpu);
+               __entry->dar            = kvmppc_get_fault_dar(vcpu);
+               __entry->msr            = vcpu->arch.shared->msr;
+               __entry->last_inst      = vcpu->arch.last_inst;
+       ),
+
+       TP_printk("exit=%s"
+               " | pc=0x%lx"
+               " | msr=0x%lx"
+               " | dar=0x%lx"
+               " | last_inst=0x%lx"
+               ,
+               __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+               __entry->pc,
+               __entry->msr,
+               __entry->dar,
+               __entry->last_inst
+               )
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+       TP_PROTO(unsigned long hva),
+       TP_ARGS(hva),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  hva             )
+       ),
+
+       TP_fast_assign(
+               __entry->hva            = hva;
+       ),
+
+       TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
+TRACE_EVENT(kvm_booke206_stlb_write,
+       TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
+       TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
+
+       TP_STRUCT__entry(
+               __field(        __u32,  mas0            )
+               __field(        __u32,  mas8            )
+               __field(        __u32,  mas1            )
+               __field(        __u64,  mas2            )
+               __field(        __u64,  mas7_3          )
+       ),
+
+       TP_fast_assign(
+               __entry->mas0           = mas0;
+               __entry->mas8           = mas8;
+               __entry->mas1           = mas1;
+               __entry->mas2           = mas2;
+               __entry->mas7_3         = mas7_3;
+       ),
+
+       TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx",
+               __entry->mas0, __entry->mas8, __entry->mas1,
+               __entry->mas2, __entry->mas7_3)
+);
+
+TRACE_EVENT(kvm_booke206_gtlb_write,
+       TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3),
+       TP_ARGS(mas0, mas1, mas2, mas7_3),
+
+       TP_STRUCT__entry(
+               __field(        __u32,  mas0            )
+               __field(        __u32,  mas1            )
+               __field(        __u64,  mas2            )
+               __field(        __u64,  mas7_3          )
+       ),
+
+       TP_fast_assign(
+               __entry->mas0           = mas0;
+               __entry->mas1           = mas1;
+               __entry->mas2           = mas2;
+               __entry->mas7_3         = mas7_3;
+       ),
+
+       TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx",
+               __entry->mas0, __entry->mas1,
+               __entry->mas2, __entry->mas7_3)
+);
+
+TRACE_EVENT(kvm_booke206_ref_release,
+       TP_PROTO(__u64 pfn, __u32 flags),
+       TP_ARGS(pfn, flags),
+
+       TP_STRUCT__entry(
+               __field(        __u64,  pfn             )
+               __field(        __u32,  flags           )
+       ),
+
+       TP_fast_assign(
+               __entry->pfn            = pfn;
+               __entry->flags          = flags;
+       ),
+
+       TP_printk("pfn=%llx flags=%x",
+               __entry->pfn, __entry->flags)
+);
+
+TRACE_EVENT(kvm_booke_queue_irqprio,
+       TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
+       TP_ARGS(vcpu, priority),
+
+       TP_STRUCT__entry(
+               __field(        __u32,  cpu_nr          )
+               __field(        __u32,  priority                )
+               __field(        unsigned long,  pending         )
+       ),
+
+       TP_fast_assign(
+               __entry->cpu_nr         = vcpu->vcpu_id;
+               __entry->priority       = priority;
+               __entry->pending        = vcpu->arch.pending_exceptions;
+       ),
+
+       TP_printk("vcpu=%x prio=%x pending=%lx",
+               __entry->cpu_nr, __entry->priority, __entry->pending)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
new file mode 100644 (file)
index 0000000..8b22e47
--- /dev/null
@@ -0,0 +1,297 @@
+
+#if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_PR_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_pr
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_pr
+
+#define kvm_trace_symbol_exit \
+       {0x100, "SYSTEM_RESET"}, \
+       {0x200, "MACHINE_CHECK"}, \
+       {0x300, "DATA_STORAGE"}, \
+       {0x380, "DATA_SEGMENT"}, \
+       {0x400, "INST_STORAGE"}, \
+       {0x480, "INST_SEGMENT"}, \
+       {0x500, "EXTERNAL"}, \
+       {0x501, "EXTERNAL_LEVEL"}, \
+       {0x502, "EXTERNAL_HV"}, \
+       {0x600, "ALIGNMENT"}, \
+       {0x700, "PROGRAM"}, \
+       {0x800, "FP_UNAVAIL"}, \
+       {0x900, "DECREMENTER"}, \
+       {0x980, "HV_DECREMENTER"}, \
+       {0xc00, "SYSCALL"}, \
+       {0xd00, "TRACE"}, \
+       {0xe00, "H_DATA_STORAGE"}, \
+       {0xe20, "H_INST_STORAGE"}, \
+       {0xe40, "H_EMUL_ASSIST"}, \
+       {0xf00, "PERFMON"}, \
+       {0xf20, "ALTIVEC"}, \
+       {0xf40, "VSX"}
+
+TRACE_EVENT(kvm_book3s_reenter,
+       TP_PROTO(int r, struct kvm_vcpu *vcpu),
+       TP_ARGS(r, vcpu),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   r               )
+               __field(        unsigned long,  pc              )
+       ),
+
+       TP_fast_assign(
+               __entry->r              = r;
+               __entry->pc             = kvmppc_get_pc(vcpu);
+       ),
+
+       TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
+);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+TRACE_EVENT(kvm_book3s_64_mmu_map,
+       TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
+                struct kvmppc_pte *orig_pte),
+       TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
+
+       TP_STRUCT__entry(
+               __field(        unsigned char,          flag_w          )
+               __field(        unsigned char,          flag_x          )
+               __field(        unsigned long,          eaddr           )
+               __field(        unsigned long,          hpteg           )
+               __field(        unsigned long,          va              )
+               __field(        unsigned long long,     vpage           )
+               __field(        unsigned long,          hpaddr          )
+       ),
+
+       TP_fast_assign(
+               __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
+               __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x';
+               __entry->eaddr  = orig_pte->eaddr;
+               __entry->hpteg  = hpteg;
+               __entry->va     = va;
+               __entry->vpage  = orig_pte->vpage;
+               __entry->hpaddr = hpaddr;
+       ),
+
+       TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
+                 __entry->flag_w, __entry->flag_x, __entry->eaddr,
+                 __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
+);
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+TRACE_EVENT(kvm_book3s_mmu_map,
+       TP_PROTO(struct hpte_cache *pte),
+       TP_ARGS(pte),
+
+       TP_STRUCT__entry(
+               __field(        u64,            host_vpn        )
+               __field(        u64,            pfn             )
+               __field(        ulong,          eaddr           )
+               __field(        u64,            vpage           )
+               __field(        ulong,          raddr           )
+               __field(        int,            flags           )
+       ),
+
+       TP_fast_assign(
+               __entry->host_vpn       = pte->host_vpn;
+               __entry->pfn            = pte->pfn;
+               __entry->eaddr          = pte->pte.eaddr;
+               __entry->vpage          = pte->pte.vpage;
+               __entry->raddr          = pte->pte.raddr;
+               __entry->flags          = (pte->pte.may_read ? 0x4 : 0) |
+                                         (pte->pte.may_write ? 0x2 : 0) |
+                                         (pte->pte.may_execute ? 0x1 : 0);
+       ),
+
+       TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+                 __entry->host_vpn, __entry->pfn, __entry->eaddr,
+                 __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_invalidate,
+       TP_PROTO(struct hpte_cache *pte),
+       TP_ARGS(pte),
+
+       TP_STRUCT__entry(
+               __field(        u64,            host_vpn        )
+               __field(        u64,            pfn             )
+               __field(        ulong,          eaddr           )
+               __field(        u64,            vpage           )
+               __field(        ulong,          raddr           )
+               __field(        int,            flags           )
+       ),
+
+       TP_fast_assign(
+               __entry->host_vpn       = pte->host_vpn;
+               __entry->pfn            = pte->pfn;
+               __entry->eaddr          = pte->pte.eaddr;
+               __entry->vpage          = pte->pte.vpage;
+               __entry->raddr          = pte->pte.raddr;
+               __entry->flags          = (pte->pte.may_read ? 0x4 : 0) |
+                                         (pte->pte.may_write ? 0x2 : 0) |
+                                         (pte->pte.may_execute ? 0x1 : 0);
+       ),
+
+       TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+                 __entry->host_vpn, __entry->pfn, __entry->eaddr,
+                 __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_flush,
+       TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
+                unsigned long long p2),
+       TP_ARGS(type, vcpu, p1, p2),
+
+       TP_STRUCT__entry(
+               __field(        int,                    count           )
+               __field(        unsigned long long,     p1              )
+               __field(        unsigned long long,     p2              )
+               __field(        const char *,           type            )
+       ),
+
+       TP_fast_assign(
+               __entry->count          = to_book3s(vcpu)->hpte_cache_count;
+               __entry->p1             = p1;
+               __entry->p2             = p2;
+               __entry->type           = type;
+       ),
+
+       TP_printk("Flush %d %sPTEs: %llx - %llx",
+                 __entry->count, __entry->type, __entry->p1, __entry->p2)
+);
+
+TRACE_EVENT(kvm_book3s_slb_found,
+       TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
+       TP_ARGS(gvsid, hvsid),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long long,     gvsid           )
+               __field(        unsigned long long,     hvsid           )
+       ),
+
+       TP_fast_assign(
+               __entry->gvsid          = gvsid;
+               __entry->hvsid          = hvsid;
+       ),
+
+       TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_fail,
+       TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
+       TP_ARGS(sid_map_mask, gvsid),
+
+       TP_STRUCT__entry(
+               __field(        unsigned short,         sid_map_mask    )
+               __field(        unsigned long long,     gvsid           )
+       ),
+
+       TP_fast_assign(
+               __entry->sid_map_mask   = sid_map_mask;
+               __entry->gvsid          = gvsid;
+       ),
+
+       TP_printk("%x/%x: %llx", __entry->sid_map_mask,
+                 SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_map,
+       TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
+                unsigned long long hvsid),
+       TP_ARGS(sid_map_mask, gvsid, hvsid),
+
+       TP_STRUCT__entry(
+               __field(        unsigned short,         sid_map_mask    )
+               __field(        unsigned long long,     guest_vsid      )
+               __field(        unsigned long long,     host_vsid       )
+       ),
+
+       TP_fast_assign(
+               __entry->sid_map_mask   = sid_map_mask;
+               __entry->guest_vsid     = gvsid;
+               __entry->host_vsid      = hvsid;
+       ),
+
+       TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
+                 __entry->guest_vsid, __entry->host_vsid)
+);
+
+TRACE_EVENT(kvm_book3s_slbmte,
+       TP_PROTO(u64 slb_vsid, u64 slb_esid),
+       TP_ARGS(slb_vsid, slb_esid),
+
+       TP_STRUCT__entry(
+               __field(        u64,    slb_vsid        )
+               __field(        u64,    slb_esid        )
+       ),
+
+       TP_fast_assign(
+               __entry->slb_vsid       = slb_vsid;
+               __entry->slb_esid       = slb_esid;
+       ),
+
+       TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
+);
+
+TRACE_EVENT(kvm_exit,
+       TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+       TP_ARGS(exit_nr, vcpu),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   exit_nr         )
+               __field(        unsigned long,  pc              )
+               __field(        unsigned long,  msr             )
+               __field(        unsigned long,  dar             )
+               __field(        unsigned long,  srr1            )
+               __field(        unsigned long,  last_inst       )
+       ),
+
+       TP_fast_assign(
+               __entry->exit_nr        = exit_nr;
+               __entry->pc             = kvmppc_get_pc(vcpu);
+               __entry->dar            = kvmppc_get_fault_dar(vcpu);
+               __entry->msr            = vcpu->arch.shared->msr;
+               __entry->srr1           = vcpu->arch.shadow_srr1;
+               __entry->last_inst      = vcpu->arch.last_inst;
+       ),
+
+       TP_printk("exit=%s"
+               " | pc=0x%lx"
+               " | msr=0x%lx"
+               " | dar=0x%lx"
+               " | srr1=0x%lx"
+               " | last_inst=0x%lx"
+               ,
+               __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+               __entry->pc,
+               __entry->msr,
+               __entry->dar,
+               __entry->srr1,
+               __entry->last_inst
+               )
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+       TP_PROTO(unsigned long hva),
+       TP_ARGS(hva),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  hva             )
+       ),
+
+       TP_fast_assign(
+               __entry->hva            = hva;
+       ),
+
+       TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index 167f725..57a0720 100644 (file)
@@ -226,19 +226,35 @@ _GLOBAL(csum_partial)
        blr
 
 
-       .macro source
+       .macro srcnr
 100:
        .section __ex_table,"a"
        .align 3
-       .llong 100b,.Lsrc_error
+       .llong 100b,.Lsrc_error_nr
        .previous
        .endm
 
-       .macro dest
+       .macro source
+150:
+       .section __ex_table,"a"
+       .align 3
+       .llong 150b,.Lsrc_error
+       .previous
+       .endm
+
+       .macro dstnr
 200:
        .section __ex_table,"a"
        .align 3
-       .llong 200b,.Ldest_error
+       .llong 200b,.Ldest_error_nr
+       .previous
+       .endm
+
+       .macro dest
+250:
+       .section __ex_table,"a"
+       .align 3
+       .llong 250b,.Ldest_error
        .previous
        .endm
 
@@ -269,16 +285,16 @@ _GLOBAL(csum_partial_copy_generic)
        rldicl. r6,r3,64-1,64-2         /* r6 = (r3 & 0x3) >> 1 */
        beq     .Lcopy_aligned
 
-       li      r7,4
-       sub     r6,r7,r6
+       li      r9,4
+       sub     r6,r9,r6
        mtctr   r6
 
 1:
-source;        lhz     r6,0(r3)                /* align to doubleword */
+srcnr; lhz     r6,0(r3)                /* align to doubleword */
        subi    r5,r5,2
        addi    r3,r3,2
        adde    r0,r0,r6
-dest;  sth     r6,0(r4)
+dstnr; sth     r6,0(r4)
        addi    r4,r4,2
        bdnz    1b
 
@@ -392,10 +408,10 @@ dest;     std     r16,56(r4)
 
        mtctr   r6
 3:
-source;        ld      r6,0(r3)
+srcnr; ld      r6,0(r3)
        addi    r3,r3,8
        adde    r0,r0,r6
-dest;  std     r6,0(r4)
+dstnr; std     r6,0(r4)
        addi    r4,r4,8
        bdnz    3b
 
@@ -405,10 +421,10 @@ dest;     std     r6,0(r4)
        srdi.   r6,r5,2
        beq     .Lcopy_tail_halfword
 
-source;        lwz     r6,0(r3)
+srcnr; lwz     r6,0(r3)
        addi    r3,r3,4
        adde    r0,r0,r6
-dest;  stw     r6,0(r4)
+dstnr; stw     r6,0(r4)
        addi    r4,r4,4
        subi    r5,r5,4
 
@@ -416,10 +432,10 @@ dest;     stw     r6,0(r4)
        srdi.   r6,r5,1
        beq     .Lcopy_tail_byte
 
-source;        lhz     r6,0(r3)
+srcnr; lhz     r6,0(r3)
        addi    r3,r3,2
        adde    r0,r0,r6
-dest;  sth     r6,0(r4)
+dstnr; sth     r6,0(r4)
        addi    r4,r4,2
        subi    r5,r5,2
 
@@ -427,10 +443,10 @@ dest;     sth     r6,0(r4)
        andi.   r6,r5,1
        beq     .Lcopy_finish
 
-source;        lbz     r6,0(r3)
+srcnr; lbz     r6,0(r3)
        sldi    r9,r6,8                 /* Pad the byte out to 16 bits */
        adde    r0,r0,r9
-dest;  stb     r6,0(r4)
+dstnr; stb     r6,0(r4)
 
 .Lcopy_finish:
        addze   r0,r0                   /* add in final carry */
@@ -440,6 +456,11 @@ dest;      stb     r6,0(r4)
        blr
 
 .Lsrc_error:
+       ld      r14,STK_REG(R14)(r1)
+       ld      r15,STK_REG(R15)(r1)
+       ld      r16,STK_REG(R16)(r1)
+       addi    r1,r1,STACKFRAMESIZE
+.Lsrc_error_nr:
        cmpdi   0,r7,0
        beqlr
        li      r6,-EFAULT
@@ -447,6 +468,11 @@ dest;      stb     r6,0(r4)
        blr
 
 .Ldest_error:
+       ld      r14,STK_REG(R14)(r1)
+       ld      r15,STK_REG(R15)(r1)
+       ld      r16,STK_REG(R16)(r1)
+       addi    r1,r1,STACKFRAMESIZE
+.Ldest_error_nr:
        cmpdi   0,r8,0
        beqlr
        li      r6,-EFAULT
index a7ee978..b1faa15 100644 (file)
@@ -1505,6 +1505,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
                 */
                if ((ra == 1) && !(regs->msr & MSR_PR) \
                        && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) {
+#ifdef CONFIG_PPC32
                        /*
                         * Check if we will touch kernel sack overflow
                         */
@@ -1513,7 +1514,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
                                err = -EINVAL;
                                break;
                        }
-
+#endif /* CONFIG_PPC32 */
                        /*
                         * Check if we already set since that means we'll
                         * lose the previous value.
index d0cd9e4..8ed035d 100644 (file)
@@ -300,5 +300,9 @@ void vmemmap_free(unsigned long start, unsigned long end)
 {
 }
 
+void register_page_bootmem_memmap(unsigned long section_nr,
+                                 struct page *start_page, unsigned long size)
+{
+}
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
index 1cf9c5b..3fa93dc 100644 (file)
@@ -297,12 +297,21 @@ void __init paging_init(void)
 }
 #endif /* ! CONFIG_NEED_MULTIPLE_NODES */
 
+static void __init register_page_bootmem_info(void)
+{
+       int i;
+
+       for_each_online_node(i)
+               register_page_bootmem_info_node(NODE_DATA(i));
+}
+
 void __init mem_init(void)
 {
 #ifdef CONFIG_SWIOTLB
        swiotlb_init(0);
 #endif
 
+       register_page_bootmem_info();
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
        set_max_mapnr(max_pfn);
        free_all_bootmem();
index 2ee4a70..a3f7abd 100644 (file)
 #define MMCR1_UNIT_SHIFT(pmc)          (60 - (4 * ((pmc) - 1)))
 #define MMCR1_COMBINE_SHIFT(pmc)       (35 - ((pmc) - 1))
 #define MMCR1_PMCSEL_SHIFT(pmc)                (24 - (((pmc) - 1)) * 8)
+#define MMCR1_FAB_SHIFT                        36
 #define MMCR1_DC_QUAL_SHIFT            47
 #define MMCR1_IC_QUAL_SHIFT            46
 
@@ -388,8 +389,8 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
                 * the threshold bits are used for the match value.
                 */
                if (event_is_fab_match(event[i])) {
-                       mmcr1 |= (event[i] >> EVENT_THR_CTL_SHIFT) &
-                                 EVENT_THR_CTL_MASK;
+                       mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) &
+                                 EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT;
                } else {
                        val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
                        mmcra |= val << MMCRA_THR_CTL_SHIFT;
index 1c1771a..24f58cb 100644 (file)
@@ -233,18 +233,24 @@ static void __init smp_init_pseries(void)
 
        alloc_bootmem_cpumask_var(&of_spin_mask);
 
-       /* Mark threads which are still spinning in hold loops. */
-       if (cpu_has_feature(CPU_FTR_SMT)) {
-               for_each_present_cpu(i) { 
-                       if (cpu_thread_in_core(i) == 0)
-                               cpumask_set_cpu(i, of_spin_mask);
-               }
-       } else {
-               cpumask_copy(of_spin_mask, cpu_present_mask);
+       /*
+        * Mark threads which are still spinning in hold loops
+        *
+        * We know prom_init will not have started them if RTAS supports
+        * query-cpu-stopped-state.
+        */
+       if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) {
+               if (cpu_has_feature(CPU_FTR_SMT)) {
+                       for_each_present_cpu(i) {
+                               if (cpu_thread_in_core(i) == 0)
+                                       cpumask_set_cpu(i, of_spin_mask);
+                       }
+               } else
+                       cpumask_copy(of_spin_mask, cpu_present_mask);
+
+               cpumask_clear_cpu(boot_cpuid, of_spin_mask);
        }
 
-       cpumask_clear_cpu(boot_cpuid, of_spin_mask);
-
        /* Non-lpar has additional take/give timebase */
        if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
                smp_ops->give_timebase = rtas_give_timebase;
index dcc6ac2..7143793 100644 (file)
@@ -93,6 +93,7 @@ config S390
        select ARCH_INLINE_WRITE_UNLOCK_IRQ
        select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
        select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+       select ARCH_USE_CMPXCHG_LOCKREF
        select ARCH_WANT_IPC_PARSE_VERSION
        select BUILDTIME_EXTABLE_SORT
        select CLONE_BACKWARDS2
@@ -102,7 +103,6 @@ config S390
        select GENERIC_TIME_VSYSCALL_OLD
        select HAVE_ALIGNED_STRUCT_PAGE if SLUB
        select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
-       select HAVE_ARCH_MUTEX_CPU_RELAX
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT
index 6c32190..346b1c8 100644 (file)
@@ -15,7 +15,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-       asm goto("0:    brcl 0,0\n"
+       asm_volatile_goto("0:   brcl 0,0\n"
                ".pushsection __jump_table, \"aw\"\n"
                ASM_ALIGN "\n"
                ASM_PTR " 0b, %l[label], %0\n"
index e87ecaa..d5bc375 100644 (file)
@@ -38,13 +38,6 @@ struct sca_block {
        struct sca_entry cpu[64];
 } __attribute__((packed));
 
-#define KVM_NR_PAGE_SIZES 2
-#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8)
-#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
-#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
-#define KVM_HPAGE_MASK(x)      (~(KVM_HPAGE_SIZE(x) - 1))
-#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
-
 #define CPUSTAT_STOPPED    0x80000000
 #define CPUSTAT_WAIT       0x10000000
 #define CPUSTAT_ECALL_PEND 0x08000000
@@ -220,7 +213,6 @@ struct kvm_s390_interrupt_info {
 /* for local_interrupt.action_flags */
 #define ACTION_STORE_ON_STOP           (1<<0)
 #define ACTION_STOP_ON_STOP            (1<<1)
-#define ACTION_RELOADVCPU_ON_STOP      (1<<2)
 
 struct kvm_s390_local_interrupt {
        spinlock_t lock;
index 688271f..458c1f7 100644 (file)
@@ -7,5 +7,3 @@
  */
 
 #include <asm-generic/mutex-dec.h>
-
-#define arch_mutex_cpu_relax() barrier()
index 0eb3750..ca7821f 100644 (file)
@@ -198,6 +198,8 @@ static inline void cpu_relax(void)
        barrier();
 }
 
+#define arch_mutex_cpu_relax()  barrier()
+
 static inline void psw_set_key(unsigned int key)
 {
        asm volatile("spka 0(%0)" : : "d" (key));
index 701fe8c..83e5d21 100644 (file)
@@ -44,6 +44,11 @@ extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
 extern int arch_spin_trylock_retry(arch_spinlock_t *);
 extern void arch_spin_relax(arch_spinlock_t *lock);
 
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+       return lock.owner_cpu == 0;
+}
+
 static inline void arch_spin_lock(arch_spinlock_t *lp)
 {
        int old;
index c84f33d..7dd2172 100644 (file)
@@ -40,28 +40,26 @@ static inline void *load_real_addr(void *addr)
 }
 
 /*
- * Copy up to one page to vmalloc or real memory
+ * Copy real to virtual or real memory
  */
-static ssize_t copy_page_real(void *buf, void *src, size_t csize)
+static int copy_from_realmem(void *dest, void *src, size_t count)
 {
-       size_t size;
+       unsigned long size;
+       int rc;
 
-       if (is_vmalloc_addr(buf)) {
-               BUG_ON(csize >= PAGE_SIZE);
-               /* If buf is not page aligned, copy first part */
-               size = min(roundup(__pa(buf), PAGE_SIZE) - __pa(buf), csize);
-               if (size) {
-                       if (memcpy_real(load_real_addr(buf), src, size))
-                               return -EFAULT;
-                       buf += size;
-                       src += size;
-               }
-               /* Copy second part */
-               size = csize - size;
-               return (size) ? memcpy_real(load_real_addr(buf), src, size) : 0;
-       } else {
-               return memcpy_real(buf, src, csize);
-       }
+       if (!count)
+               return 0;
+       if (!is_vmalloc_or_module_addr(dest))
+               return memcpy_real(dest, src, count);
+       do {
+               size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK));
+               if (memcpy_real(load_real_addr(dest), src, size))
+                       return -EFAULT;
+               count -= size;
+               dest += size;
+               src += size;
+       } while (count);
+       return 0;
 }
 
 /*
@@ -114,7 +112,7 @@ static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize,
                rc = copy_to_user_real((void __force __user *) buf,
                                       (void *) src, csize);
        else
-               rc = copy_page_real(buf, (void *) src, csize);
+               rc = copy_from_realmem(buf, (void *) src, csize);
        return (rc == 0) ? rc : csize;
 }
 
@@ -210,7 +208,7 @@ int copy_from_oldmem(void *dest, void *src, size_t count)
        if (OLDMEM_BASE) {
                if ((unsigned long) src < OLDMEM_SIZE) {
                        copied = min(count, OLDMEM_SIZE - (unsigned long) src);
-                       rc = memcpy_real(dest, src + OLDMEM_BASE, copied);
+                       rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied);
                        if (rc)
                                return rc;
                }
@@ -223,7 +221,7 @@ int copy_from_oldmem(void *dest, void *src, size_t count)
                                return rc;
                }
        }
-       return memcpy_real(dest + copied, src + copied, count - copied);
+       return copy_from_realmem(dest + copied, src + copied, count - copied);
 }
 
 /*
index cc30d1f..0dc2b6d 100644 (file)
@@ -266,6 +266,7 @@ sysc_sigpending:
        tm      __TI_flags+3(%r12),_TIF_SYSCALL
        jno     sysc_return
        lm      %r2,%r7,__PT_R2(%r11)   # load svc arguments
+       l       %r10,__TI_sysc_table(%r12)      # 31 bit system call table
        xr      %r8,%r8                 # svc 0 returns -ENOSYS
        clc     __PT_INT_CODE+2(2,%r11),BASED(.Lnr_syscalls+2)
        jnl     sysc_nr_ok              # invalid svc number -> do svc 0
index 2b2188b..e5b43c9 100644 (file)
@@ -297,6 +297,7 @@ sysc_sigpending:
        tm      __TI_flags+7(%r12),_TIF_SYSCALL
        jno     sysc_return
        lmg     %r2,%r7,__PT_R2(%r11)   # load svc arguments
+       lg      %r10,__TI_sysc_table(%r12)      # address of system call table
        lghi    %r8,0                   # svc 0 returns -ENOSYS
        llgh    %r1,__PT_INT_CODE+2(%r11)       # load new svc number
        cghi    %r1,NR_syscalls
index 0ce9fb2..d86e64e 100644 (file)
@@ -67,6 +67,11 @@ static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
        case 0xac:      /* stnsm */
        case 0xad:      /* stosm */
                return -EINVAL;
+       case 0xc6:
+               switch (insn[0] & 0x0f) {
+               case 0x00: /* exrl   */
+                       return -EINVAL;
+               }
        }
        switch (insn[0]) {
        case 0x0101:    /* pr    */
@@ -180,7 +185,6 @@ static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn)
                break;
        case 0xc6:
                switch (insn[0] & 0x0f) {
-               case 0x00: /* exrl   */
                case 0x02: /* pfdrl  */
                case 0x04: /* cghrl  */
                case 0x05: /* chrl   */
index 3a74d8a..78d967f 100644 (file)
@@ -107,14 +107,13 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 
 static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 {
-       int ret, idx;
+       int ret;
 
        /* No virtio-ccw notification? Get out quickly. */
        if (!vcpu->kvm->arch.css_support ||
            (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
                return -EOPNOTSUPP;
 
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
        /*
         * The layout is as follows:
         * - gpr 2 contains the subchannel id (passed as addr)
@@ -125,7 +124,6 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
                                      vcpu->run->s.regs.gprs[2],
                                      8, &vcpu->run->s.regs.gprs[3],
                                      vcpu->run->s.regs.gprs[4]);
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
        /*
         * Return cookie in gpr 2, but don't overwrite the register if the
index 99d789e..374a439 100644 (file)
 #include <asm/uaccess.h>
 #include "kvm-s390.h"
 
+/* Convert real to absolute address by applying the prefix of the CPU */
+static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
+                                                unsigned long gaddr)
+{
+       unsigned long prefix  = vcpu->arch.sie_block->prefix;
+       if (gaddr < 2 * PAGE_SIZE)
+               gaddr += prefix;
+       else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
+               gaddr -= prefix;
+       return gaddr;
+}
+
 static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
                                          void __user *gptr,
                                          int prefixing)
 {
-       unsigned long prefix  = vcpu->arch.sie_block->prefix;
        unsigned long gaddr = (unsigned long) gptr;
        unsigned long uaddr;
 
-       if (prefixing) {
-               if (gaddr < 2 * PAGE_SIZE)
-                       gaddr += prefix;
-               else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE))
-                       gaddr -= prefix;
-       }
+       if (prefixing)
+               gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
        uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
        if (IS_ERR_VALUE(uaddr))
                uaddr = -EFAULT;
index 5ee56e5..5ddbbde 100644 (file)
@@ -62,12 +62,6 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 
        trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
 
-       if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
-               vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
-               rc = SIE_INTERCEPT_RERUNVCPU;
-               vcpu->run->exit_reason = KVM_EXIT_INTR;
-       }
-
        if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
                atomic_set_mask(CPUSTAT_STOPPED,
                                &vcpu->arch.sie_block->cpuflags);
index 7f35cb3..e7323cd 100644 (file)
@@ -436,6 +436,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
        hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
        VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
 no_timer:
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        spin_lock(&vcpu->arch.local_int.float_int->lock);
        spin_lock_bh(&vcpu->arch.local_int.lock);
        add_wait_queue(&vcpu->wq, &wait);
@@ -455,6 +456,8 @@ no_timer:
        remove_wait_queue(&vcpu->wq, &wait);
        spin_unlock_bh(&vcpu->arch.local_int.lock);
        spin_unlock(&vcpu->arch.local_int.float_int->lock);
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
        hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
        return 0;
 }
index 776dafe..bedda67 100644 (file)
@@ -689,9 +689,9 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 {
-       int rc;
+       int rc, cpuflags;
 
        memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
 
@@ -709,28 +709,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                return rc;
 
        vcpu->arch.sie_block->icptcode = 0;
-       VCPU_EVENT(vcpu, 6, "entering sie flags %x",
-                  atomic_read(&vcpu->arch.sie_block->cpuflags));
-       trace_kvm_s390_sie_enter(vcpu,
-                                atomic_read(&vcpu->arch.sie_block->cpuflags));
+       cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+       VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
+       trace_kvm_s390_sie_enter(vcpu, cpuflags);
 
-       /*
-        * As PF_VCPU will be used in fault handler, between guest_enter
-        * and guest_exit should be no uaccess.
-        */
-       preempt_disable();
-       kvm_guest_enter();
-       preempt_enable();
-       rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
-       kvm_guest_exit();
+       return 0;
+}
+
+static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+{
+       int rc;
 
        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
 
-       if (rc > 0)
+       if (exit_reason >= 0) {
                rc = 0;
-       if (rc < 0) {
+       } else {
                if (kvm_is_ucontrol(vcpu->kvm)) {
                        rc = SIE_INTERCEPT_UCONTROL;
                } else {
@@ -741,6 +737,49 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
        }
 
        memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
+
+       if (rc == 0) {
+               if (kvm_is_ucontrol(vcpu->kvm))
+                       rc = -EOPNOTSUPP;
+               else
+                       rc = kvm_handle_sie_intercept(vcpu);
+       }
+
+       return rc;
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+       int rc, exit_reason;
+
+       /*
+        * We try to hold kvm->srcu during most of vcpu_run (except when run-
+        * ning the guest), so that memslots (and other stuff) are protected
+        */
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+       do {
+               rc = vcpu_pre_run(vcpu);
+               if (rc)
+                       break;
+
+               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               /*
+                * As PF_VCPU will be used in fault handler, between
+                * guest_enter and guest_exit should be no uaccess.
+                */
+               preempt_disable();
+               kvm_guest_enter();
+               preempt_enable();
+               exit_reason = sie64a(vcpu->arch.sie_block,
+                                    vcpu->run->s.regs.gprs);
+               kvm_guest_exit();
+               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+               rc = vcpu_post_run(vcpu, exit_reason);
+       } while (!signal_pending(current) && !rc);
+
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
 }
 
@@ -749,7 +788,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        int rc;
        sigset_t sigsaved;
 
-rerun_vcpu:
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
@@ -782,19 +820,7 @@ rerun_vcpu:
        }
 
        might_fault();
-
-       do {
-               rc = __vcpu_run(vcpu);
-               if (rc)
-                       break;
-               if (kvm_is_ucontrol(vcpu->kvm))
-                       rc = -EOPNOTSUPP;
-               else
-                       rc = kvm_handle_sie_intercept(vcpu);
-       } while (!signal_pending(current) && !rc);
-
-       if (rc == SIE_INTERCEPT_RERUNVCPU)
-               goto rerun_vcpu;
+       rc = __vcpu_run(vcpu);
 
        if (signal_pending(current) && !rc) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
@@ -951,6 +977,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 {
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
+       int idx;
        long r;
 
        switch (ioctl) {
@@ -964,7 +991,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                break;
        }
        case KVM_S390_STORE_STATUS:
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_s390_vcpu_store_status(vcpu, arg);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        case KVM_S390_SET_INITIAL_PSW: {
                psw_t psw;
@@ -1060,12 +1089,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
        return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages)
 {
        return 0;
 }
index dc99f1c..b44912a 100644 (file)
@@ -28,8 +28,7 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 extern unsigned long *vfacilities;
 
 /* negativ values are error codes, positive values for internal conditions */
-#define SIE_INTERCEPT_RERUNVCPU                (1<<0)
-#define SIE_INTERCEPT_UCONTROL         (1<<1)
+#define SIE_INTERCEPT_UCONTROL         (1<<0)
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
@@ -91,8 +90,10 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
 
 static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
 {
-       *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
-       *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+       if (r1)
+               *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
+       if (r2)
+               *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
 }
 
 static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
index 59200ee..2440602 100644 (file)
 #include "kvm-s390.h"
 #include "trace.h"
 
+/* Handle SCK (SET CLOCK) interception */
+static int handle_set_clock(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu *cpup;
+       s64 hostclk, val;
+       u64 op2;
+       int i;
+
+       if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+       op2 = kvm_s390_get_base_disp_s(vcpu);
+       if (op2 & 7)    /* Operand must be on a doubleword boundary */
+               return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+       if (get_guest(vcpu, val, (u64 __user *) op2))
+               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+       if (store_tod_clock(&hostclk)) {
+               kvm_s390_set_psw_cc(vcpu, 3);
+               return 0;
+       }
+       val = (val - hostclk) & ~0x3fUL;
+
+       mutex_lock(&vcpu->kvm->lock);
+       kvm_for_each_vcpu(i, cpup, vcpu->kvm)
+               cpup->arch.sie_block->epoch = val;
+       mutex_unlock(&vcpu->kvm->lock);
+
+       kvm_s390_set_psw_cc(vcpu, 0);
+       return 0;
+}
+
 static int handle_set_prefix(struct kvm_vcpu *vcpu)
 {
        u64 operand2;
@@ -128,6 +160,33 @@ static int handle_skey(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static int handle_test_block(struct kvm_vcpu *vcpu)
+{
+       unsigned long hva;
+       gpa_t addr;
+       int reg2;
+
+       if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+       kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
+       addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+       addr = kvm_s390_real_to_abs(vcpu, addr);
+
+       hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
+       if (kvm_is_error_hva(hva))
+               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       /*
+        * We don't expect errors on modern systems, and do not care
+        * about storage keys (yet), so let's just clear the page.
+        */
+       if (clear_user((void __user *)hva, PAGE_SIZE) != 0)
+               return -EFAULT;
+       kvm_s390_set_psw_cc(vcpu, 0);
+       vcpu->run->s.regs.gprs[0] = 0;
+       return 0;
+}
+
 static int handle_tpi(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_interrupt_info *inti;
@@ -438,12 +497,14 @@ out_exception:
 
 static const intercept_handler_t b2_handlers[256] = {
        [0x02] = handle_stidp,
+       [0x04] = handle_set_clock,
        [0x10] = handle_set_prefix,
        [0x11] = handle_store_prefix,
        [0x12] = handle_store_cpu_address,
        [0x29] = handle_skey,
        [0x2a] = handle_skey,
        [0x2b] = handle_skey,
+       [0x2c] = handle_test_block,
        [0x30] = handle_io_inst,
        [0x31] = handle_io_inst,
        [0x32] = handle_io_inst,
index a1be70d..305f7ee 100644 (file)
@@ -2,6 +2,7 @@ menu "Machine selection"
 
 config SCORE
        def_bool y
+       select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_SHOW
        select GENERIC_IOMAP
        select GENERIC_ATOMIC64
@@ -110,3 +111,6 @@ source "security/Kconfig"
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+config NO_IOMEM
+       def_bool y
index 974aefe..9e3e060 100644 (file)
@@ -20,8 +20,8 @@ cflags-y += -G0 -pipe -mel -mnhwloop -D__SCOREEL__ \
 #
 KBUILD_AFLAGS += $(cflags-y)
 KBUILD_CFLAGS += $(cflags-y)
-KBUILD_AFLAGS_MODULE += -mlong-calls
-KBUILD_CFLAGS_MODULE += -mlong-calls
+KBUILD_AFLAGS_MODULE +=
+KBUILD_CFLAGS_MODULE +=
 LDFLAGS += --oformat elf32-littlescore
 LDFLAGS_vmlinux        += -G0 -static -nostdlib
 
index f909ac3..961bd64 100644 (file)
@@ -184,48 +184,57 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
                                __wsum sum)
 {
        __asm__ __volatile__(
-               ".set\tnoreorder\t\t\t# csum_ipv6_magic\n\t"
-               ".set\tnoat\n\t"
-               "addu\t%0, %5\t\t\t# proto (long in network byte order)\n\t"
-               "sltu\t$1, %0, %5\n\t"
-               "addu\t%0, $1\n\t"
-               "addu\t%0, %6\t\t\t# csum\n\t"
-               "sltu\t$1, %0, %6\n\t"
-               "lw\t%1, 0(%2)\t\t\t# four words source address\n\t"
-               "addu\t%0, $1\n\t"
-               "addu\t%0, %1\n\t"
-               "sltu\t$1, %0, %1\n\t"
-               "lw\t%1, 4(%2)\n\t"
-               "addu\t%0, $1\n\t"
-               "addu\t%0, %1\n\t"
-               "sltu\t$1, %0, %1\n\t"
-               "lw\t%1, 8(%2)\n\t"
-               "addu\t%0, $1\n\t"
-               "addu\t%0, %1\n\t"
-               "sltu\t$1, %0, %1\n\t"
-               "lw\t%1, 12(%2)\n\t"
-               "addu\t%0, $1\n\t"
-               "addu\t%0, %1\n\t"
-               "sltu\t$1, %0, %1\n\t"
-               "lw\t%1, 0(%3)\n\t"
-               "addu\t%0, $1\n\t"
-               "addu\t%0, %1\n\t"
-               "sltu\t$1, %0, %1\n\t"
-               "lw\t%1, 4(%3)\n\t"
-               "addu\t%0, $1\n\t"
-               "addu\t%0, %1\n\t"
-               "sltu\t$1, %0, %1\n\t"
-               "lw\t%1, 8(%3)\n\t"
-               "addu\t%0, $1\n\t"
-               "addu\t%0, %1\n\t"
-               "sltu\t$1, %0, %1\n\t"
-               "lw\t%1, 12(%3)\n\t"
-               "addu\t%0, $1\n\t"
-               "addu\t%0, %1\n\t"
-               "sltu\t$1, %0, %1\n\t"
-               "addu\t%0, $1\t\t\t# Add final carry\n\t"
-               ".set\tnoat\n\t"
-               ".set\tnoreorder"
+               ".set\tvolatile\t\t\t# csum_ipv6_magic\n\t"
+               "add\t%0, %0, %5\t\t\t# proto (long in network byte order)\n\t"
+               "cmp.c\t%5, %0\n\t"
+               "bleu 1f\n\t"
+               "addi\t%0, 0x1\n\t"
+               "1:add\t%0, %0, %6\t\t\t# csum\n\t"
+               "cmp.c\t%6, %0\n\t"
+               "lw\t%1, [%2, 0]\t\t\t# four words source address\n\t"
+               "bleu 1f\n\t"
+               "addi\t%0, 0x1\n\t"
+               "1:add\t%0, %0, %1\n\t"
+               "cmp.c\t%1, %0\n\t"
+               "1:lw\t%1, [%2, 4]\n\t"
+               "bleu 1f\n\t"
+               "addi\t%0, 0x1\n\t"
+               "1:add\t%0, %0, %1\n\t"
+               "cmp.c\t%1, %0\n\t"
+               "lw\t%1, [%2,8]\n\t"
+               "bleu 1f\n\t"
+               "addi\t%0, 0x1\n\t"
+               "1:add\t%0, %0, %1\n\t"
+               "cmp.c\t%1, %0\n\t"
+               "lw\t%1, [%2, 12]\n\t"
+               "bleu 1f\n\t"
+               "addi\t%0, 0x1\n\t"
+               "1:add\t%0, %0,%1\n\t"
+               "cmp.c\t%1, %0\n\t"
+               "lw\t%1, [%3, 0]\n\t"
+               "bleu 1f\n\t"
+               "addi\t%0, 0x1\n\t"
+               "1:add\t%0, %0, %1\n\t"
+               "cmp.c\t%1, %0\n\t"
+               "lw\t%1, [%3, 4]\n\t"
+               "bleu 1f\n\t"
+               "addi\t%0, 0x1\n\t"
+               "1:add\t%0, %0, %1\n\t"
+               "cmp.c\t%1, %0\n\t"
+               "lw\t%1, [%3, 8]\n\t"
+               "bleu 1f\n\t"
+               "addi\t%0, 0x1\n\t"
+               "1:add\t%0, %0, %1\n\t"
+               "cmp.c\t%1, %0\n\t"
+               "lw\t%1, [%3, 12]\n\t"
+               "bleu 1f\n\t"
+               "addi\t%0, 0x1\n\t"
+               "1:add\t%0, %0, %1\n\t"
+               "cmp.c\t%1, %0\n\t"
+               "bleu 1f\n\t"
+               "addi\t%0, 0x1\n\t"
+               "1:\n\t"
+               ".set\toptimize"
                : "=r" (sum), "=r" (proto)
                : "r" (saddr), "r" (daddr),
                  "0" (htonl(len)), "1" (htonl(proto)), "r" (sum));
index fbbfd71..574c882 100644 (file)
@@ -5,5 +5,4 @@
 
 #define virt_to_bus    virt_to_phys
 #define bus_to_virt    phys_to_virt
-
 #endif /* _ASM_SCORE_IO_H */
index 059a61b..716b3fd 100644 (file)
@@ -2,7 +2,7 @@
 #define _ASM_SCORE_PGALLOC_H
 
 #include <linux/mm.h>
-
+#include <linux/highmem.h>
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
        pte_t *pte)
 {
index 7234ed0..befb87d 100644 (file)
@@ -264,7 +264,7 @@ resume_kernel:
        disable_irq
        lw      r8, [r28, TI_PRE_COUNT]
        cmpz.c  r8
-       bne     r8, restore_all
+       bne     restore_all
 need_resched:
        lw      r8, [r28, TI_FLAGS]
        andri.c r9, r8, _TIF_NEED_RESCHED
@@ -415,7 +415,7 @@ ENTRY(handle_sys)
        sw      r9, [r0, PT_EPC]
 
        cmpi.c  r27, __NR_syscalls      # check syscall number
-       bgeu    illegal_syscall
+       bcs     illegal_syscall
 
        slli    r8, r27, 2              # get syscall routine
        la      r11, sys_call_table
index f4c6d02..a1519ad 100644 (file)
@@ -78,8 +78,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
        p->thread.reg0 = (unsigned long) childregs;
        if (unlikely(p->flags & PF_KTHREAD)) {
                memset(childregs, 0, sizeof(struct pt_regs));
-               p->thread->reg12 = usp;
-               p->thread->reg13 = arg;
+               p->thread.reg12 = usp;
+               p->thread.reg13 = arg;
                p->thread.reg3 = (unsigned long) ret_from_kernel_thread;
        } else {
                *childregs = *current_pt_regs();
index 2137ad6..78c4fdb 100644 (file)
@@ -506,12 +506,17 @@ config SUN_OPENPROMFS
          Only choose N if you know in advance that you will not need to modify
          OpenPROM settings on the running system.
 
-# Makefile helper
+# Makefile helpers
 config SPARC64_PCI
        bool
        default y
        depends on SPARC64 && PCI
 
+config SPARC64_PCI_MSI
+       bool
+       default y
+       depends on SPARC64_PCI && PCI_MSI
+
 endmenu
 
 menu "Executable file formats"
index e204f90..7c90c50 100644 (file)
@@ -254,7 +254,7 @@ static int sun_fd_request_irq(void)
                once = 1;
 
                error = request_irq(FLOPPY_IRQ, sparc_floppy_irq,
-                                   IRQF_DISABLED, "floppy", NULL);
+                                   0, "floppy", NULL);
 
                return ((error == 0) ? 0 : -1);
        }
index 5080d16..ec2e2e2 100644 (file)
@@ -9,7 +9,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-               asm goto("1:\n\t"
+               asm_volatile_goto("1:\n\t"
                         "nop\n\t"
                         "nop\n\t"
                         ".pushsection __jump_table,  \"aw\"\n\t"
index d432fb2..d15cc17 100644 (file)
@@ -1,3 +1,4 @@
+
 #
 # Makefile for the linux kernel.
 #
@@ -99,7 +100,7 @@ obj-$(CONFIG_STACKTRACE)     += stacktrace.o
 obj-$(CONFIG_SPARC64_PCI)    += pci.o pci_common.o psycho_common.o
 obj-$(CONFIG_SPARC64_PCI)    += pci_psycho.o pci_sabre.o pci_schizo.o
 obj-$(CONFIG_SPARC64_PCI)    += pci_sun4v.o pci_sun4v_asm.o pci_fire.o
-obj-$(CONFIG_PCI_MSI)        += pci_msi.o
+obj-$(CONFIG_SPARC64_PCI_MSI) += pci_msi.o
 
 obj-$(CONFIG_COMPAT)         += sys32.o sys_sparc32.o signal32.o
 
index 62d6b15..dff60ab 100644 (file)
@@ -849,9 +849,8 @@ void ldom_reboot(const char *boot_command)
        if (boot_command && strlen(boot_command)) {
                unsigned long len;
 
-               strcpy(full_boot_str, "boot ");
-               strlcpy(full_boot_str + strlen("boot "), boot_command,
-                       sizeof(full_boot_str + strlen("boot ")));
+               snprintf(full_boot_str, sizeof(full_boot_str), "boot %s",
+                        boot_command);
                len = strlen(full_boot_str);
 
                if (reboot_data_supported) {
index 54df554..e01d75d 100644 (file)
@@ -1249,12 +1249,12 @@ int ldc_bind(struct ldc_channel *lp, const char *name)
        snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
        snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
 
-       err = request_irq(lp->cfg.rx_irq, ldc_rx, IRQF_DISABLED,
+       err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
                          lp->rx_irq_name, lp);
        if (err)
                return err;
 
-       err = request_irq(lp->cfg.tx_irq, ldc_tx, IRQF_DISABLED,
+       err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
                          lp->tx_irq_name, lp);
        if (err) {
                free_irq(lp->cfg.rx_irq, lp);
index d385eaa..7097984 100644 (file)
@@ -166,7 +166,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
  *
  * Atomically sets @v to @i and returns old @v
  */
-static inline u64 atomic64_xchg(atomic64_t *v, u64 n)
+static inline long long atomic64_xchg(atomic64_t *v, long long n)
 {
        return xchg64(&v->counter, n);
 }
@@ -180,7 +180,8 @@ static inline u64 atomic64_xchg(atomic64_t *v, u64 n)
  * Atomically checks if @v holds @o and replaces it with @n if so.
  * Returns the old value at @v.
  */
-static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
+static inline long long atomic64_cmpxchg(atomic64_t *v, long long o,
+                                       long long n)
 {
        return cmpxchg64(&v->counter, o, n);
 }
index 0d0395b..1ad4a1f 100644 (file)
@@ -80,7 +80,7 @@ static inline void atomic_set(atomic_t *v, int n)
 /* A 64bit atomic type */
 
 typedef struct {
-       u64 __aligned(8) counter;
+       long long counter;
 } atomic64_t;
 
 #define ATOMIC64_INIT(val) { (val) }
@@ -91,14 +91,14 @@ typedef struct {
  *
  * Atomically reads the value of @v.
  */
-static inline u64 atomic64_read(const atomic64_t *v)
+static inline long long atomic64_read(const atomic64_t *v)
 {
        /*
         * Requires an atomic op to read both 32-bit parts consistently.
         * Casting away const is safe since the atomic support routines
         * do not write to memory if the value has not been modified.
         */
-       return _atomic64_xchg_add((u64 *)&v->counter, 0);
+       return _atomic64_xchg_add((long long *)&v->counter, 0);
 }
 
 /**
@@ -108,7 +108,7 @@ static inline u64 atomic64_read(const atomic64_t *v)
  *
  * Atomically adds @i to @v.
  */
-static inline void atomic64_add(u64 i, atomic64_t *v)
+static inline void atomic64_add(long long i, atomic64_t *v)
 {
        _atomic64_xchg_add(&v->counter, i);
 }
@@ -120,7 +120,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
  *
  * Atomically adds @i to @v and returns @i + @v
  */
-static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
 {
        smp_mb();  /* barrier for proper semantics */
        return _atomic64_xchg_add(&v->counter, i) + i;
@@ -135,7 +135,8 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
  * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns non-zero if @v was not @u, and zero otherwise.
  */
-static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
+static inline long long atomic64_add_unless(atomic64_t *v, long long a,
+                                       long long u)
 {
        smp_mb();  /* barrier for proper semantics */
        return _atomic64_xchg_add_unless(&v->counter, a, u) != u;
@@ -151,7 +152,7 @@ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
  * atomic64_set() can't be just a raw store, since it would be lost if it
  * fell between the load and store of one of the other atomic ops.
  */
-static inline void atomic64_set(atomic64_t *v, u64 n)
+static inline void atomic64_set(atomic64_t *v, long long n)
 {
        _atomic64_xchg(&v->counter, n);
 }
@@ -236,11 +237,13 @@ extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
 extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
 extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
 extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
-extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n);
-extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n);
-extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n);
-extern u64 __atomic64_xchg_add_unless(volatile u64 *p,
-                                     int *lock, u64 o, u64 n);
+extern long long __atomic64_cmpxchg(volatile long long *p, int *lock,
+                                       long long o, long long n);
+extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_xchg_add(volatile long long *p, int *lock,
+                                       long long n);
+extern long long __atomic64_xchg_add_unless(volatile long long *p,
+                                       int *lock, long long o, long long n);
 
 /* Return failure from the atomic wrappers. */
 struct __get_user __atomic_bad_address(int __user *addr);
index 4001d5e..0ccda3c 100644 (file)
@@ -35,10 +35,10 @@ int _atomic_xchg(int *ptr, int n);
 int _atomic_xchg_add(int *v, int i);
 int _atomic_xchg_add_unless(int *v, int a, int u);
 int _atomic_cmpxchg(int *ptr, int o, int n);
-u64 _atomic64_xchg(u64 *v, u64 n);
-u64 _atomic64_xchg_add(u64 *v, u64 i);
-u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u);
-u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n);
+long long _atomic64_xchg(long long *v, long long n);
+long long _atomic64_xchg_add(long long *v, long long i);
+long long _atomic64_xchg_add_unless(long long *v, long long a, long long u);
+long long _atomic64_cmpxchg(long long *v, long long o, long long n);
 
 #define xchg(ptr, n)                                                   \
        ({                                                              \
@@ -53,7 +53,8 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n);
                if (sizeof(*(ptr)) != 4)                                \
                        __cmpxchg_called_with_bad_pointer();            \
                smp_mb();                                               \
-               (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, (int)n); \
+               (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o,     \
+                                               (int)n);                \
        })
 
 #define xchg64(ptr, n)                                                 \
@@ -61,7 +62,8 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n);
                if (sizeof(*(ptr)) != 8)                                \
                        __xchg_called_with_bad_pointer();               \
                smp_mb();                                               \
-               (typeof(*(ptr)))_atomic64_xchg((u64 *)(ptr), (u64)(n)); \
+               (typeof(*(ptr)))_atomic64_xchg((long long *)(ptr),      \
+                                               (long long)(n));        \
        })
 
 #define cmpxchg64(ptr, o, n)                                           \
@@ -69,7 +71,8 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n);
                if (sizeof(*(ptr)) != 8)                                \
                        __cmpxchg_called_with_bad_pointer();            \
                smp_mb();                                               \
-               (typeof(*(ptr)))_atomic64_cmpxchg((u64 *)ptr, (u64)o, (u64)n); \
+               (typeof(*(ptr)))_atomic64_cmpxchg((long long *)ptr,     \
+                                       (long long)o, (long long)n);    \
        })
 
 #else
@@ -81,10 +84,11 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n);
                switch (sizeof(*(ptr))) {                               \
                case 4:                                                 \
                        __x = (typeof(__x))(unsigned long)              \
-                               __insn_exch4((ptr), (u32)(unsigned long)(n)); \
+                               __insn_exch4((ptr),                     \
+                                       (u32)(unsigned long)(n));       \
                        break;                                          \
                case 8:                                                 \
-                       __x = (typeof(__x))                     \
+                       __x = (typeof(__x))                             \
                                __insn_exch((ptr), (unsigned long)(n)); \
                        break;                                          \
                default:                                                \
@@ -103,10 +107,12 @@ u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n);
                switch (sizeof(*(ptr))) {                               \
                case 4:                                                 \
                        __x = (typeof(__x))(unsigned long)              \
-                               __insn_cmpexch4((ptr), (u32)(unsigned long)(n)); \
+                               __insn_cmpexch4((ptr),                  \
+                                       (u32)(unsigned long)(n));       \
                        break;                                          \
                case 8:                                                 \
-                       __x = (typeof(__x))__insn_cmpexch((ptr), (u64)(n)); \
+                       __x = (typeof(__x))__insn_cmpexch((ptr),        \
+                                               (long long)(n));        \
                        break;                                          \
                default:                                                \
                        __cmpxchg_called_with_bad_pointer();            \
index 63294f5..4f7ae39 100644 (file)
 #ifndef _ASM_TILE_PERCPU_H
 #define _ASM_TILE_PERCPU_H
 
-register unsigned long __my_cpu_offset __asm__("tp");
-#define __my_cpu_offset __my_cpu_offset
-#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp))
+register unsigned long my_cpu_offset_reg asm("tp");
+
+#ifdef CONFIG_PREEMPT
+/*
+ * For full preemption, we can't just use the register variable
+ * directly, since we need barrier() to hazard against it, causing the
+ * compiler to reload anything computed from a previous "tp" value.
+ * But we also don't want to use volatile asm, since we'd like the
+ * compiler to be able to cache the value across multiple percpu reads.
+ * So we use a fake stack read as a hazard against barrier().
+ * The 'U' constraint is like 'm' but disallows postincrement.
+ */
+static inline unsigned long __my_cpu_offset(void)
+{
+       unsigned long tp;
+       register unsigned long *sp asm("sp");
+       asm("move %0, tp" : "=r" (tp) : "U" (*sp));
+       return tp;
+}
+#define __my_cpu_offset __my_cpu_offset()
+#else
+/*
+ * We don't need to hazard against barrier() since "tp" doesn't ever
+ * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only
+ * changes at function call points, at which we are already re-reading
+ * the value of "tp" due to "my_cpu_offset_reg" being a global variable.
+ */
+#define __my_cpu_offset my_cpu_offset_reg
+#endif
+
+#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp))
 
 #include <asm-generic/percpu.h>
 
index df27a1f..531f4c3 100644 (file)
@@ -66,7 +66,7 @@ static struct hardwall_type hardwall_types[] = {
                0,
                "udn",
                LIST_HEAD_INIT(hardwall_types[HARDWALL_UDN].list),
-               __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_UDN].lock),
+               __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_UDN].lock),
                NULL
        },
 #ifndef __tilepro__
@@ -77,7 +77,7 @@ static struct hardwall_type hardwall_types[] = {
                1,  /* disabled pending hypervisor support */
                "idn",
                LIST_HEAD_INIT(hardwall_types[HARDWALL_IDN].list),
-               __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IDN].lock),
+               __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_IDN].lock),
                NULL
        },
        {  /* access to user-space IPI */
@@ -87,7 +87,7 @@ static struct hardwall_type hardwall_types[] = {
                0,
                "ipi",
                LIST_HEAD_INIT(hardwall_types[HARDWALL_IPI].list),
-               __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IPI].lock),
+               __SPIN_LOCK_UNLOCKED(hardwall_types[HARDWALL_IPI].lock),
                NULL
        },
 #endif
index 088d5c1..2cbe6d5 100644 (file)
@@ -815,6 +815,9 @@ STD_ENTRY(interrupt_return)
        }
        bzt     r28, 1f
        bnz     r29, 1f
+       /* Disable interrupts explicitly for preemption. */
+       IRQ_DISABLE(r20,r21)
+       TRACE_IRQS_OFF
        jal     preempt_schedule_irq
        FEEDBACK_REENTER(interrupt_return)
 1:
index ec755d3..b8fc497 100644 (file)
@@ -841,6 +841,9 @@ STD_ENTRY(interrupt_return)
        }
        beqzt   r28, 1f
        bnez    r29, 1f
+       /* Disable interrupts explicitly for preemption. */
+       IRQ_DISABLE(r20,r21)
+       TRACE_IRQS_OFF
        jal     preempt_schedule_irq
        FEEDBACK_REENTER(interrupt_return)
 1:
index 362284a..c93977a 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/mmzone.h>
 #include <linux/dcache.h>
 #include <linux/fs.h>
+#include <linux/string.h>
 #include <asm/backtrace.h>
 #include <asm/page.h>
 #include <asm/ucontext.h>
@@ -332,21 +333,18 @@ static void describe_addr(struct KBacktraceIterator *kbt,
        }
 
        if (vma->vm_file) {
-               char *s;
                p = d_path(&vma->vm_file->f_path, buf, bufsize);
                if (IS_ERR(p))
                        p = "?";
-               s = strrchr(p, '/');
-               if (s)
-                       p = s+1;
+               name = kbasename(p);
        } else {
-               p = "anon";
+               name = "anon";
        }
 
        /* Generate a string description of the vma info. */
-       namelen = strlen(p);
+       namelen = strlen(name);
        remaining = (bufsize - 1) - namelen;
-       memmove(buf, p, namelen);
+       memmove(buf, name, namelen);
        snprintf(buf + namelen, remaining, "[%lx+%lx] ",
                 vma->vm_start, vma->vm_end - vma->vm_start);
 }
index 759efa3..c89b211 100644 (file)
@@ -107,19 +107,19 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
 EXPORT_SYMBOL(_atomic_xor);
 
 
-u64 _atomic64_xchg(u64 *v, u64 n)
+long long _atomic64_xchg(long long *v, long long n)
 {
        return __atomic64_xchg(v, __atomic_setup(v), n);
 }
 EXPORT_SYMBOL(_atomic64_xchg);
 
-u64 _atomic64_xchg_add(u64 *v, u64 i)
+long long _atomic64_xchg_add(long long *v, long long i)
 {
        return __atomic64_xchg_add(v, __atomic_setup(v), i);
 }
 EXPORT_SYMBOL(_atomic64_xchg_add);
 
-u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u)
+long long _atomic64_xchg_add_unless(long long *v, long long a, long long u)
 {
        /*
         * Note: argument order is switched here since it is easier
@@ -130,7 +130,7 @@ u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u)
 }
 EXPORT_SYMBOL(_atomic64_xchg_add_unless);
 
-u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n)
+long long _atomic64_cmpxchg(long long *v, long long o, long long n)
 {
        return __atomic64_cmpxchg(v, __atomic_setup(v), o, n);
 }
index ee2fb9d..145d703 100644 (file)
@@ -860,7 +860,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
        bool "Local APIC support on uniprocessors"
-       depends on X86_32 && !SMP && !X86_32_NON_STANDARD
+       depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI
        ---help---
          A local APIC (Advanced Programmable Interrupt Controller) is an
          integrated interrupt controller in the CPU. If you have a single-CPU
@@ -885,11 +885,11 @@ config X86_UP_IOAPIC
 
 config X86_LOCAL_APIC
        def_bool y
-       depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+       depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
 
 config X86_IO_APIC
        def_bool y
-       depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
+       depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
 
 config X86_VISWS_APIC
        def_bool y
index d3f5c63..89270b4 100644 (file)
@@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
                 * Catch too early usage of this before alternatives
                 * have run.
                 */
-               asm goto("1: jmp %l[t_warn]\n"
+               asm_volatile_goto("1: jmp %l[t_warn]\n"
                         "2:\n"
                         ".section .altinstructions,\"a\"\n"
                         " .long 1b - .\n"
@@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 
 #endif
 
-               asm goto("1: jmp %l[t_no]\n"
+               asm_volatile_goto("1: jmp %l[t_no]\n"
                         "2:\n"
                         ".section .altinstructions,\"a\"\n"
                         " .long 1b - .\n"
@@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
  * have. Thus, we force the jump to the widest, 4-byte, signed relative
  * offset even though the last would often fit in less bytes.
  */
-               asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+               asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
                         "2:\n"
                         ".section .altinstructions,\"a\"\n"
                         " .long 1b - .\n"              /* src offset */
index 64507f3..6a2cefb 100644 (file)
@@ -18,7 +18,7 @@
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
-       asm goto("1:"
+       asm_volatile_goto("1:"
                ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
                ".pushsection __jump_table,  \"aw\" \n\t"
                _ASM_ALIGN "\n\t"
index 15f960c..24ec121 100644 (file)
@@ -274,13 +274,17 @@ struct x86_emulate_ctxt {
 
        bool guest_mode; /* guest running a nested guest */
        bool perm_ok; /* do not check permissions if true */
-       bool only_vendor_specific_insn;
+       bool ud;        /* inject an #UD if host doesn't support insn */
 
        bool have_exception;
        struct x86_exception exception;
 
-       /* decode cache */
-       u8 twobyte;
+       /*
+        * decode cache
+        */
+
+       /* current opcode length in bytes */
+       u8 opcode_len;
        u8 b;
        u8 intercept;
        u8 lock_prefix;
index c76ff74..ae5d783 100644 (file)
 #define KVM_HPAGE_MASK(x)      (~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+       /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+       return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+               (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
 #define SELECTOR_TI_MASK (1 << 2)
 #define SELECTOR_RPL_MASK 0x03
 
@@ -253,7 +260,6 @@ struct kvm_pio_request {
  * mode.
  */
 struct kvm_mmu {
-       void (*new_cr3)(struct kvm_vcpu *vcpu);
        void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
        unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
        u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
@@ -261,7 +267,6 @@ struct kvm_mmu {
                          bool prefault);
        void (*inject_page_fault)(struct kvm_vcpu *vcpu,
                                  struct x86_exception *fault);
-       void (*free)(struct kvm_vcpu *vcpu);
        gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
                            struct x86_exception *exception);
        gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
@@ -389,6 +394,8 @@ struct kvm_vcpu_arch {
 
        struct fpu guest_fpu;
        u64 xcr0;
+       u64 guest_supported_xcr0;
+       u32 guest_xstate_size;
 
        struct kvm_pio_request pio;
        void *pio_data;
@@ -557,7 +564,9 @@ struct kvm_arch {
 
        struct list_head assigned_dev_head;
        struct iommu_domain *iommu_domain;
-       int iommu_flags;
+       bool iommu_noncoherent;
+#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
+       atomic_t noncoherent_dma_count;
        struct kvm_pic *vpic;
        struct kvm_ioapic *vioapic;
        struct kvm_pit *vpit;
@@ -780,11 +789,11 @@ void kvm_mmu_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
                u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
                                     struct kvm_memory_slot *slot,
@@ -922,13 +931,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
                       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
 
 void kvm_enable_tdp(void);
 void kvm_disable_tdp(void);
 
-int complete_pio(struct kvm_vcpu *vcpu);
-bool kvm_check_iopl(struct kvm_vcpu *vcpu);
-
 static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
 {
        return gpa;
index e7e6751..07537a4 100644 (file)
@@ -20,7 +20,7 @@
 static inline void __mutex_fastpath_lock(atomic_t *v,
                                         void (*fail_fn)(atomic_t *))
 {
-       asm volatile goto(LOCK_PREFIX "   decl %0\n"
+       asm_volatile_goto(LOCK_PREFIX "   decl %0\n"
                          "   jns %l[exit]\n"
                          : : "m" (v->counter)
                          : "memory", "cc"
@@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
 static inline void __mutex_fastpath_unlock(atomic_t *v,
                                           void (*fail_fn)(atomic_t *))
 {
-       asm volatile goto(LOCK_PREFIX "   incl %0\n"
+       asm_volatile_goto(LOCK_PREFIX "   incl %0\n"
                          "   jg %l[exit]\n"
                          : : "m" (v->counter)
                          : "memory", "cc"
index be8269b..d6b078e 100644 (file)
@@ -14,6 +14,8 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
                            struct timespec *ts);
 void pvclock_resume(void);
 
+void pvclock_touch_watchdogs(void);
+
 /*
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
  * yielding a 64-bit result.
index 6aef9fb..b913915 100644 (file)
@@ -79,30 +79,38 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
        return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
 }
 
-static inline unsigned long mfn_to_pfn(unsigned long mfn)
+static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
 {
        unsigned long pfn;
-       int ret = 0;
+       int ret;
 
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return mfn;
 
-       if (unlikely(mfn >= machine_to_phys_nr)) {
-               pfn = ~0;
-               goto try_override;
-       }
-       pfn = 0;
+       if (unlikely(mfn >= machine_to_phys_nr))
+               return ~0;
+
        /*
         * The array access can fail (e.g., device space beyond end of RAM).
         * In such cases it doesn't matter what we return (we return garbage),
         * but we must handle the fault without crashing!
         */
        ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
-try_override:
-       /* ret might be < 0 if there are no entries in the m2p for mfn */
        if (ret < 0)
-               pfn = ~0;
-       else if (get_phys_to_machine(pfn) != mfn)
+               return ~0;
+
+       return pfn;
+}
+
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+       unsigned long pfn;
+
+       if (xen_feature(XENFEAT_auto_translated_physmap))
+               return mfn;
+
+       pfn = mfn_to_pfn_no_overrides(mfn);
+       if (get_phys_to_machine(pfn) != mfn) {
                /*
                 * If this appears to be a foreign mfn (because the pfn
                 * doesn't map back to the mfn), then check the local override
@@ -111,6 +119,7 @@ try_override:
                 * m2p_find_override_pfn returns ~0 if it doesn't find anything.
                 */
                pfn = m2p_find_override_pfn(mfn, ~0);
+       }
 
        /* 
         * pfn is ~0 if there are no entries in the m2p for mfn or if the
index 5d9a303..d3a8778 100644 (file)
@@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 {
        __u32 padding[3];
 };
 
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC    2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT  4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX                BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC           BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT         BIT(2)
 
 /* for KVM_SET_CPUID2 */
 struct kvm_cpuid2 {
index bb04650..b93e09a 100644 (file)
 
 /* MSR_IA32_VMX_MISC bits */
 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
 /* AMD-V MSRs */
 
 #define MSR_VM_CR                       0xc0010114
index 8355c84..9d84491 100644 (file)
@@ -1506,7 +1506,7 @@ static int __init init_hw_perf_events(void)
                err = amd_pmu_init();
                break;
        default:
-               return 0;
+               err = -ENOTSUPP;
        }
        if (err != 0) {
                pr_cont("no PMU driver, software events only.\n");
@@ -1883,26 +1883,21 @@ static struct pmu pmu = {
 
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
-       userpg->cap_usr_time = 0;
-       userpg->cap_usr_time_zero = 0;
-       userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
+       userpg->cap_user_time = 0;
+       userpg->cap_user_time_zero = 0;
+       userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
        userpg->pmc_width = x86_pmu.cntval_bits;
 
-       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
-               return;
-
-       if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+       if (!sched_clock_stable)
                return;
 
-       userpg->cap_usr_time = 1;
+       userpg->cap_user_time = 1;
        userpg->time_mult = this_cpu_read(cyc2ns);
        userpg->time_shift = CYC2NS_SCALE_FACTOR;
        userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
 
-       if (sched_clock_stable && !check_tsc_disabled()) {
-               userpg->cap_usr_time_zero = 1;
-               userpg->time_zero = this_cpu_read(cyc2ns_offset);
-       }
+       userpg->cap_user_time_zero = 1;
+       userpg->time_zero = this_cpu_read(cyc2ns_offset);
 }
 
 /*
index 9db76c3..f31a165 100644 (file)
@@ -2325,6 +2325,7 @@ __init int intel_pmu_init(void)
                break;
 
        case 55: /* Atom 22nm "Silvermont" */
+       case 77: /* Avoton "Silvermont" */
                memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
index 8ed4458..4118f9f 100644 (file)
@@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
        box->hrtimer.function = uncore_pmu_hrtimer;
 }
 
-struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
 {
        struct intel_uncore_box *box;
        int i, size;
 
        size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
 
-       box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
+       box = kzalloc_node(size, GFP_KERNEL, node);
        if (!box)
                return NULL;
 
@@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
        struct intel_uncore_box *fake_box;
        int ret = -EINVAL, n;
 
-       fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
+       fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
        if (!fake_box)
                return -ENOMEM;
 
@@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
        }
 
        type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
-       box = uncore_alloc_box(type, 0);
+       box = uncore_alloc_box(type, NUMA_NO_NODE);
        if (!box)
                return -ENOMEM;
 
@@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id)
                        if (pmu->func_id < 0)
                                pmu->func_id = j;
 
-                       box = uncore_alloc_box(type, cpu);
+                       box = uncore_alloc_box(type, cpu_to_node(cpu));
                        if (!box)
                                return -ENOMEM;
 
index 697b93a..a0e2a8a 100644 (file)
@@ -775,11 +775,22 @@ void __init kvm_spinlock_init(void)
        if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
                return;
 
-       printk(KERN_INFO "KVM setup paravirtual spinlock\n");
+       pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
+       pv_lock_ops.unlock_kick = kvm_unlock_kick;
+}
+
+static __init int kvm_spinlock_init_jump(void)
+{
+       if (!kvm_para_available())
+               return 0;
+       if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+               return 0;
 
        static_key_slow_inc(&paravirt_ticketlocks_enabled);
+       printk(KERN_INFO "KVM setup paravirtual spinlock\n");
 
-       pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
-       pv_lock_ops.unlock_kick = kvm_unlock_kick;
+       return 0;
 }
+early_initcall(kvm_spinlock_init_jump);
+
 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
index 1570e07..e604109 100644 (file)
@@ -139,6 +139,7 @@ bool kvm_check_and_clear_guest_paused(void)
        src = &hv_clock[cpu].pvti;
        if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
                src->flags &= ~PVCLOCK_GUEST_STOPPED;
+               pvclock_touch_watchdogs();
                ret = true;
        }
 
index 7123b5d..af99f71 100644 (file)
@@ -216,6 +216,7 @@ int apply_microcode_amd(int cpu)
        /* need to apply patch? */
        if (rev >= mc_amd->hdr.patch_id) {
                c->microcode = rev;
+               uci->cpu_sig.rev = rev;
                return 0;
        }
 
index a16bae3..2f355d2 100644 (file)
@@ -43,6 +43,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
        return pv_tsc_khz;
 }
 
+void pvclock_touch_watchdogs(void)
+{
+       touch_softlockup_watchdog_sync();
+       clocksource_touch_watchdog();
+       rcu_cpu_stall_reset();
+       reset_hung_task_detector();
+}
+
 static atomic64_t last_value = ATOMIC64_INIT(0);
 
 void pvclock_resume(void)
@@ -74,6 +82,11 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
                version = __pvclock_read_cycles(src, &ret, &flags);
        } while ((src->version & 1) || version != src->version);
 
+       if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
+               src->flags &= ~PVCLOCK_GUEST_STOPPED;
+               pvclock_touch_watchdogs();
+       }
+
        if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
                (flags & PVCLOCK_TSC_STABLE_BIT))
                return ret;
index 563ed91..7e920bf 100644 (file)
@@ -326,6 +326,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"),
                },
        },
+       {       /* Handle problems with rebooting on the Latitude E5410. */
+               .callback = set_pci_reboot,
+               .ident = "Dell Latitude E5410",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5410"),
+               },
+       },
        {       /* Handle problems with rebooting on the Latitude E5420. */
                .callback = set_pci_reboot,
                .ident = "Dell Latitude E5420",
@@ -352,12 +360,28 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
        },
        {       /* Handle problems with rebooting on the Precision M6600. */
                .callback = set_pci_reboot,
-               .ident = "Dell OptiPlex 990",
+               .ident = "Dell Precision M6600",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
                },
        },
+       {       /* Handle problems with rebooting on the Dell PowerEdge C6100. */
+               .callback = set_pci_reboot,
+               .ident = "Dell PowerEdge C6100",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+               },
+       },
+       {       /* Some C6100 machines were shipped with vendor being 'Dell'. */
+               .callback = set_pci_reboot,
+               .ident = "Dell PowerEdge C6100",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+               },
+       },
        { }
 };
 
index 22513e9..86179d4 100644 (file)
@@ -72,14 +72,14 @@ __init int create_simplefb(const struct screen_info *si,
         * the part that is occupied by the framebuffer */
        len = mode->height * mode->stride;
        len = PAGE_ALIGN(len);
-       if (len > si->lfb_size << 16) {
+       if (len > (u64)si->lfb_size << 16) {
                printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
                return -EINVAL;
        }
 
        /* setup IORESOURCE_MEM as framebuffer memory */
        memset(&res, 0, sizeof(res));
-       res.flags = IORESOURCE_MEM;
+       res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
        res.name = simplefb_resname;
        res.start = si->lfb_base;
        res.end = si->lfb_base + len - 1;
index a47a3e5..b89c5db 100644 (file)
@@ -38,6 +38,7 @@ config KVM
        select PERF_EVENTS
        select HAVE_KVM_MSI
        select HAVE_KVM_CPU_RELAX_INTERCEPT
+       select KVM_VFIO
        ---help---
          Support hosting fully virtualized guest machines using hardware
          virtualization extensions.  You will need a fairly recent
index bf4fb04..25d22b2 100644 (file)
@@ -9,7 +9,7 @@ KVM := ../../../virt/kvm
 
 kvm-y                  += $(KVM)/kvm_main.o $(KVM)/ioapic.o \
                                $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
-                               $(KVM)/eventfd.o $(KVM)/irqchip.o
+                               $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)    += $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
 
index b110fe6..c697625 100644 (file)
 #include "mmu.h"
 #include "trace.h"
 
+static u32 xstate_required_size(u64 xstate_bv)
+{
+       int feature_bit = 0;
+       u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
+       xstate_bv &= ~XSTATE_FPSSE;
+       while (xstate_bv) {
+               if (xstate_bv & 0x1) {
+                       u32 eax, ebx, ecx, edx;
+                       cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
+                       ret = max(ret, eax + ebx);
+               }
+
+               xstate_bv >>= 1;
+               feature_bit++;
+       }
+
+       return ret;
+}
+
 void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
@@ -46,6 +66,18 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
                        apic->lapic_timer.timer_mode_mask = 1 << 17;
        }
 
+       best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+       if (!best) {
+               vcpu->arch.guest_supported_xcr0 = 0;
+               vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+       } else {
+               vcpu->arch.guest_supported_xcr0 =
+                       (best->eax | ((u64)best->edx << 32)) &
+                       host_xcr0 & KVM_SUPPORTED_XCR0;
+               vcpu->arch.guest_xstate_size =
+                       xstate_required_size(vcpu->arch.guest_supported_xcr0);
+       }
+
        kvm_pmu_cpuid_update(vcpu);
 }
 
@@ -182,13 +214,35 @@ static bool supported_xcr0_bit(unsigned bit)
 {
        u64 mask = ((u64)1 << bit);
 
-       return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
+       return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
 }
 
 #define F(x) bit(X86_FEATURE_##x)
 
-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-                        u32 index, int *nent, int maxnent)
+static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
+                                  u32 func, u32 index, int *nent, int maxnent)
+{
+       switch (func) {
+       case 0:
+               entry->eax = 1;         /* only one leaf currently */
+               ++*nent;
+               break;
+       case 1:
+               entry->ecx = F(MOVBE);
+               ++*nent;
+               break;
+       default:
+               break;
+       }
+
+       entry->function = func;
+       entry->index = index;
+
+       return 0;
+}
+
+static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+                                u32 index, int *nent, int maxnent)
 {
        int r;
        unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -383,6 +437,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
        case 0xd: {
                int idx, i;
 
+               entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
+               entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                for (idx = 1, i = 1; idx < 64; ++idx) {
                        if (*nent >= maxnent)
@@ -481,6 +537,15 @@ out:
        return r;
 }
 
+static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
+                       u32 idx, int *nent, int maxnent, unsigned int type)
+{
+       if (type == KVM_GET_EMULATED_CPUID)
+               return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
+
+       return __do_cpuid_ent(entry, func, idx, nent, maxnent);
+}
+
 #undef F
 
 struct kvm_cpuid_param {
@@ -495,8 +560,36 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
        return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
 }
 
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-                                     struct kvm_cpuid_entry2 __user *entries)
+static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
+                                __u32 num_entries, unsigned int ioctl_type)
+{
+       int i;
+       __u32 pad[3];
+
+       if (ioctl_type != KVM_GET_EMULATED_CPUID)
+               return false;
+
+       /*
+        * We want to make sure that ->padding is being passed clean from
+        * userspace in case we want to use it for something in the future.
+        *
+        * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
+        * have to give ourselves satisfied only with the emulated side. /me
+        * sheds a tear.
+        */
+       for (i = 0; i < num_entries; i++) {
+               if (copy_from_user(pad, entries[i].padding, sizeof(pad)))
+                       return true;
+
+               if (pad[0] || pad[1] || pad[2])
+                       return true;
+       }
+       return false;
+}
+
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+                           struct kvm_cpuid_entry2 __user *entries,
+                           unsigned int type)
 {
        struct kvm_cpuid_entry2 *cpuid_entries;
        int limit, nent = 0, r = -E2BIG, i;
@@ -513,8 +606,12 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
                goto out;
        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
                cpuid->nent = KVM_MAX_CPUID_ENTRIES;
+
+       if (sanity_check_entries(entries, cpuid->nent, type))
+               return -EINVAL;
+
        r = -ENOMEM;
-       cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
+       cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
        if (!cpuid_entries)
                goto out;
 
@@ -526,7 +623,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
                        continue;
 
                r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
-                               &nent, cpuid->nent);
+                               &nent, cpuid->nent, type);
 
                if (r)
                        goto out_free;
@@ -537,7 +634,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
                limit = cpuid_entries[nent - 1].eax;
                for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
                        r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
-                                    &nent, cpuid->nent);
+                                    &nent, cpuid->nent, type);
 
                if (r)
                        goto out_free;
@@ -661,6 +758,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
                *edx = best->edx;
        } else
                *eax = *ebx = *ecx = *edx = 0;
+       trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
 }
 EXPORT_SYMBOL_GPL(kvm_cpuid);
 
@@ -676,6 +774,5 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
        kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
        kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
        kvm_x86_ops->skip_emulated_instruction(vcpu);
-       trace_kvm_cpuid(function, eax, ebx, ecx, edx);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
index b7fd079..f1e4895 100644 (file)
@@ -6,8 +6,9 @@
 void kvm_update_cpuid(struct kvm_vcpu *vcpu);
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
                                              u32 function, u32 index);
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-                                     struct kvm_cpuid_entry2 __user *entries);
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+                           struct kvm_cpuid_entry2 __user *entries,
+                           unsigned int type);
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
                             struct kvm_cpuid *cpuid,
                             struct kvm_cpuid_entry __user *entries);
index ddc3f3d..07ffca0 100644 (file)
 #define Mov         (1<<20)
 /* Misc flags */
 #define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
-#define VendorSpecific (1<<22) /* Vendor specific instruction */
+#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
 #define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
 #define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
 #define Undefined   (1<<25) /* No Such Instruction */
@@ -785,9 +785,10 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  */
 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
-                            int highbyte_regs)
+                            int byteop)
 {
        void *p;
+       int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
 
        if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
                p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
@@ -1024,7 +1025,6 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
                                    struct operand *op)
 {
        unsigned reg = ctxt->modrm_reg;
-       int highbyte_regs = ctxt->rex_prefix == 0;
 
        if (!(ctxt->d & ModRM))
                reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
@@ -1045,13 +1045,9 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
        }
 
        op->type = OP_REG;
-       if (ctxt->d & ByteOp) {
-               op->addr.reg = decode_register(ctxt, reg, highbyte_regs);
-               op->bytes = 1;
-       } else {
-               op->addr.reg = decode_register(ctxt, reg, 0);
-               op->bytes = ctxt->op_bytes;
-       }
+       op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
+       op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
+
        fetch_register_operand(op);
        op->orig_val = op->val;
 }
@@ -1082,12 +1078,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
        ctxt->modrm_seg = VCPU_SREG_DS;
 
        if (ctxt->modrm_mod == 3) {
-               int highbyte_regs = ctxt->rex_prefix == 0;
-
                op->type = OP_REG;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
-                                              highbyte_regs && (ctxt->d & ByteOp));
+                               ctxt->d & ByteOp);
                if (ctxt->d & Sse) {
                        op->type = OP_XMM;
                        op->bytes = 16;
@@ -2961,6 +2955,46 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
        return X86EMUL_CONTINUE;
 }
 
+#define FFL(x) bit(X86_FEATURE_##x)
+
+static int em_movbe(struct x86_emulate_ctxt *ctxt)
+{
+       u32 ebx, ecx, edx, eax = 1;
+       u16 tmp;
+
+       /*
+        * Check MOVBE is set in the guest-visible CPUID leaf.
+        */
+       ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+       if (!(ecx & FFL(MOVBE)))
+               return emulate_ud(ctxt);
+
+       switch (ctxt->op_bytes) {
+       case 2:
+               /*
+                * From MOVBE definition: "...When the operand size is 16 bits,
+                * the upper word of the destination register remains unchanged
+                * ..."
+                *
+                * Both casting ->valptr and ->val to u16 breaks strict aliasing
+                * rules so we have to do the operation almost per hand.
+                */
+               tmp = (u16)ctxt->src.val;
+               ctxt->dst.val &= ~0xffffUL;
+               ctxt->dst.val |= (unsigned long)swab16(tmp);
+               break;
+       case 4:
+               ctxt->dst.val = swab32((u32)ctxt->src.val);
+               break;
+       case 8:
+               ctxt->dst.val = swab64(ctxt->src.val);
+               break;
+       default:
+               return X86EMUL_PROPAGATE_FAULT;
+       }
+       return X86EMUL_CONTINUE;
+}
+
 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
 {
        if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
@@ -3256,6 +3290,18 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
        return X86EMUL_CONTINUE;
 }
 
+static int em_sahf(struct x86_emulate_ctxt *ctxt)
+{
+       u32 flags;
+
+       flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
+       flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
+
+       ctxt->eflags &= ~0xffUL;
+       ctxt->eflags |= flags | X86_EFLAGS_FIXED;
+       return X86EMUL_CONTINUE;
+}
+
 static int em_lahf(struct x86_emulate_ctxt *ctxt)
 {
        *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
@@ -3502,7 +3548,7 @@ static const struct opcode group7_rm1[] = {
 
 static const struct opcode group7_rm3[] = {
        DIP(SrcNone | Prot | Priv,              vmrun,          check_svme_pa),
-       II(SrcNone  | Prot | VendorSpecific,    em_vmmcall,     vmmcall),
+       II(SrcNone  | Prot | EmulateOnUD,       em_vmmcall,     vmmcall),
        DIP(SrcNone | Prot | Priv,              vmload,         check_svme_pa),
        DIP(SrcNone | Prot | Priv,              vmsave,         check_svme_pa),
        DIP(SrcNone | Prot | Priv,              stgi,           check_svme),
@@ -3587,7 +3633,7 @@ static const struct group_dual group7 = { {
        II(SrcMem16 | Mov | Priv,               em_lmsw, lmsw),
        II(SrcMem | ByteOp | Priv | NoAccess,   em_invlpg, invlpg),
 }, {
-       I(SrcNone | Priv | VendorSpecific,      em_vmcall),
+       I(SrcNone | Priv | EmulateOnUD, em_vmcall),
        EXT(0, group7_rm1),
        N, EXT(0, group7_rm3),
        II(SrcNone | DstMem | Mov,              em_smsw, smsw), N,
@@ -3750,7 +3796,8 @@ static const struct opcode opcode_table[256] = {
        D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
        I(SrcImmFAddr | No64, em_call_far), N,
        II(ImplicitOps | Stack, em_pushf, pushf),
-       II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf),
+       II(ImplicitOps | Stack, em_popf, popf),
+       I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
        /* 0xA0 - 0xA7 */
        I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
        I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
@@ -3810,7 +3857,7 @@ static const struct opcode opcode_table[256] = {
 static const struct opcode twobyte_table[256] = {
        /* 0x00 - 0x0F */
        G(0, group6), GD(0, &group7), N, N,
-       N, I(ImplicitOps | VendorSpecific, em_syscall),
+       N, I(ImplicitOps | EmulateOnUD, em_syscall),
        II(ImplicitOps | Priv, em_clts, clts), N,
        DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
        N, D(ImplicitOps | ModRM), N, N,
@@ -3830,8 +3877,8 @@ static const struct opcode twobyte_table[256] = {
        IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
        II(ImplicitOps | Priv, em_rdmsr, rdmsr),
        IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
-       I(ImplicitOps | VendorSpecific, em_sysenter),
-       I(ImplicitOps | Priv | VendorSpecific, em_sysexit),
+       I(ImplicitOps | EmulateOnUD, em_sysenter),
+       I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
        N, N,
        N, N, N, N, N, N, N, N,
        /* 0x40 - 0x4F */
@@ -3892,6 +3939,30 @@ static const struct opcode twobyte_table[256] = {
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
 };
 
+static const struct gprefix three_byte_0f_38_f0 = {
+       I(DstReg | SrcMem | Mov, em_movbe), N, N, N
+};
+
+static const struct gprefix three_byte_0f_38_f1 = {
+       I(DstMem | SrcReg | Mov, em_movbe), N, N, N
+};
+
+/*
+ * Insns below are selected by the prefix which indexed by the third opcode
+ * byte.
+ */
+static const struct opcode opcode_map_0f_38[256] = {
+       /* 0x00 - 0x7f */
+       X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
+       /* 0x80 - 0xef */
+       X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
+       /* 0xf0 - 0xf1 */
+       GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
+       GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
+       /* 0xf2 - 0xff */
+       N, N, X4(N), X8(N)
+};
+
 #undef D
 #undef N
 #undef G
@@ -4040,7 +4111,8 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
        case OpMem8:
                ctxt->memop.bytes = 1;
                if (ctxt->memop.type == OP_REG) {
-                       ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1);
+                       ctxt->memop.addr.reg = decode_register(ctxt,
+                                       ctxt->modrm_rm, true);
                        fetch_register_operand(&ctxt->memop);
                }
                goto mem_common;
@@ -4126,6 +4198,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
        ctxt->_eip = ctxt->eip;
        ctxt->fetch.start = ctxt->_eip;
        ctxt->fetch.end = ctxt->fetch.start + insn_len;
+       ctxt->opcode_len = 1;
        if (insn_len > 0)
                memcpy(ctxt->fetch.data, insn, insn_len);
 
@@ -4208,9 +4281,16 @@ done_prefixes:
        opcode = opcode_table[ctxt->b];
        /* Two-byte opcode? */
        if (ctxt->b == 0x0f) {
-               ctxt->twobyte = 1;
+               ctxt->opcode_len = 2;
                ctxt->b = insn_fetch(u8, ctxt);
                opcode = twobyte_table[ctxt->b];
+
+               /* 0F_38 opcode map */
+               if (ctxt->b == 0x38) {
+                       ctxt->opcode_len = 3;
+                       ctxt->b = insn_fetch(u8, ctxt);
+                       opcode = opcode_map_0f_38[ctxt->b];
+               }
        }
        ctxt->d = opcode.flags;
 
@@ -4267,7 +4347,7 @@ done_prefixes:
        if (ctxt->d == 0 || (ctxt->d & NotImpl))
                return EMULATION_FAILED;
 
-       if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
+       if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
                return EMULATION_FAILED;
 
        if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
@@ -4540,8 +4620,10 @@ special_insn:
                goto writeback;
        }
 
-       if (ctxt->twobyte)
+       if (ctxt->opcode_len == 2)
                goto twobyte_insn;
+       else if (ctxt->opcode_len == 3)
+               goto threebyte_insn;
 
        switch (ctxt->b) {
        case 0x63:              /* movsxd */
@@ -4726,6 +4808,8 @@ twobyte_insn:
                goto cannot_emulate;
        }
 
+threebyte_insn:
+
        if (rc != X86EMUL_CONTINUE)
                goto done;
 
index dce0df8..40772ef 100644 (file)
@@ -2570,11 +2570,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        kvm_release_pfn_clean(pfn);
 }
 
-static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
-{
-       mmu_free_roots(vcpu);
-}
-
 static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
                                     bool no_dirty_log)
 {
@@ -3424,18 +3419,11 @@ out_unlock:
        return 0;
 }
 
-static void nonpaging_free(struct kvm_vcpu *vcpu)
-{
-       mmu_free_roots(vcpu);
-}
-
-static int nonpaging_init_context(struct kvm_vcpu *vcpu,
-                                 struct kvm_mmu *context)
+static void nonpaging_init_context(struct kvm_vcpu *vcpu,
+                                  struct kvm_mmu *context)
 {
-       context->new_cr3 = nonpaging_new_cr3;
        context->page_fault = nonpaging_page_fault;
        context->gva_to_gpa = nonpaging_gva_to_gpa;
-       context->free = nonpaging_free;
        context->sync_page = nonpaging_sync_page;
        context->invlpg = nonpaging_invlpg;
        context->update_pte = nonpaging_update_pte;
@@ -3444,7 +3432,6 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu,
        context->root_hpa = INVALID_PAGE;
        context->direct_map = true;
        context->nx = false;
-       return 0;
 }
 
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -3454,9 +3441,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
 
-static void paging_new_cr3(struct kvm_vcpu *vcpu)
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
 {
-       pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
        mmu_free_roots(vcpu);
 }
 
@@ -3471,11 +3457,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
        vcpu->arch.mmu.inject_page_fault(vcpu, fault);
 }
 
-static void paging_free(struct kvm_vcpu *vcpu)
-{
-       nonpaging_free(vcpu);
-}
-
 static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
                           unsigned access, int *nr_present)
 {
@@ -3665,9 +3646,9 @@ static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
        mmu->last_pte_bitmap = map;
 }
 
-static int paging64_init_context_common(struct kvm_vcpu *vcpu,
-                                       struct kvm_mmu *context,
-                                       int level)
+static void paging64_init_context_common(struct kvm_vcpu *vcpu,
+                                        struct kvm_mmu *context,
+                                        int level)
 {
        context->nx = is_nx(vcpu);
        context->root_level = level;
@@ -3677,27 +3658,24 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
        update_last_pte_bitmap(vcpu, context);
 
        ASSERT(is_pae(vcpu));
-       context->new_cr3 = paging_new_cr3;
        context->page_fault = paging64_page_fault;
        context->gva_to_gpa = paging64_gva_to_gpa;
        context->sync_page = paging64_sync_page;
        context->invlpg = paging64_invlpg;
        context->update_pte = paging64_update_pte;
-       context->free = paging_free;
        context->shadow_root_level = level;
        context->root_hpa = INVALID_PAGE;
        context->direct_map = false;
-       return 0;
 }
 
-static int paging64_init_context(struct kvm_vcpu *vcpu,
-                                struct kvm_mmu *context)
+static void paging64_init_context(struct kvm_vcpu *vcpu,
+                                 struct kvm_mmu *context)
 {
-       return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
+       paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
 }
 
-static int paging32_init_context(struct kvm_vcpu *vcpu,
-                                struct kvm_mmu *context)
+static void paging32_init_context(struct kvm_vcpu *vcpu,
+                                 struct kvm_mmu *context)
 {
        context->nx = false;
        context->root_level = PT32_ROOT_LEVEL;
@@ -3706,33 +3684,28 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
        update_permission_bitmask(vcpu, context, false);
        update_last_pte_bitmap(vcpu, context);
 
-       context->new_cr3 = paging_new_cr3;
        context->page_fault = paging32_page_fault;
        context->gva_to_gpa = paging32_gva_to_gpa;
-       context->free = paging_free;
        context->sync_page = paging32_sync_page;
        context->invlpg = paging32_invlpg;
        context->update_pte = paging32_update_pte;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = INVALID_PAGE;
        context->direct_map = false;
-       return 0;
 }
 
-static int paging32E_init_context(struct kvm_vcpu *vcpu,
-                                 struct kvm_mmu *context)
+static void paging32E_init_context(struct kvm_vcpu *vcpu,
+                                  struct kvm_mmu *context)
 {
-       return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
+       paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
 }
 
-static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu *context = vcpu->arch.walk_mmu;
 
        context->base_role.word = 0;
-       context->new_cr3 = nonpaging_new_cr3;
        context->page_fault = tdp_page_fault;
-       context->free = nonpaging_free;
        context->sync_page = nonpaging_sync_page;
        context->invlpg = nonpaging_invlpg;
        context->update_pte = nonpaging_update_pte;
@@ -3767,37 +3740,32 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
        update_permission_bitmask(vcpu, context, false);
        update_last_pte_bitmap(vcpu, context);
-
-       return 0;
 }
 
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
-       int r;
        bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
        if (!is_paging(vcpu))
-               r = nonpaging_init_context(vcpu, context);
+               nonpaging_init_context(vcpu, context);
        else if (is_long_mode(vcpu))
-               r = paging64_init_context(vcpu, context);
+               paging64_init_context(vcpu, context);
        else if (is_pae(vcpu))
-               r = paging32E_init_context(vcpu, context);
+               paging32E_init_context(vcpu, context);
        else
-               r = paging32_init_context(vcpu, context);
+               paging32_init_context(vcpu, context);
 
        vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
        vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
        vcpu->arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
        vcpu->arch.mmu.base_role.smep_andnot_wp
                = smep && !is_write_protection(vcpu);
-
-       return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
                bool execonly)
 {
        ASSERT(vcpu);
@@ -3806,37 +3774,30 @@ int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
        context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 
        context->nx = true;
-       context->new_cr3 = paging_new_cr3;
        context->page_fault = ept_page_fault;
        context->gva_to_gpa = ept_gva_to_gpa;
        context->sync_page = ept_sync_page;
        context->invlpg = ept_invlpg;
        context->update_pte = ept_update_pte;
-       context->free = paging_free;
        context->root_level = context->shadow_root_level;
        context->root_hpa = INVALID_PAGE;
        context->direct_map = false;
 
        update_permission_bitmask(vcpu, context, true);
        reset_rsvds_bits_mask_ept(vcpu, context, execonly);
-
-       return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
-static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
+static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
-       int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
-
+       kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
        vcpu->arch.walk_mmu->set_cr3           = kvm_x86_ops->set_cr3;
        vcpu->arch.walk_mmu->get_cr3           = get_cr3;
        vcpu->arch.walk_mmu->get_pdptr         = kvm_pdptr_read;
        vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-
-       return r;
 }
 
-static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
 
@@ -3873,11 +3834,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 
        update_permission_bitmask(vcpu, g_context, false);
        update_last_pte_bitmap(vcpu, g_context);
-
-       return 0;
 }
 
-static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_mmu(struct kvm_vcpu *vcpu)
 {
        if (mmu_is_nested(vcpu))
                return init_kvm_nested_mmu(vcpu);
@@ -3887,18 +3846,12 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
                return init_kvm_softmmu(vcpu);
 }
 
-static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
        ASSERT(vcpu);
-       if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
-               /* mmu.free() should set root_hpa = INVALID_PAGE */
-               vcpu->arch.mmu.free(vcpu);
-}
 
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
-{
-       destroy_kvm_mmu(vcpu);
-       return init_kvm_mmu(vcpu);
+       kvm_mmu_unload(vcpu);
+       init_kvm_mmu(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
 
@@ -3923,6 +3876,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
        mmu_free_roots(vcpu);
+       WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
 
@@ -4281,12 +4235,12 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
        return alloc_mmu_pages(vcpu);
 }
 
-int kvm_mmu_setup(struct kvm_vcpu *vcpu)
+void kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
-       return init_kvm_mmu(vcpu);
+       init_kvm_mmu(vcpu);
 }
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
@@ -4428,7 +4382,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
        int nr_to_scan = sc->nr_to_scan;
        unsigned long freed = 0;
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
 
        list_for_each_entry(kvm, &vm_list, vm_list) {
                int idx;
@@ -4478,9 +4432,8 @@ unlock:
                break;
        }
 
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return freed;
-
 }
 
 static unsigned long
@@ -4574,7 +4527,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
        ASSERT(vcpu);
 
-       destroy_kvm_mmu(vcpu);
+       kvm_mmu_unload(vcpu);
        free_mmu_pages(vcpu);
        mmu_free_memory_caches(vcpu);
 }
index 77e044a..2926152 100644 (file)
@@ -70,8 +70,8 @@ enum {
 };
 
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
                bool execonly);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
index c0bc803..c7168a5 100644 (file)
@@ -1959,11 +1959,9 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
        nested_svm_vmexit(svm);
 }
 
-static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 {
-       int r;
-
-       r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
+       kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
 
        vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
        vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
@@ -1971,8 +1969,6 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
        vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
        vcpu->arch.mmu.shadow_root_level = get_npt_level();
        vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
-
-       return r;
 }
 
 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
index a1216de..e293a62 100644 (file)
@@ -1498,7 +1498,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                        break;
 
        if (i == NR_AUTOLOAD_MSRS) {
-               printk_once(KERN_WARNING"Not enough mst switch entries. "
+               printk_once(KERN_WARNING "Not enough msr switch entries. "
                                "Can't add msr %x\n", msr);
                return;
        } else if (i == m->nr) {
@@ -1898,16 +1898,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 /*
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
- * This function assumes it is called with the exit reason in vmcs02 being
- * a #PF exception (this is the only case in which KVM injects a #PF when L2
- * is running).
  */
-static int nested_pf_handled(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-       /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-       if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
+       if (!(vmcs12->exception_bitmap & (1u << nr)))
                return 0;
 
        nested_vmx_vmexit(vcpu);
@@ -1921,8 +1917,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
-       if (nr == PF_VECTOR && is_guest_mode(vcpu) &&
-           !vmx->nested.nested_run_pending && nested_pf_handled(vcpu))
+       if (!reinject && is_guest_mode(vcpu) &&
+           nested_vmx_check_exception(vcpu, nr))
                return;
 
        if (has_error_code) {
@@ -2204,9 +2200,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #ifdef CONFIG_X86_64
                VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
-               VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+               VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+               VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+       if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+           !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
+               nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+               nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+       }
        nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
-                                     VM_EXIT_LOAD_IA32_EFER);
+               VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
 
        /* entry controls */
        rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2226,7 +2228,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
                nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
        nested_vmx_procbased_ctls_low = 0;
        nested_vmx_procbased_ctls_high &=
-               CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING |
+               CPU_BASED_VIRTUAL_INTR_PENDING |
+               CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
                CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
                CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
                CPU_BASED_CR3_STORE_EXITING |
@@ -2252,13 +2255,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
        nested_vmx_secondary_ctls_low = 0;
        nested_vmx_secondary_ctls_high &=
                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+               SECONDARY_EXEC_UNRESTRICTED_GUEST |
                SECONDARY_EXEC_WBINVD_EXITING;
 
        if (enable_ept) {
                /* nested EPT: emulate EPT also to L1 */
                nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
                nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
-                        VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
+                        VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
+                        VMX_EPT_INVEPT_BIT;
                nested_vmx_ept_caps &= vmx_capability.ept;
                /*
                 * Since invept is completely emulated we support both global
@@ -3255,25 +3260,29 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 
 static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
        if (!test_bit(VCPU_EXREG_PDPTR,
                      (unsigned long *)&vcpu->arch.regs_dirty))
                return;
 
        if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-               vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
-               vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
-               vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
-               vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
+               vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
+               vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
+               vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
+               vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
        }
 }
 
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
        if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-               vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
-               vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
-               vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
-               vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+               mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+               mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+               mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+               mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
        }
 
        __set_bit(VCPU_EXREG_PDPTR,
@@ -3376,8 +3385,10 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        if (enable_ept) {
                eptp = construct_eptp(cr3);
                vmcs_write64(EPT_POINTER, eptp);
-               guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
-                       vcpu->kvm->arch.ept_identity_map_addr;
+               if (is_paging(vcpu) || is_guest_mode(vcpu))
+                       guest_cr3 = kvm_read_cr3(vcpu);
+               else
+                       guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
                ept_load_pdptrs(vcpu);
        }
 
@@ -4875,6 +4886,17 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
        hypercall[2] = 0xc1;
 }
 
+static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val)
+{
+       unsigned long always_on = VMXON_CR0_ALWAYSON;
+
+       if (nested_vmx_secondary_ctls_high &
+               SECONDARY_EXEC_UNRESTRICTED_GUEST &&
+           nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
+               always_on &= ~(X86_CR0_PE | X86_CR0_PG);
+       return (val & always_on) == always_on;
+}
+
 /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
@@ -4893,9 +4915,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
                val = (val & ~vmcs12->cr0_guest_host_mask) |
                        (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
 
-               /* TODO: will have to take unrestricted guest mode into
-                * account */
-               if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
+               if (!nested_cr0_valid(vmcs12, val))
                        return 1;
 
                if (kvm_set_cr0(vcpu, val))
@@ -5345,7 +5365,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
         * There are errata that may cause this bit to not be set:
         * AAK134, BY25.
         */
-       if (exit_qualification & INTR_INFO_UNBLOCK_NMI)
+       if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+                       cpu_has_virtual_nmis() &&
+                       (exit_qualification & INTR_INFO_UNBLOCK_NMI))
                vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
 
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -6716,6 +6738,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
        *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
+static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
+{
+       u64 delta_tsc_l1;
+       u32 preempt_val_l1, preempt_val_l2, preempt_scale;
+
+       if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
+                       PIN_BASED_VMX_PREEMPTION_TIMER))
+               return;
+       preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
+                       MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
+       preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+       delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
+               - vcpu->arch.last_guest_tsc;
+       preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
+       if (preempt_val_l2 <= preempt_val_l1)
+               preempt_val_l2 = 0;
+       else
+               preempt_val_l2 -= preempt_val_l1;
+       vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
+}
+
 /*
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
@@ -6730,20 +6773,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
        if (vmx->emulation_required)
                return handle_invalid_guest_state(vcpu);
 
-       /*
-        * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
-        * we did not inject a still-pending event to L1 now because of
-        * nested_run_pending, we need to re-enable this bit.
-        */
-       if (vmx->nested.nested_run_pending)
-               kvm_make_request(KVM_REQ_EVENT, vcpu);
-
-       if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH ||
-           exit_reason == EXIT_REASON_VMRESUME))
-               vmx->nested.nested_run_pending = 1;
-       else
-               vmx->nested.nested_run_pending = 0;
-
        if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
                nested_vmx_vmexit(vcpu);
                return 1;
@@ -7055,9 +7084,9 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
        case INTR_TYPE_HARD_EXCEPTION:
                if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
                        u32 err = vmcs_read32(error_code_field);
-                       kvm_queue_exception_e(vcpu, vector, err);
+                       kvm_requeue_exception_e(vcpu, vector, err);
                } else
-                       kvm_queue_exception(vcpu, vector);
+                       kvm_requeue_exception(vcpu, vector);
                break;
        case INTR_TYPE_SOFT_INTR:
                vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
@@ -7140,6 +7169,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        atomic_switch_perf_msrs(vmx);
        debugctlmsr = get_debugctlmsr();
 
+       if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
+               nested_adjust_preemption_timer(vcpu);
        vmx->__launched = vmx->loaded_vmcs->launched;
        asm(
                /* Store host registers */
@@ -7278,6 +7309,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
        trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
 
+       /*
+        * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
+        * we did not inject a still-pending event to L1 now because of
+        * nested_run_pending, we need to re-enable this bit.
+        */
+       if (vmx->nested.nested_run_pending)
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+       vmx->nested.nested_run_pending = 0;
+
        vmx_complete_atomic_exit(vmx);
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
@@ -7404,8 +7445,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
         */
        if (is_mmio)
                ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
-       else if (vcpu->kvm->arch.iommu_domain &&
-               !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
+       else if (kvm_arch_has_noncoherent_dma(vcpu->kvm))
                ret = kvm_get_guest_memory_type(vcpu, gfn) <<
                      VMX_EPT_MT_EPTE_SHIFT;
        else
@@ -7495,9 +7535,9 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
        return get_vmcs12(vcpu)->ept_pointer;
 }
 
-static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
-       int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
+       kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
                        nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
 
        vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
@@ -7505,8 +7545,6 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
        vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
 
        vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
-
-       return r;
 }
 
 static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -7514,6 +7552,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
        vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 }
 
+static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
+               struct x86_exception *fault)
+{
+       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+       WARN_ON(!is_guest_mode(vcpu));
+
+       /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
+       if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
+               nested_vmx_vmexit(vcpu);
+       else
+               kvm_inject_page_fault(vcpu, fault);
+}
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7527,6 +7579,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 exec_control;
+       u32 exit_control;
 
        vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
        vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7700,7 +7753,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
         * bits are further modified by vmx_set_efer() below.
         */
-       vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+       exit_control = vmcs_config.vmexit_ctrl;
+       if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+               exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+       vmcs_write32(VM_EXIT_CONTROLS, exit_control);
 
        /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
         * emulated by vmx_set_efer(), below.
@@ -7767,6 +7823,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        kvm_set_cr3(vcpu, vmcs12->guest_cr3);
        kvm_mmu_reset_context(vcpu);
 
+       if (!enable_ept)
+               vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
+
        /*
         * L1 may access the L2's PDPTR, so save them to construct vmcs12
         */
@@ -7775,10 +7834,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
                vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
                vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
-               __clear_bit(VCPU_EXREG_PDPTR,
-                               (unsigned long *)&vcpu->arch.regs_avail);
-               __clear_bit(VCPU_EXREG_PDPTR,
-                               (unsigned long *)&vcpu->arch.regs_dirty);
        }
 
        kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
@@ -7874,7 +7929,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                return 1;
        }
 
-       if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
+       if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) ||
            ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
                nested_vmx_entry_failure(vcpu, vmcs12,
                        EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
@@ -7936,6 +7991,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
        enter_guest_mode(vcpu);
 
+       vmx->nested.nested_run_pending = 1;
+
        vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
 
        cpu = get_cpu();
@@ -8003,7 +8060,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
        u32 idt_vectoring;
        unsigned int nr;
 
-       if (vcpu->arch.exception.pending) {
+       if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
                nr = vcpu->arch.exception.nr;
                idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
 
@@ -8021,7 +8078,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
                }
 
                vmcs12->idt_vectoring_info_field = idt_vectoring;
-       } else if (vcpu->arch.nmi_pending) {
+       } else if (vcpu->arch.nmi_injected) {
                vmcs12->idt_vectoring_info_field =
                        INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
        } else if (vcpu->arch.interrupt.pending) {
@@ -8103,6 +8160,11 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        vmcs12->guest_pending_dbg_exceptions =
                vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
 
+       if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
+           (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+               vmcs12->vmx_preemption_timer_value =
+                       vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+
        /*
         * In some cases (usually, nested EPT), L2 is allowed to change its
         * own CR3 without exiting. If it has changed it, we must keep it.
@@ -8128,6 +8190,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
        if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
                vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
+       if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
+               vmcs12->guest_ia32_efer = vcpu->arch.efer;
        vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
        vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
        vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
@@ -8199,7 +8263,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
         * fpu_active (which may have changed).
         * Note that vmx_set_cr0 refers to efer set above.
         */
-       kvm_set_cr0(vcpu, vmcs12->host_cr0);
+       vmx_set_cr0(vcpu, vmcs12->host_cr0);
        /*
         * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
         * to apply the same changes to L1's vmcs. We just set cr0 correctly,
@@ -8222,6 +8286,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
        kvm_set_cr3(vcpu, vmcs12->host_cr3);
        kvm_mmu_reset_context(vcpu);
 
+       if (!enable_ept)
+               vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+
        if (enable_vpid) {
                /*
                 * Trivially support vpid by letting L2s share their parent
index e5ca72a..21ef1ba 100644 (file)
@@ -577,6 +577,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
 int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 {
        u64 xcr0;
+       u64 valid_bits;
 
        /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
        if (index != XCR_XFEATURE_ENABLED_MASK)
@@ -586,8 +587,16 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
                return 1;
        if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
                return 1;
-       if (xcr0 & ~host_xcr0)
+
+       /*
+        * Do not allow the guest to set bits that we do not support
+        * saving.  However, xcr0 bit 0 is always set, even if the
+        * emulated CPU does not support XSAVE (see fx_init).
+        */
+       valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
+       if (xcr0 & ~valid_bits)
                return 1;
+
        kvm_put_guest_xcr0(vcpu);
        vcpu->arch.xcr0 = xcr0;
        return 0;
@@ -684,7 +693,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
        vcpu->arch.cr3 = cr3;
        __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-       vcpu->arch.mmu.new_cr3(vcpu);
+       kvm_mmu_new_cr3(vcpu);
        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2564,6 +2573,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
        case KVM_CAP_SET_TSS_ADDR:
        case KVM_CAP_EXT_CPUID:
+       case KVM_CAP_EXT_EMUL_CPUID:
        case KVM_CAP_CLOCKSOURCE:
        case KVM_CAP_PIT:
        case KVM_CAP_NOP_IO_DELAY:
@@ -2673,15 +2683,17 @@ long kvm_arch_dev_ioctl(struct file *filp,
                r = 0;
                break;
        }
-       case KVM_GET_SUPPORTED_CPUID: {
+       case KVM_GET_SUPPORTED_CPUID:
+       case KVM_GET_EMULATED_CPUID: {
                struct kvm_cpuid2 __user *cpuid_arg = argp;
                struct kvm_cpuid2 cpuid;
 
                r = -EFAULT;
                if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
                        goto out;
-               r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
-                                                     cpuid_arg->entries);
+
+               r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
+                                           ioctl);
                if (r)
                        goto out;
 
@@ -2715,8 +2727,7 @@ static void wbinvd_ipi(void *garbage)
 
 static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
 {
-       return vcpu->kvm->arch.iommu_domain &&
-               !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
+       return kvm_arch_has_noncoherent_dma(vcpu->kvm);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -2984,11 +2995,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                                         struct kvm_xsave *guest_xsave)
 {
-       if (cpu_has_xsave)
+       if (cpu_has_xsave) {
                memcpy(guest_xsave->region,
                        &vcpu->arch.guest_fpu.state->xsave,
-                       xstate_size);
-       else {
+                       vcpu->arch.guest_xstate_size);
+               *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
+                       vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
+       } else {
                memcpy(guest_xsave->region,
                        &vcpu->arch.guest_fpu.state->fxsave,
                        sizeof(struct i387_fxsave_struct));
@@ -3003,10 +3016,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
        u64 xstate_bv =
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
 
-       if (cpu_has_xsave)
+       if (cpu_has_xsave) {
+               /*
+                * Here we allow setting states that are not present in
+                * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
+                * with old userspace.
+                */
+               if (xstate_bv & ~KVM_SUPPORTED_XCR0)
+                       return -EINVAL;
+               if (xstate_bv & ~host_xcr0)
+                       return -EINVAL;
                memcpy(&vcpu->arch.guest_fpu.state->xsave,
-                       guest_xsave->region, xstate_size);
-       else {
+                       guest_xsave->region, vcpu->arch.guest_xstate_size);
+       else {
                if (xstate_bv & ~XSTATE_FPSSE)
                        return -EINVAL;
                memcpy(&vcpu->arch.guest_fpu.state->fxsave,
@@ -3042,9 +3064,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
 
        for (i = 0; i < guest_xcrs->nr_xcrs; i++)
                /* Only support XCR0 currently */
-               if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) {
+               if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
                        r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
-                               guest_xcrs->xcrs[0].value);
+                               guest_xcrs->xcrs[i].value);
                        break;
                }
        if (r)
@@ -4775,8 +4797,8 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
 
 static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
 {
-       memset(&ctxt->twobyte, 0,
-              (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
+       memset(&ctxt->opcode_len, 0,
+              (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
 
        ctxt->fetch.start = 0;
        ctxt->fetch.end = 0;
@@ -5094,8 +5116,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                ctxt->have_exception = false;
                ctxt->perm_ok = false;
 
-               ctxt->only_vendor_specific_insn
-                       = emulation_type & EMULTYPE_TRAP_UD;
+               ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
 
                r = x86_decode_insn(ctxt, insn, insn_len);
 
@@ -5263,7 +5284,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
        smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (vcpu->cpu != freq->cpu)
@@ -5273,7 +5294,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
                                send_ipi = 1;
                }
        }
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
 
        if (freq->old < freq->new && send_ipi) {
                /*
@@ -5426,12 +5447,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
        struct kvm_vcpu *vcpu;
        int i;
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                kvm_for_each_vcpu(i, vcpu, kvm)
                        set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
        atomic_set(&kvm_guest_has_master_clock, 0);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
 }
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -5945,10 +5966,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        vcpu->mode = IN_GUEST_MODE;
 
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+
        /* We should set ->mode before check ->requests,
         * see the comment in make_all_cpus_request.
         */
-       smp_mb();
+       smp_mb__after_srcu_read_unlock();
 
        local_irq_disable();
 
@@ -5958,12 +5981,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                smp_wmb();
                local_irq_enable();
                preempt_enable();
+               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = 1;
                goto cancel_injection;
        }
 
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-
        if (req_immediate_exit)
                smp_send_reschedule(vcpu->cpu);
 
@@ -6688,7 +6710,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        if (r)
                return r;
        kvm_vcpu_reset(vcpu);
-       r = kvm_mmu_setup(vcpu);
+       kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
 
        return r;
@@ -6940,6 +6962,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
        vcpu->arch.ia32_tsc_adjust_msr = 0x0;
        vcpu->arch.pv_time_enabled = false;
+
+       vcpu->arch.guest_supported_xcr0 = 0;
+       vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
 
@@ -6981,6 +7007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
        INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+       atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
        /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
        set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
@@ -7065,7 +7092,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
 {
        int i;
@@ -7086,7 +7113,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
        }
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages)
 {
        int i;
 
@@ -7283,7 +7311,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
        int r;
 
        if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
-             is_error_page(work->page))
+             work->wakeup_all)
                return;
 
        r = kvm_mmu_reload(vcpu);
@@ -7393,7 +7421,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
        struct x86_exception fault;
 
        trace_kvm_async_pf_ready(work->arch.token, work->gva);
-       if (is_error_page(work->page))
+       if (work->wakeup_all)
                work->arch.token = ~0; /* broadcast wakeup */
        else
                kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
@@ -7420,6 +7448,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
                        kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
+void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
+{
+       atomic_inc(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
+
+void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
+{
+       atomic_dec(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
+
+bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
+{
+       return atomic_read(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
index e224f7a..587fb9e 100644 (file)
@@ -122,6 +122,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
        gva_t addr, void *val, unsigned int bytes,
        struct x86_exception *exception);
 
+#define KVM_SUPPORTED_XCR0     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
 extern u64 host_xcr0;
 
 extern struct static_key kvm_no_apic_vcpu;
index 5596c7b..082e881 100644 (file)
@@ -700,7 +700,7 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
        if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed)
                return -ENODEV;
 
-       if (start > end || !addr)
+       if (start > end)
                return -EINVAL;
 
        mutex_lock(&pci_mmcfg_lock);
@@ -716,6 +716,11 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
                return -EEXIST;
        }
 
+       if (!addr) {
+               mutex_unlock(&pci_mmcfg_lock);
+               return -EINVAL;
+       }
+
        rc = -EBUSY;
        cfg = pci_mmconfig_alloc(seg, start, end, addr);
        if (cfg == NULL) {
index 90f6ed1..c7e22ab 100644 (file)
@@ -912,10 +912,13 @@ void __init efi_enter_virtual_mode(void)
 
        for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
                md = p;
-               if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
-                   md->type != EFI_BOOT_SERVICES_CODE &&
-                   md->type != EFI_BOOT_SERVICES_DATA)
-                       continue;
+               if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
+#ifdef CONFIG_X86_64
+                       if (md->type != EFI_BOOT_SERVICES_CODE &&
+                           md->type != EFI_BOOT_SERVICES_DATA)
+#endif
+                               continue;
+               }
 
                size = md->num_pages << EFI_PAGE_SHIFT;
                end = md->phys_addr + size;
index 8b901e8..a61c7d5 100644 (file)
@@ -879,7 +879,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
        unsigned long uninitialized_var(address);
        unsigned level;
        pte_t *ptep = NULL;
-       int ret = 0;
 
        pfn = page_to_pfn(page);
        if (!PageHighMem(page)) {
@@ -926,8 +925,8 @@ int m2p_add_override(unsigned long mfn, struct page *page,
         * frontend pages while they are being shared with the backend,
         * because mfn_to_pfn (that ends up being called by GUPF) will
         * return the backend pfn rather than the frontend pfn. */
-       ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
-       if (ret == 0 && get_phys_to_machine(pfn) == mfn)
+       pfn = mfn_to_pfn_no_overrides(mfn);
+       if (get_phys_to_machine(pfn) == mfn)
                set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
 
        return 0;
@@ -942,7 +941,6 @@ int m2p_remove_override(struct page *page,
        unsigned long uninitialized_var(address);
        unsigned level;
        pte_t *ptep = NULL;
-       int ret = 0;
 
        pfn = page_to_pfn(page);
        mfn = get_phys_to_machine(pfn);
@@ -1029,8 +1027,8 @@ int m2p_remove_override(struct page *page,
         * the original pfn causes mfn_to_pfn(mfn) to return the frontend
         * pfn again. */
        mfn &= ~FOREIGN_FRAME_BIT;
-       ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
-       if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+       pfn = mfn_to_pfn_no_overrides(mfn);
+       if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
                        m2p_find_override(mfn) == NULL)
                set_phys_to_machine(pfn, mfn);
 
index d1e4777..31d0475 100644 (file)
@@ -278,6 +278,15 @@ static void __init xen_smp_prepare_boot_cpu(void)
                   old memory can be recycled */
                make_lowmem_page_readwrite(xen_initial_gdt);
 
+#ifdef CONFIG_X86_32
+               /*
+                * Xen starts us with XEN_FLAT_RING1_DS, but linux code
+                * expects __USER_DS
+                */
+               loadsegment(ds, __USER_DS);
+               loadsegment(es, __USER_DS);
+#endif
+
                xen_filter_cpu_maps();
                xen_setup_vcpu_info_placement();
        }
index 253f63f..be6b860 100644 (file)
@@ -259,6 +259,14 @@ void xen_uninit_lock_cpu(int cpu)
 }
 
 
+/*
+ * Our init of PV spinlocks is split in two init functions due to us
+ * using paravirt patching and jump labels patching and having to do
+ * all of this before SMP code is invoked.
+ *
+ * The paravirt patching needs to be done _before_ the alternative asm code
+ * is started, otherwise we would not patch the core kernel code.
+ */
 void __init xen_init_spinlocks(void)
 {
 
@@ -267,12 +275,26 @@ void __init xen_init_spinlocks(void)
                return;
        }
 
-       static_key_slow_inc(&paravirt_ticketlocks_enabled);
-
        pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
        pv_lock_ops.unlock_kick = xen_unlock_kick;
 }
 
+/*
+ * While the jump_label init code needs to happend _after_ the jump labels are
+ * enabled and before SMP is started. Hence we use pre-SMP initcall level
+ * init. We cannot do it in xen_init_spinlocks as that is done before
+ * jump labels are activated.
+ */
+static __init int xen_init_spinlocks_jump(void)
+{
+       if (!xen_pvspin)
+               return 0;
+
+       static_key_slow_inc(&paravirt_ticketlocks_enabled);
+       return 0;
+}
+early_initcall(xen_init_spinlocks_jump);
+
 static __init int xen_parse_nopvspin(char *arg)
 {
        xen_pvspin = false;
index 7f38e40..2429515 100644 (file)
@@ -99,11 +99,16 @@ config BLK_DEV_THROTTLING
 
        See Documentation/cgroups/blkio-controller.txt for more information.
 
-config CMDLINE_PARSER
+config BLK_CMDLINE_PARSER
        bool "Block device command line partition parser"
        default n
        ---help---
-       Parsing command line, get the partitions information.
+       Enabling this option allows you to specify the partition layout from
+       the kernel boot args.  This is typically of use for embedded devices
+       which don't otherwise have any standardized method for listing the
+       partitions on a block device.
+
+       See Documentation/block/cmdline-partition.txt for more information.
 
 menu "Partition Types"
 
index 4fa4be5..671a83d 100644 (file)
@@ -18,4 +18,4 @@ obj-$(CONFIG_IOSCHED_CFQ)     += cfq-iosched.o
 
 obj-$(CONFIG_BLOCK_COMPAT)     += compat_ioctl.o
 obj-$(CONFIG_BLK_DEV_INTEGRITY)        += blk-integrity.o
-obj-$(CONFIG_CMDLINE_PARSER)   += cmdline-parser.o
+obj-$(CONFIG_BLK_CMDLINE_PARSER)       += cmdline-parser.o
index 87a3208..9b29a99 100644 (file)
@@ -263,7 +263,7 @@ config SYSV68_PARTITION
 
 config CMDLINE_PARTITION
        bool "Command line partition support" if PARTITION_ADVANCED
-       select CMDLINE_PARSER
+       select BLK_CMDLINE_PARSER
        help
-         Say Y here if you would read the partitions table from bootargs.
+         Say Y here if you want to read the partition table from bootargs.
          The format for the command line is just like mtdparts.
index 56cf4ff..5141b56 100644 (file)
@@ -2,15 +2,15 @@
  * Copyright (C) 2013 HUAWEI
  * Author: Cai Zhiyong <caizhiyong@huawei.com>
  *
- * Read block device partition table from command line.
- * The partition used for fixed block device (eMMC) embedded device.
- * It is no MBR, save storage space. Bootloader can be easily accessed
+ * Read block device partition table from the command line.
+ * Typically used for fixed block (eMMC) embedded devices.
+ * It has no MBR, so saves storage space. Bootloader can be easily accessed
  * by absolute address of data on the block device.
  * Users can easily change the partition.
  *
  * The format for the command line is just like mtdparts.
  *
- * Verbose config please reference "Documentation/block/cmdline-partition.txt"
+ * For further information, see "Documentation/block/cmdline-partition.txt"
  *
  */
 
index 22327e6..5ea5c32 100644 (file)
@@ -24,7 +24,7 @@ menuconfig ACPI
          are configured, ACPI is used.
 
          The project home page for the Linux ACPI subsystem is here:
-         <http://www.lesswatts.org/projects/acpi/>
+         <https://01.org/linux-acpi>
 
          Linux support for ACPI is based on Intel Corporation's ACPI
          Component Architecture (ACPI CA).  For more information on the
index f40acef..a6977e1 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/ipmi.h>
 #include <linux/device.h>
 #include <linux/pnp.h>
+#include <linux/spinlock.h>
 
 MODULE_AUTHOR("Zhao Yakui");
 MODULE_DESCRIPTION("ACPI IPMI Opregion driver");
@@ -57,7 +58,7 @@ struct acpi_ipmi_device {
        struct list_head head;
        /* the IPMI request message list */
        struct list_head tx_msg_list;
-       struct mutex    tx_msg_lock;
+       spinlock_t      tx_msg_lock;
        acpi_handle handle;
        struct pnp_dev *pnp_dev;
        ipmi_user_t     user_interface;
@@ -147,6 +148,7 @@ static void acpi_format_ipmi_msg(struct acpi_ipmi_msg *tx_msg,
        struct kernel_ipmi_msg *msg;
        struct acpi_ipmi_buffer *buffer;
        struct acpi_ipmi_device *device;
+       unsigned long flags;
 
        msg = &tx_msg->tx_message;
        /*
@@ -177,10 +179,10 @@ static void acpi_format_ipmi_msg(struct acpi_ipmi_msg *tx_msg,
 
        /* Get the msgid */
        device = tx_msg->device;
-       mutex_lock(&device->tx_msg_lock);
+       spin_lock_irqsave(&device->tx_msg_lock, flags);
        device->curr_msgid++;
        tx_msg->tx_msgid = device->curr_msgid;
-       mutex_unlock(&device->tx_msg_lock);
+       spin_unlock_irqrestore(&device->tx_msg_lock, flags);
 }
 
 static void acpi_format_ipmi_response(struct acpi_ipmi_msg *msg,
@@ -242,6 +244,7 @@ static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data)
        int msg_found = 0;
        struct acpi_ipmi_msg *tx_msg;
        struct pnp_dev *pnp_dev = ipmi_device->pnp_dev;
+       unsigned long flags;
 
        if (msg->user != ipmi_device->user_interface) {
                dev_warn(&pnp_dev->dev, "Unexpected response is returned. "
@@ -250,7 +253,7 @@ static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data)
                ipmi_free_recv_msg(msg);
                return;
        }
-       mutex_lock(&ipmi_device->tx_msg_lock);
+       spin_lock_irqsave(&ipmi_device->tx_msg_lock, flags);
        list_for_each_entry(tx_msg, &ipmi_device->tx_msg_list, head) {
                if (msg->msgid == tx_msg->tx_msgid) {
                        msg_found = 1;
@@ -258,7 +261,7 @@ static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data)
                }
        }
 
-       mutex_unlock(&ipmi_device->tx_msg_lock);
+       spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags);
        if (!msg_found) {
                dev_warn(&pnp_dev->dev, "Unexpected response (msg id %ld) is "
                        "returned.\n", msg->msgid);
@@ -378,6 +381,7 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address,
        struct acpi_ipmi_device *ipmi_device = handler_context;
        int err, rem_time;
        acpi_status status;
+       unsigned long flags;
        /*
         * IPMI opregion message.
         * IPMI message is firstly written to the BMC and system software
@@ -395,9 +399,9 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address,
                return AE_NO_MEMORY;
 
        acpi_format_ipmi_msg(tx_msg, address, value);
-       mutex_lock(&ipmi_device->tx_msg_lock);
+       spin_lock_irqsave(&ipmi_device->tx_msg_lock, flags);
        list_add_tail(&tx_msg->head, &ipmi_device->tx_msg_list);
-       mutex_unlock(&ipmi_device->tx_msg_lock);
+       spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags);
        err = ipmi_request_settime(ipmi_device->user_interface,
                                        &tx_msg->addr,
                                        tx_msg->tx_msgid,
@@ -413,9 +417,9 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address,
        status = AE_OK;
 
 end_label:
-       mutex_lock(&ipmi_device->tx_msg_lock);
+       spin_lock_irqsave(&ipmi_device->tx_msg_lock, flags);
        list_del(&tx_msg->head);
-       mutex_unlock(&ipmi_device->tx_msg_lock);
+       spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags);
        kfree(tx_msg);
        return status;
 }
@@ -457,7 +461,7 @@ static void acpi_add_ipmi_device(struct acpi_ipmi_device *ipmi_device)
 
        INIT_LIST_HEAD(&ipmi_device->head);
 
-       mutex_init(&ipmi_device->tx_msg_lock);
+       spin_lock_init(&ipmi_device->tx_msg_lock);
        INIT_LIST_HEAD(&ipmi_device->tx_msg_list);
        ipmi_install_space_handler(ipmi_device);
 
index fbdb82e..407ad13 100644 (file)
@@ -968,7 +968,7 @@ int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device)
        }
        return 0;
 }
-EXPORT_SYMBOL_GPL(acpi_bus_get_device);
+EXPORT_SYMBOL(acpi_bus_get_device);
 
 int acpi_device_add(struct acpi_device *device,
                    void (*release)(struct device *))
@@ -1121,7 +1121,7 @@ int acpi_bus_register_driver(struct acpi_driver *driver)
 EXPORT_SYMBOL(acpi_bus_register_driver);
 
 /**
- * acpi_bus_unregister_driver - unregisters a driver with the APIC bus
+ * acpi_bus_unregister_driver - unregisters a driver with the ACPI bus
  * @driver: driver to unregister
  *
  * Unregisters a driver with the ACPI bus.  Searches the namespace for all
index 958ba2a..97f4acb 100644 (file)
@@ -2,7 +2,7 @@
  *  sata_promise.c - Promise SATA
  *
  *  Maintained by:  Tejun Heo <tj@kernel.org>
- *                 Mikael Pettersson <mikpe@it.uu.se>
+ *                 Mikael Pettersson
  *                 Please ALWAYS copy linux-ide@vger.kernel.org
  *                 on emails.
  *
index c7cfadc..34abf4d 100644 (file)
@@ -2017,7 +2017,7 @@ EXPORT_SYMBOL_GPL(device_move);
  */
 void device_shutdown(void)
 {
-       struct device *dev;
+       struct device *dev, *parent;
 
        spin_lock(&devices_kset->list_lock);
        /*
@@ -2034,7 +2034,7 @@ void device_shutdown(void)
                 * prevent it from being freed because parent's
                 * lock is to be held
                 */
-               get_device(dev->parent);
+               parent = get_device(dev->parent);
                get_device(dev);
                /*
                 * Make sure the device is off the kset list, in the
@@ -2044,8 +2044,8 @@ void device_shutdown(void)
                spin_unlock(&devices_kset->list_lock);
 
                /* hold lock to avoid race with probe/release */
-               if (dev->parent)
-                       device_lock(dev->parent);
+               if (parent)
+                       device_lock(parent);
                device_lock(dev);
 
                /* Don't allow any more runtime suspends */
@@ -2063,11 +2063,11 @@ void device_shutdown(void)
                }
 
                device_unlock(dev);
-               if (dev->parent)
-                       device_unlock(dev->parent);
+               if (parent)
+                       device_unlock(parent);
 
                put_device(dev);
-               put_device(dev->parent);
+               put_device(parent);
 
                spin_lock(&devices_kset->list_lock);
        }
index c9fd694..50329d1 100644 (file)
@@ -210,25 +210,6 @@ static void bcma_core_pci_config_fixup(struct bcma_drv_pci *pc)
        }
 }
 
-static void bcma_core_pci_power_save(struct bcma_drv_pci *pc, bool up)
-{
-       u16 data;
-
-       if (pc->core->id.rev >= 15 && pc->core->id.rev <= 20) {
-               data = up ? 0x74 : 0x7C;
-               bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1,
-                                        BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7F64);
-               bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1,
-                                        BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data);
-       } else if (pc->core->id.rev >= 21 && pc->core->id.rev <= 22) {
-               data = up ? 0x75 : 0x7D;
-               bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1,
-                                        BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7E65);
-               bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1,
-                                        BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data);
-       }
-}
-
 /**************************************************
  * Init.
  **************************************************/
@@ -255,6 +236,32 @@ void bcma_core_pci_init(struct bcma_drv_pci *pc)
                bcma_core_pci_clientmode_init(pc);
 }
 
+void bcma_core_pci_power_save(struct bcma_bus *bus, bool up)
+{
+       struct bcma_drv_pci *pc;
+       u16 data;
+
+       if (bus->hosttype != BCMA_HOSTTYPE_PCI)
+               return;
+
+       pc = &bus->drv_pci[0];
+
+       if (pc->core->id.rev >= 15 && pc->core->id.rev <= 20) {
+               data = up ? 0x74 : 0x7C;
+               bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1,
+                                        BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7F64);
+               bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1,
+                                        BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data);
+       } else if (pc->core->id.rev >= 21 && pc->core->id.rev <= 22) {
+               data = up ? 0x75 : 0x7D;
+               bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1,
+                                        BCMA_CORE_PCI_MDIO_BLK1_MGMT1, 0x7E65);
+               bcma_pcie_mdio_writeread(pc, BCMA_CORE_PCI_MDIO_BLK1,
+                                        BCMA_CORE_PCI_MDIO_BLK1_MGMT3, data);
+       }
+}
+EXPORT_SYMBOL_GPL(bcma_core_pci_power_save);
+
 int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core,
                          bool enable)
 {
@@ -310,8 +317,6 @@ void bcma_core_pci_up(struct bcma_bus *bus)
 
        pc = &bus->drv_pci[0];
 
-       bcma_core_pci_power_save(pc, true);
-
        bcma_core_pci_extend_L1timer(pc, true);
 }
 EXPORT_SYMBOL_GPL(bcma_core_pci_up);
@@ -326,7 +331,5 @@ void bcma_core_pci_down(struct bcma_bus *bus)
        pc = &bus->drv_pci[0];
 
        bcma_core_pci_extend_L1timer(pc, false);
-
-       bcma_core_pci_power_save(pc, false);
 }
 EXPORT_SYMBOL_GPL(bcma_core_pci_down);
index d2d95ff..edfa251 100644 (file)
@@ -1189,6 +1189,7 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
        int err;
        u32 cp;
 
+       memset(&arg64, 0, sizeof(arg64));
        err = 0;
        err |=
            copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
index 639d26b..2b94403 100644 (file)
@@ -1193,6 +1193,7 @@ out_passthru:
                ida_pci_info_struct pciinfo;
 
                if (!arg) return -EINVAL;
+               memset(&pciinfo, 0, sizeof(pciinfo));
                pciinfo.bus = host->pci_dev->bus->number;
                pciinfo.dev_fn = host->pci_dev->devfn;
                pciinfo.board_id = host->board_id;
index a12b923..0a327f4 100644 (file)
@@ -85,6 +85,7 @@ static struct usb_device_id ath3k_table[] = {
        { USB_DEVICE(0x04CA, 0x3008) },
        { USB_DEVICE(0x13d3, 0x3362) },
        { USB_DEVICE(0x0CF3, 0xE004) },
+       { USB_DEVICE(0x0CF3, 0xE005) },
        { USB_DEVICE(0x0930, 0x0219) },
        { USB_DEVICE(0x0489, 0xe057) },
        { USB_DEVICE(0x13d3, 0x3393) },
@@ -126,6 +127,7 @@ static struct usb_device_id ath3k_blist_tbl[] = {
        { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 },
index 8e16f0a..f3dfc0a 100644 (file)
@@ -102,6 +102,7 @@ static struct usb_device_id btusb_table[] = {
 
        /* Broadcom BCM20702A0 */
        { USB_DEVICE(0x0b05, 0x17b5) },
+       { USB_DEVICE(0x0b05, 0x17cb) },
        { USB_DEVICE(0x04ca, 0x2003) },
        { USB_DEVICE(0x0489, 0xe042) },
        { USB_DEVICE(0x413c, 0x8197) },
@@ -112,6 +113,9 @@ static struct usb_device_id btusb_table[] = {
        /*Broadcom devices with vendor specific id */
        { USB_VENDOR_AND_INTERFACE_INFO(0x0a5c, 0xff, 0x01, 0x01) },
 
+       /* Belkin F8065bf - Broadcom based */
+       { USB_VENDOR_AND_INTERFACE_INFO(0x050d, 0xff, 0x01, 0x01) },
+
        { }     /* Terminating entry */
 };
 
@@ -148,6 +152,7 @@ static struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 },
+       { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 },
        { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 },
index 19ab6ff..2394e97 100644 (file)
@@ -700,6 +700,7 @@ static int __init mvebu_mbus_common_init(struct mvebu_mbus_state *mbus,
                                         phys_addr_t sdramwins_phys_base,
                                         size_t sdramwins_size)
 {
+       struct device_node *np;
        int win;
 
        mbus->mbuswins_base = ioremap(mbuswins_phys_base, mbuswins_size);
@@ -712,8 +713,11 @@ static int __init mvebu_mbus_common_init(struct mvebu_mbus_state *mbus,
                return -ENOMEM;
        }
 
-       if (of_find_compatible_node(NULL, NULL, "marvell,coherency-fabric"))
+       np = of_find_compatible_node(NULL, NULL, "marvell,coherency-fabric");
+       if (np) {
                mbus->hw_io_coherency = 1;
+               of_node_put(np);
+       }
 
        for (win = 0; win < mbus->soc->num_wins; win++)
                mvebu_mbus_disable_window(mbus, win);
@@ -861,11 +865,13 @@ static void __init mvebu_mbus_get_pcie_resources(struct device_node *np,
        int ret;
 
        /*
-        * These are optional, so we clear them and they'll
-        * be zero if they are missing from the DT.
+        * These are optional, so we make sure that resource_size(x) will
+        * return 0.
         */
        memset(mem, 0, sizeof(struct resource));
+       mem->end = -1;
        memset(io, 0, sizeof(struct resource));
+       io->end = -1;
 
        ret = of_property_read_u32_array(np, "pcie-mem-aperture", reg, ARRAY_SIZE(reg));
        if (!ret) {
index 7737b5b..7a744d3 100644 (file)
@@ -640,7 +640,7 @@ struct timer_rand_state {
  */
 void add_device_randomness(const void *buf, unsigned int size)
 {
-       unsigned long time = get_cycles() ^ jiffies;
+       unsigned long time = random_get_entropy() ^ jiffies;
 
        mix_pool_bytes(&input_pool, buf, size, NULL);
        mix_pool_bytes(&input_pool, &time, sizeof(time), NULL);
@@ -677,7 +677,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
                goto out;
 
        sample.jiffies = jiffies;
-       sample.cycles = get_cycles();
+       sample.cycles = random_get_entropy();
        sample.num = num;
        mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL);
 
@@ -744,7 +744,7 @@ void add_interrupt_randomness(int irq, int irq_flags)
        struct fast_pool        *fast_pool = &__get_cpu_var(irq_randomness);
        struct pt_regs          *regs = get_irq_regs();
        unsigned long           now = jiffies;
-       __u32                   input[4], cycles = get_cycles();
+       __u32                   input[4], cycles = random_get_entropy();
 
        input[0] = cycles ^ jiffies;
        input[1] = irq;
@@ -1459,12 +1459,11 @@ struct ctl_table random_table[] = {
 
 static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
 
-static int __init random_int_secret_init(void)
+int random_int_secret_init(void)
 {
        get_random_bytes(random_int_secret, sizeof(random_int_secret));
        return 0;
 }
-late_initcall(random_int_secret_init);
 
 /*
  * Get a random word for internal kernel use only. Similar to urandom but
@@ -1483,7 +1482,7 @@ unsigned int get_random_int(void)
 
        hash = get_cpu_var(get_random_int_hash);
 
-       hash[0] += current->pid + jiffies + get_cycles();
+       hash[0] += current->pid + jiffies + random_get_entropy();
        md5_transform(hash, random_int_secret);
        ret = hash[0];
        put_cpu_var(get_random_int_hash);
index 7a7929b..94c280d 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
+#include <xen/xen.h>
 #include <xen/events.h>
 #include <xen/interface/io/tpmif.h>
 #include <xen/grant_table.h>
@@ -142,32 +143,6 @@ static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
        return length;
 }
 
-ssize_t tpm_show_locality(struct device *dev, struct device_attribute *attr,
-                         char *buf)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-       struct tpm_private *priv = TPM_VPRIV(chip);
-       u8 locality = priv->shr->locality;
-
-       return sprintf(buf, "%d\n", locality);
-}
-
-ssize_t tpm_store_locality(struct device *dev, struct device_attribute *attr,
-                       const char *buf, size_t len)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-       struct tpm_private *priv = TPM_VPRIV(chip);
-       u8 val;
-
-       int rv = kstrtou8(buf, 0, &val);
-       if (rv)
-               return rv;
-
-       priv->shr->locality = val;
-
-       return len;
-}
-
 static const struct file_operations vtpm_ops = {
        .owner = THIS_MODULE,
        .llseek = no_llseek,
@@ -188,8 +163,6 @@ static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
 static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
 static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
-static DEVICE_ATTR(locality, S_IRUGO | S_IWUSR, tpm_show_locality,
-               tpm_store_locality);
 
 static struct attribute *vtpm_attrs[] = {
        &dev_attr_pubek.attr,
@@ -202,7 +175,6 @@ static struct attribute *vtpm_attrs[] = {
        &dev_attr_cancel.attr,
        &dev_attr_durations.attr,
        &dev_attr_timeouts.attr,
-       &dev_attr_locality.attr,
        NULL,
 };
 
@@ -210,8 +182,6 @@ static struct attribute_group vtpm_attr_grp = {
        .attrs = vtpm_attrs,
 };
 
-#define TPM_LONG_TIMEOUT   (10 * 60 * HZ)
-
 static const struct tpm_vendor_specific tpm_vtpm = {
        .status = vtpm_status,
        .recv = vtpm_recv,
@@ -224,11 +194,6 @@ static const struct tpm_vendor_specific tpm_vtpm = {
        .miscdev = {
                .fops = &vtpm_ops,
        },
-       .duration = {
-               TPM_LONG_TIMEOUT,
-               TPM_LONG_TIMEOUT,
-               TPM_LONG_TIMEOUT,
-       },
 };
 
 static irqreturn_t tpmif_interrupt(int dummy, void *dev_id)
index 41c6946..971d796 100644 (file)
@@ -26,6 +26,7 @@ config DW_APB_TIMER_OF
 
 config ARMADA_370_XP_TIMER
        bool
+       select CLKSRC_OF
 
 config ORION_TIMER
        select CLKSRC_OF
index 37f5325..b9ddd9e 100644 (file)
@@ -30,6 +30,9 @@ void __init clocksource_of_init(void)
        clocksource_of_init_fn init_func;
 
        for_each_matching_node_and_match(np, __clksrc_of_table, &match) {
+               if (!of_device_is_available(np))
+                       continue;
+
                init_func = match->data;
                init_func(np);
        }
index b9c81b7..3a5909c 100644 (file)
@@ -301,7 +301,7 @@ static void em_sti_register_clockevent(struct em_sti_priv *p)
        ced->name = dev_name(&p->pdev->dev);
        ced->features = CLOCK_EVT_FEAT_ONESHOT;
        ced->rating = 200;
-       ced->cpumask = cpumask_of(0);
+       ced->cpumask = cpu_possible_mask;
        ced->set_next_event = em_sti_clock_event_next;
        ced->set_mode = em_sti_clock_event_mode;
 
index 5b34768..62b0de6 100644 (file)
@@ -428,7 +428,6 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt)
                                evt->irq);
                        return -EIO;
                }
-               irq_set_affinity(evt->irq, cpumask_of(cpu));
        } else {
                enable_percpu_irq(mct_irqs[MCT_L0_IRQ], 0);
        }
@@ -449,6 +448,7 @@ static int exynos4_mct_cpu_notify(struct notifier_block *self,
                                           unsigned long action, void *hcpu)
 {
        struct mct_clock_event_device *mevt;
+       unsigned int cpu;
 
        /*
         * Grab cpu pointer in each case to avoid spurious
@@ -459,6 +459,12 @@ static int exynos4_mct_cpu_notify(struct notifier_block *self,
                mevt = this_cpu_ptr(&percpu_mct_tick);
                exynos4_local_timer_setup(&mevt->evt);
                break;
+       case CPU_ONLINE:
+               cpu = (unsigned long)hcpu;
+               if (mct_int_type == MCT_INT_SPI)
+                       irq_set_affinity(mct_irqs[MCT_L0_IRQ + cpu],
+                                               cpumask_of(cpu));
+               break;
        case CPU_DYING:
                mevt = this_cpu_ptr(&percpu_mct_tick);
                exynos4_local_timer_stop(&mevt->evt);
@@ -500,6 +506,8 @@ static void __init exynos4_timer_resources(struct device_node *np, void __iomem
                                         &percpu_mct_tick);
                WARN(err, "MCT: can't request IRQ %d (%d)\n",
                     mct_irqs[MCT_L0_IRQ], err);
+       } else {
+               irq_set_affinity(mct_irqs[MCT_L0_IRQ], cpumask_of(0));
        }
 
        err = register_cpu_notifier(&exynos4_mct_cpu_nb);
index a1260b4..d2c3253 100644 (file)
@@ -986,6 +986,10 @@ static int __init acpi_cpufreq_init(void)
 {
        int ret;
 
+       /* don't keep reloading if cpufreq_driver exists */
+       if (cpufreq_get_current_driver())
+               return 0;
+
        if (acpi_disabled)
                return 0;
 
index 78c49d8..c522a95 100644 (file)
@@ -229,7 +229,7 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
        if (of_property_read_u32(np, "clock-latency", &transition_latency))
                transition_latency = CPUFREQ_ETERNAL;
 
-       if (cpu_reg) {
+       if (!IS_ERR(cpu_reg)) {
                struct opp *opp;
                unsigned long min_uV, max_uV;
                int i;
index 89b3c52..04548f7 100644 (file)
@@ -1460,6 +1460,9 @@ unsigned int cpufreq_get(unsigned int cpu)
 {
        unsigned int ret_freq = 0;
 
+       if (cpufreq_disabled() || !cpufreq_driver)
+               return -ENOENT;
+
        if (!down_read_trylock(&cpufreq_rwsem))
                return 0;
 
index d514c15..be5380e 100644 (file)
@@ -457,7 +457,7 @@ err_free_table:
        opp_free_cpufreq_table(dvfs_info->dev, &dvfs_info->freq_table);
 err_put_node:
        of_node_put(np);
-       dev_err(dvfs_info->dev, "%s: failed initialization\n", __func__);
+       dev_err(&pdev->dev, "%s: failed initialization\n", __func__);
        return ret;
 }
 
index 9733f29..32b3479 100644 (file)
@@ -394,7 +394,10 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
        trace_cpu_frequency(pstate * 100000, cpu->cpu);
 
        cpu->pstate.current_pstate = pstate;
-       wrmsrl(MSR_IA32_PERF_CTL, pstate << 8);
+       if (limits.no_turbo)
+               wrmsrl(MSR_IA32_PERF_CTL, BIT(32) | (pstate << 8));
+       else
+               wrmsrl(MSR_IA32_PERF_CTL, pstate << 8);
 
 }
 
index 19e364f..3f41816 100644 (file)
@@ -113,7 +113,7 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy,
                unsigned int target_freq, unsigned int relation)
 {
        struct cpufreq_freqs freqs;
-       unsigned long newfreq;
+       long newfreq;
        struct clk *srcclk;
        int index, ret, mult = 1;
 
index 526ec77..f238cfd 100644 (file)
@@ -198,6 +198,7 @@ config TI_EDMA
        depends on ARCH_DAVINCI || ARCH_OMAP
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
+       select TI_PRIV_EDMA
        default n
        help
          Enable support for the TI EDMA controller. This DMA
index ff50ff4..3519111 100644 (file)
@@ -306,6 +306,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg(
                                                EDMA_SLOT_ANY);
                        if (echan->slot[i] < 0) {
                                dev_err(dev, "Failed to allocate slot\n");
+                               kfree(edesc);
                                return NULL;
                        }
                }
@@ -749,6 +750,6 @@ static void __exit edma_exit(void)
 }
 module_exit(edma_exit);
 
-MODULE_AUTHOR("Matt Porter <mporter@ti.com>");
+MODULE_AUTHOR("Matt Porter <matt.porter@linaro.org>");
 MODULE_DESCRIPTION("TI EDMA DMA engine driver");
 MODULE_LICENSE("GPL v2");
index 78f8ca5..55852c0 100644 (file)
@@ -437,17 +437,18 @@ static void dma_irq_handle_channel(struct imxdma_channel *imxdmac)
        struct imxdma_engine *imxdma = imxdmac->imxdma;
        int chno = imxdmac->channel;
        struct imxdma_desc *desc;
+       unsigned long flags;
 
-       spin_lock(&imxdma->lock);
+       spin_lock_irqsave(&imxdma->lock, flags);
        if (list_empty(&imxdmac->ld_active)) {
-               spin_unlock(&imxdma->lock);
+               spin_unlock_irqrestore(&imxdma->lock, flags);
                goto out;
        }
 
        desc = list_first_entry(&imxdmac->ld_active,
                                struct imxdma_desc,
                                node);
-       spin_unlock(&imxdma->lock);
+       spin_unlock_irqrestore(&imxdma->lock, flags);
 
        if (desc->sg) {
                u32 tmp;
@@ -519,7 +520,6 @@ static int imxdma_xfer_desc(struct imxdma_desc *d)
 {
        struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
        struct imxdma_engine *imxdma = imxdmac->imxdma;
-       unsigned long flags;
        int slot = -1;
        int i;
 
@@ -527,7 +527,6 @@ static int imxdma_xfer_desc(struct imxdma_desc *d)
        switch (d->type) {
        case IMXDMA_DESC_INTERLEAVED:
                /* Try to get a free 2D slot */
-               spin_lock_irqsave(&imxdma->lock, flags);
                for (i = 0; i < IMX_DMA_2D_SLOTS; i++) {
                        if ((imxdma->slots_2d[i].count > 0) &&
                        ((imxdma->slots_2d[i].xsr != d->x) ||
@@ -537,10 +536,8 @@ static int imxdma_xfer_desc(struct imxdma_desc *d)
                        slot = i;
                        break;
                }
-               if (slot < 0) {
-                       spin_unlock_irqrestore(&imxdma->lock, flags);
+               if (slot < 0)
                        return -EBUSY;
-               }
 
                imxdma->slots_2d[slot].xsr = d->x;
                imxdma->slots_2d[slot].ysr = d->y;
@@ -549,7 +546,6 @@ static int imxdma_xfer_desc(struct imxdma_desc *d)
 
                imxdmac->slot_2d = slot;
                imxdmac->enabled_2d = true;
-               spin_unlock_irqrestore(&imxdma->lock, flags);
 
                if (slot == IMX_DMA_2D_SLOT_A) {
                        d->config_mem &= ~CCR_MSEL_B;
@@ -625,18 +621,17 @@ static void imxdma_tasklet(unsigned long data)
        struct imxdma_channel *imxdmac = (void *)data;
        struct imxdma_engine *imxdma = imxdmac->imxdma;
        struct imxdma_desc *desc;
+       unsigned long flags;
 
-       spin_lock(&imxdma->lock);
+       spin_lock_irqsave(&imxdma->lock, flags);
 
        if (list_empty(&imxdmac->ld_active)) {
                /* Someone might have called terminate all */
-               goto out;
+               spin_unlock_irqrestore(&imxdma->lock, flags);
+               return;
        }
        desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, node);
 
-       if (desc->desc.callback)
-               desc->desc.callback(desc->desc.callback_param);
-
        /* If we are dealing with a cyclic descriptor, keep it on ld_active
         * and dont mark the descriptor as complete.
         * Only in non-cyclic cases it would be marked as complete
@@ -663,7 +658,11 @@ static void imxdma_tasklet(unsigned long data)
                                 __func__, imxdmac->channel);
        }
 out:
-       spin_unlock(&imxdma->lock);
+       spin_unlock_irqrestore(&imxdma->lock, flags);
+
+       if (desc->desc.callback)
+               desc->desc.callback(desc->desc.callback_param);
+
 }
 
 static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
@@ -883,7 +882,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
        kfree(imxdmac->sg_list);
 
        imxdmac->sg_list = kcalloc(periods + 1,
-                       sizeof(struct scatterlist), GFP_KERNEL);
+                       sizeof(struct scatterlist), GFP_ATOMIC);
        if (!imxdmac->sg_list)
                return NULL;
 
index 45a5202..ebad845 100644 (file)
@@ -93,6 +93,7 @@ struct hpb_dmae_chan {
        void __iomem *base;
        const struct hpb_dmae_slave_config *cfg;
        char dev_id[16];                /* unique name per DMAC of channel */
+       dma_addr_t slave_addr;
 };
 
 struct hpb_dmae_device {
@@ -432,7 +433,6 @@ hpb_dmae_alloc_chan_resources(struct hpb_dmae_chan *hpb_chan,
                hpb_chan->xfer_mode = XFER_DOUBLE;
        } else {
                dev_err(hpb_chan->shdma_chan.dev, "DCR setting error");
-               shdma_free_irq(&hpb_chan->shdma_chan);
                return -EINVAL;
        }
 
@@ -446,7 +446,8 @@ hpb_dmae_alloc_chan_resources(struct hpb_dmae_chan *hpb_chan,
        return 0;
 }
 
-static int hpb_dmae_set_slave(struct shdma_chan *schan, int slave_id, bool try)
+static int hpb_dmae_set_slave(struct shdma_chan *schan, int slave_id,
+                             dma_addr_t slave_addr, bool try)
 {
        struct hpb_dmae_chan *chan = to_chan(schan);
        const struct hpb_dmae_slave_config *sc =
@@ -457,6 +458,7 @@ static int hpb_dmae_set_slave(struct shdma_chan *schan, int slave_id, bool try)
        if (try)
                return 0;
        chan->cfg = sc;
+       chan->slave_addr = slave_addr ? : sc->addr;
        return hpb_dmae_alloc_chan_resources(chan, sc);
 }
 
@@ -468,7 +470,7 @@ static dma_addr_t hpb_dmae_slave_addr(struct shdma_chan *schan)
 {
        struct hpb_dmae_chan *chan = to_chan(schan);
 
-       return chan->cfg->addr;
+       return chan->slave_addr;
 }
 
 static struct shdma_desc *hpb_dmae_embedded_desc(void *buf, int i)
@@ -614,7 +616,6 @@ static void hpb_dmae_chan_remove(struct hpb_dmae_device *hpbdev)
        shdma_for_each_chan(schan, &hpbdev->shdma_dev, i) {
                BUG_ON(!schan);
 
-               shdma_free_irq(schan);
                shdma_chan_remove(schan);
        }
        dma_dev->chancnt = 0;
index 0ff4355..89675f8 100644 (file)
@@ -63,6 +63,7 @@ struct gpio_bank {
        struct gpio_chip chip;
        struct clk *dbck;
        u32 mod_usage;
+       u32 irq_usage;
        u32 dbck_enable_mask;
        bool dbck_enabled;
        struct device *dev;
@@ -86,6 +87,9 @@ struct gpio_bank {
 #define GPIO_BIT(bank, gpio) (1 << GPIO_INDEX(bank, gpio))
 #define GPIO_MOD_CTRL_BIT      BIT(0)
 
+#define BANK_USED(bank) (bank->mod_usage || bank->irq_usage)
+#define LINE_USED(line, offset) (line & (1 << offset))
+
 static int irq_to_gpio(struct gpio_bank *bank, unsigned int gpio_irq)
 {
        return bank->chip.base + gpio_irq;
@@ -420,15 +424,69 @@ static int _set_gpio_triggering(struct gpio_bank *bank, int gpio,
        return 0;
 }
 
+static void _enable_gpio_module(struct gpio_bank *bank, unsigned offset)
+{
+       if (bank->regs->pinctrl) {
+               void __iomem *reg = bank->base + bank->regs->pinctrl;
+
+               /* Claim the pin for MPU */
+               __raw_writel(__raw_readl(reg) | (1 << offset), reg);
+       }
+
+       if (bank->regs->ctrl && !BANK_USED(bank)) {
+               void __iomem *reg = bank->base + bank->regs->ctrl;
+               u32 ctrl;
+
+               ctrl = __raw_readl(reg);
+               /* Module is enabled, clocks are not gated */
+               ctrl &= ~GPIO_MOD_CTRL_BIT;
+               __raw_writel(ctrl, reg);
+               bank->context.ctrl = ctrl;
+       }
+}
+
+static void _disable_gpio_module(struct gpio_bank *bank, unsigned offset)
+{
+       void __iomem *base = bank->base;
+
+       if (bank->regs->wkup_en &&
+           !LINE_USED(bank->mod_usage, offset) &&
+           !LINE_USED(bank->irq_usage, offset)) {
+               /* Disable wake-up during idle for dynamic tick */
+               _gpio_rmw(base, bank->regs->wkup_en, 1 << offset, 0);
+               bank->context.wake_en =
+                       __raw_readl(bank->base + bank->regs->wkup_en);
+       }
+
+       if (bank->regs->ctrl && !BANK_USED(bank)) {
+               void __iomem *reg = bank->base + bank->regs->ctrl;
+               u32 ctrl;
+
+               ctrl = __raw_readl(reg);
+               /* Module is disabled, clocks are gated */
+               ctrl |= GPIO_MOD_CTRL_BIT;
+               __raw_writel(ctrl, reg);
+               bank->context.ctrl = ctrl;
+       }
+}
+
+static int gpio_is_input(struct gpio_bank *bank, int mask)
+{
+       void __iomem *reg = bank->base + bank->regs->direction;
+
+       return __raw_readl(reg) & mask;
+}
+
 static int gpio_irq_type(struct irq_data *d, unsigned type)
 {
        struct gpio_bank *bank = irq_data_get_irq_chip_data(d);
        unsigned gpio = 0;
        int retval;
        unsigned long flags;
+       unsigned offset;
 
-       if (WARN_ON(!bank->mod_usage))
-               return -EINVAL;
+       if (!BANK_USED(bank))
+               pm_runtime_get_sync(bank->dev);
 
 #ifdef CONFIG_ARCH_OMAP1
        if (d->irq > IH_MPUIO_BASE)
@@ -446,7 +504,17 @@ static int gpio_irq_type(struct irq_data *d, unsigned type)
                return -EINVAL;
 
        spin_lock_irqsave(&bank->lock, flags);
-       retval = _set_gpio_triggering(bank, GPIO_INDEX(bank, gpio), type);
+       offset = GPIO_INDEX(bank, gpio);
+       retval = _set_gpio_triggering(bank, offset, type);
+       if (!LINE_USED(bank->mod_usage, offset)) {
+               _enable_gpio_module(bank, offset);
+               _set_gpio_direction(bank, offset, 1);
+       } else if (!gpio_is_input(bank, 1 << offset)) {
+               spin_unlock_irqrestore(&bank->lock, flags);
+               return -EINVAL;
+       }
+
+       bank->irq_usage |= 1 << GPIO_INDEX(bank, gpio);
        spin_unlock_irqrestore(&bank->lock, flags);
 
        if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
@@ -603,35 +671,19 @@ static int omap_gpio_request(struct gpio_chip *chip, unsigned offset)
         * If this is the first gpio_request for the bank,
         * enable the bank module.
         */
-       if (!bank->mod_usage)
+       if (!BANK_USED(bank))
                pm_runtime_get_sync(bank->dev);
 
        spin_lock_irqsave(&bank->lock, flags);
        /* Set trigger to none. You need to enable the desired trigger with
-        * request_irq() or set_irq_type().
+        * request_irq() or set_irq_type(). Only do this if the IRQ line has
+        * not already been requested.
         */
-       _set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
-
-       if (bank->regs->pinctrl) {
-               void __iomem *reg = bank->base + bank->regs->pinctrl;
-
-               /* Claim the pin for MPU */
-               __raw_writel(__raw_readl(reg) | (1 << offset), reg);
-       }
-
-       if (bank->regs->ctrl && !bank->mod_usage) {
-               void __iomem *reg = bank->base + bank->regs->ctrl;
-               u32 ctrl;
-
-               ctrl = __raw_readl(reg);
-               /* Module is enabled, clocks are not gated */
-               ctrl &= ~GPIO_MOD_CTRL_BIT;
-               __raw_writel(ctrl, reg);
-               bank->context.ctrl = ctrl;
+       if (!LINE_USED(bank->irq_usage, offset)) {
+               _set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
+               _enable_gpio_module(bank, offset);
        }
-
        bank->mod_usage |= 1 << offset;
-
        spin_unlock_irqrestore(&bank->lock, flags);
 
        return 0;
@@ -640,31 +692,11 @@ static int omap_gpio_request(struct gpio_chip *chip, unsigned offset)
 static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
 {
        struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip);
-       void __iomem *base = bank->base;
        unsigned long flags;
 
        spin_lock_irqsave(&bank->lock, flags);
-
-       if (bank->regs->wkup_en) {
-               /* Disable wake-up during idle for dynamic tick */
-               _gpio_rmw(base, bank->regs->wkup_en, 1 << offset, 0);
-               bank->context.wake_en =
-                       __raw_readl(bank->base + bank->regs->wkup_en);
-       }
-
        bank->mod_usage &= ~(1 << offset);
-
-       if (bank->regs->ctrl && !bank->mod_usage) {
-               void __iomem *reg = bank->base + bank->regs->ctrl;
-               u32 ctrl;
-
-               ctrl = __raw_readl(reg);
-               /* Module is disabled, clocks are gated */
-               ctrl |= GPIO_MOD_CTRL_BIT;
-               __raw_writel(ctrl, reg);
-               bank->context.ctrl = ctrl;
-       }
-
+       _disable_gpio_module(bank, offset);
        _reset_gpio(bank, bank->chip.base + offset);
        spin_unlock_irqrestore(&bank->lock, flags);
 
@@ -672,7 +704,7 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
         * If this is the last gpio to be freed in the bank,
         * disable the bank module.
         */
-       if (!bank->mod_usage)
+       if (!BANK_USED(bank))
                pm_runtime_put(bank->dev);
 }
 
@@ -762,10 +794,20 @@ static void gpio_irq_shutdown(struct irq_data *d)
        struct gpio_bank *bank = irq_data_get_irq_chip_data(d);
        unsigned int gpio = irq_to_gpio(bank, d->hwirq);
        unsigned long flags;
+       unsigned offset = GPIO_INDEX(bank, gpio);
 
        spin_lock_irqsave(&bank->lock, flags);
+       bank->irq_usage &= ~(1 << offset);
+       _disable_gpio_module(bank, offset);
        _reset_gpio(bank, gpio);
        spin_unlock_irqrestore(&bank->lock, flags);
+
+       /*
+        * If this is the last IRQ to be freed in the bank,
+        * disable the bank module.
+        */
+       if (!BANK_USED(bank))
+               pm_runtime_put(bank->dev);
 }
 
 static void gpio_ack_irq(struct irq_data *d)
@@ -897,13 +939,6 @@ static int gpio_input(struct gpio_chip *chip, unsigned offset)
        return 0;
 }
 
-static int gpio_is_input(struct gpio_bank *bank, int mask)
-{
-       void __iomem *reg = bank->base + bank->regs->direction;
-
-       return __raw_readl(reg) & mask;
-}
-
 static int gpio_get(struct gpio_chip *chip, unsigned offset)
 {
        struct gpio_bank *bank;
@@ -922,13 +957,22 @@ static int gpio_output(struct gpio_chip *chip, unsigned offset, int value)
 {
        struct gpio_bank *bank;
        unsigned long flags;
+       int retval = 0;
 
        bank = container_of(chip, struct gpio_bank, chip);
        spin_lock_irqsave(&bank->lock, flags);
+
+       if (LINE_USED(bank->irq_usage, offset)) {
+                       retval = -EINVAL;
+                       goto exit;
+       }
+
        bank->set_dataout(bank, offset, value);
        _set_gpio_direction(bank, offset, 0);
+
+exit:
        spin_unlock_irqrestore(&bank->lock, flags);
-       return 0;
+       return retval;
 }
 
 static int gpio_debounce(struct gpio_chip *chip, unsigned offset,
@@ -1400,7 +1444,7 @@ void omap2_gpio_prepare_for_idle(int pwr_mode)
        struct gpio_bank *bank;
 
        list_for_each_entry(bank, &omap_gpio_list, node) {
-               if (!bank->mod_usage || !bank->loses_context)
+               if (!BANK_USED(bank) || !bank->loses_context)
                        continue;
 
                bank->power_mode = pwr_mode;
@@ -1414,7 +1458,7 @@ void omap2_gpio_resume_after_idle(void)
        struct gpio_bank *bank;
 
        list_for_each_entry(bank, &omap_gpio_list, node) {
-               if (!bank->mod_usage || !bank->loses_context)
+               if (!BANK_USED(bank) || !bank->loses_context)
                        continue;
 
                pm_runtime_get_sync(bank->dev);
index e3745eb..6038966 100644 (file)
@@ -293,10 +293,9 @@ static void gpio_rcar_parse_pdata(struct gpio_rcar_priv *p)
        if (pdata) {
                p->config = *pdata;
        } else if (IS_ENABLED(CONFIG_OF) && np) {
-               ret = of_parse_phandle_with_args(np, "gpio-ranges",
-                               "#gpio-range-cells", 0, &args);
-               p->config.number_of_pins = ret == 0 && args.args_count == 3
-                                        ? args.args[2]
+               ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0,
+                                                      &args);
+               p->config.number_of_pins = ret == 0 ? args.args[2]
                                         : RCAR_MAX_GPIO_PER_BANK;
                p->config.gpio_base = -1;
        }
index 1688ff5..830f750 100644 (file)
@@ -2925,6 +2925,8 @@ int drm_edid_to_speaker_allocation(struct edid *edid, u8 **sadb)
                        /* Speaker Allocation Data Block */
                        if (dbl == 3) {
                                *sadb = kmalloc(dbl, GFP_KERNEL);
+                               if (!*sadb)
+                                       return -ENOMEM;
                                memcpy(*sadb, &db[1], dbl);
                                count = dbl;
                                break;
index f6f6cc7..3d13ca6 100644 (file)
@@ -416,14 +416,6 @@ static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode)
                return;
 
        /*
-        * fbdev->blank can be called from irq context in case of a panic.
-        * Since we already have our own special panic handler which will
-        * restore the fbdev console mode completely, just bail out early.
-        */
-       if (oops_in_progress)
-               return;
-
-       /*
         * For each CRTC in this fb, turn the connectors on/off.
         */
        drm_modeset_lock_all(dev);
index 92babac..2db731f 100644 (file)
@@ -204,6 +204,7 @@ static int psb_gtt_attach_pages(struct gtt_range *gt)
        if (IS_ERR(pages))
                return PTR_ERR(pages);
 
+       gt->npage = gt->gem.size / PAGE_SIZE;
        gt->pages = pages;
 
        return 0;
index b1f8fc6..60e8404 100644 (file)
@@ -707,8 +707,7 @@ tda998x_encoder_dpms(struct drm_encoder *encoder, int mode)
                reg_write(encoder, REG_VIP_CNTRL_2, priv->vip_cntrl_2);
                break;
        case DRM_MODE_DPMS_OFF:
-               /* disable audio and video ports */
-               reg_write(encoder, REG_ENA_AP, 0x00);
+               /* disable video ports */
                reg_write(encoder, REG_ENA_VP_0, 0x00);
                reg_write(encoder, REG_ENA_VP_1, 0x00);
                reg_write(encoder, REG_ENA_VP_2, 0x00);
index c27a210..d5c784d 100644 (file)
@@ -1290,12 +1290,9 @@ static int i915_load_modeset_init(struct drm_device *dev)
         * then we do not take part in VGA arbitration and the
         * vga_client_register() fails with -ENODEV.
         */
-       if (!HAS_PCH_SPLIT(dev)) {
-               ret = vga_client_register(dev->pdev, dev, NULL,
-                                         i915_vga_set_decode);
-               if (ret && ret != -ENODEV)
-                       goto out;
-       }
+       ret = vga_client_register(dev->pdev, dev, NULL, i915_vga_set_decode);
+       if (ret && ret != -ENODEV)
+               goto out;
 
        intel_register_dsm_handler();
 
@@ -1351,12 +1348,6 @@ static int i915_load_modeset_init(struct drm_device *dev)
         */
        intel_fbdev_initial_config(dev);
 
-       /*
-        * Must do this after fbcon init so that
-        * vgacon_save_screen() works during the handover.
-        */
-       i915_disable_vga_mem(dev);
-
        /* Only enable hotplug handling once the fbdev is fully set up. */
        dev_priv->enable_hotplug_processing = true;
 
index df9253d..cdfb9da 100644 (file)
@@ -4800,10 +4800,10 @@ i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
 
        if (!mutex_trylock(&dev->struct_mutex)) {
                if (!mutex_is_locked_by(&dev->struct_mutex, current))
-                       return SHRINK_STOP;
+                       return 0;
 
                if (dev_priv->mm.shrinker_no_lock_stealing)
-                       return SHRINK_STOP;
+                       return 0;
 
                unlock = false;
        }
@@ -4901,10 +4901,10 @@ i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
 
        if (!mutex_trylock(&dev->struct_mutex)) {
                if (!mutex_is_locked_by(&dev->struct_mutex, current))
-                       return 0;
+                       return SHRINK_STOP;
 
                if (dev_priv->mm.shrinker_no_lock_stealing)
-                       return 0;
+                       return SHRINK_STOP;
 
                unlock = false;
        }
index aba9d74..dae364f 100644 (file)
@@ -143,8 +143,10 @@ static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
 
        /* Seek the first printf which is hits start position */
        if (e->pos < e->start) {
-               len = vsnprintf(NULL, 0, f, args);
-               if (!__i915_error_seek(e, len))
+               va_list tmp;
+
+               va_copy(tmp, args);
+               if (!__i915_error_seek(e, vsnprintf(NULL, 0, f, tmp)))
                        return;
        }
 
index c159e1a..38f96f6 100644 (file)
 #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG         0x9030
 #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB      (1<<11)
 
+#define HSW_SCRATCH1                           0xb038
+#define  HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE  (1<<27)
+
 #define HSW_FUSE_STRAP         0x42014
 #define  HSW_CDCLK_LIMIT       (1 << 24)
 
 #define GEN7_ROW_CHICKEN2_GT2          0xf4f4
 #define   DOP_CLOCK_GATING_DISABLE     (1<<0)
 
+#define HSW_ROW_CHICKEN3               0xe49c
+#define  HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE    (1 << 6)
+
 #define G4X_AUD_VID_DID                        (dev_priv->info->display_mmio_offset + 0x62020)
 #define INTEL_AUDIO_DEVCL              0x808629FB
 #define INTEL_AUDIO_DEVBLC             0x80862801
index d8a1d98..581fb4b 100644 (file)
@@ -3941,8 +3941,6 @@ static void intel_connector_check_state(struct intel_connector *connector)
  * consider. */
 void intel_connector_dpms(struct drm_connector *connector, int mode)
 {
-       struct intel_encoder *encoder = intel_attached_encoder(connector);
-
        /* All the simple cases only support two dpms states. */
        if (mode != DRM_MODE_DPMS_ON)
                mode = DRM_MODE_DPMS_OFF;
@@ -3953,10 +3951,8 @@ void intel_connector_dpms(struct drm_connector *connector, int mode)
        connector->dpms = mode;
 
        /* Only need to change hw state when actually enabled */
-       if (encoder->base.crtc)
-               intel_encoder_dpms(encoder, mode);
-       else
-               WARN_ON(encoder->connectors_active != false);
+       if (connector->encoder)
+               intel_encoder_dpms(to_intel_encoder(connector->encoder), mode);
 
        intel_modeset_check_state(connector->dev);
 }
@@ -4775,6 +4771,10 @@ static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc)
 
        pipeconf = 0;
 
+       if (dev_priv->quirks & QUIRK_PIPEA_FORCE &&
+           I915_READ(PIPECONF(intel_crtc->pipe)) & PIPECONF_ENABLE)
+               pipeconf |= PIPECONF_ENABLE;
+
        if (intel_crtc->pipe == 0 && INTEL_INFO(dev)->gen < 4) {
                /* Enable pixel doubling when the dot clock is > 90% of the (display)
                 * core speed.
@@ -10045,33 +10045,6 @@ static void i915_disable_vga(struct drm_device *dev)
        POSTING_READ(vga_reg);
 }
 
-static void i915_enable_vga_mem(struct drm_device *dev)
-{
-       /* Enable VGA memory on Intel HD */
-       if (HAS_PCH_SPLIT(dev)) {
-               vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO);
-               outb(inb(VGA_MSR_READ) | VGA_MSR_MEM_EN, VGA_MSR_WRITE);
-               vga_set_legacy_decoding(dev->pdev, VGA_RSRC_LEGACY_IO |
-                                                  VGA_RSRC_LEGACY_MEM |
-                                                  VGA_RSRC_NORMAL_IO |
-                                                  VGA_RSRC_NORMAL_MEM);
-               vga_put(dev->pdev, VGA_RSRC_LEGACY_IO);
-       }
-}
-
-void i915_disable_vga_mem(struct drm_device *dev)
-{
-       /* Disable VGA memory on Intel HD */
-       if (HAS_PCH_SPLIT(dev)) {
-               vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO);
-               outb(inb(VGA_MSR_READ) & ~VGA_MSR_MEM_EN, VGA_MSR_WRITE);
-               vga_set_legacy_decoding(dev->pdev, VGA_RSRC_LEGACY_IO |
-                                                  VGA_RSRC_NORMAL_IO |
-                                                  VGA_RSRC_NORMAL_MEM);
-               vga_put(dev->pdev, VGA_RSRC_LEGACY_IO);
-       }
-}
-
 void intel_modeset_init_hw(struct drm_device *dev)
 {
        intel_init_power_well(dev);
@@ -10350,7 +10323,6 @@ void i915_redisable_vga(struct drm_device *dev)
        if (I915_READ(vga_reg) != VGA_DISP_DISABLE) {
                DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n");
                i915_disable_vga(dev);
-               i915_disable_vga_mem(dev);
        }
 }
 
@@ -10564,8 +10536,6 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
        intel_disable_fbc(dev);
 
-       i915_enable_vga_mem(dev);
-
        intel_disable_gt_powersave(dev);
 
        ironlake_teardown_rc6(dev);
index 2151d13..2c555f9 100644 (file)
@@ -588,7 +588,18 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
                        DRM_DEBUG_KMS("aux_ch native nack\n");
                        return -EREMOTEIO;
                case AUX_NATIVE_REPLY_DEFER:
-                       udelay(100);
+                       /*
+                        * For now, just give more slack to branch devices. We
+                        * could check the DPCD for I2C bit rate capabilities,
+                        * and if available, adjust the interval. We could also
+                        * be more careful with DP-to-Legacy adapters where a
+                        * long legacy cable may force very low I2C bit rates.
+                        */
+                       if (intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
+                           DP_DWN_STRM_PORT_PRESENT)
+                               usleep_range(500, 600);
+                       else
+                               usleep_range(300, 400);
                        continue;
                default:
                        DRM_ERROR("aux_ch invalid native reply 0x%02x\n",
@@ -1456,7 +1467,7 @@ static void intel_edp_psr_setup(struct intel_dp *intel_dp)
 
        /* Avoid continuous PSR exit by masking memup and hpd */
        I915_WRITE(EDP_PSR_DEBUG_CTL, EDP_PSR_DEBUG_MASK_MEMUP |
-                  EDP_PSR_DEBUG_MASK_HPD);
+                  EDP_PSR_DEBUG_MASK_HPD | EDP_PSR_DEBUG_MASK_LPSP);
 
        intel_dp->psr_setup_done = true;
 }
index 28cae80..9b7b68f 100644 (file)
@@ -793,6 +793,5 @@ extern void hsw_pc8_disable_interrupts(struct drm_device *dev);
 extern void hsw_pc8_restore_interrupts(struct drm_device *dev);
 extern void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv);
 extern void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv);
-extern void i915_disable_vga_mem(struct drm_device *dev);
 
 #endif /* __INTEL_DRV_H__ */
index dd176b7..f4c5e95 100644 (file)
@@ -3864,8 +3864,6 @@ static void valleyview_enable_rps(struct drm_device *dev)
                                      dev_priv->rps.rpe_delay),
                         dev_priv->rps.rpe_delay);
 
-       INIT_DELAYED_WORK(&dev_priv->rps.vlv_work, vlv_rps_timer_work);
-
        valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
 
        gen6_enable_rps_interrupts(dev);
@@ -4955,6 +4953,11 @@ static void haswell_init_clock_gating(struct drm_device *dev)
        I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
                        GEN7_WA_L3_CHICKEN_MODE);
 
+       /* L3 caching of data atomics doesn't work -- disable it. */
+       I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
+       I915_WRITE(HSW_ROW_CHICKEN3,
+                  _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
+
        /* This is required by WaCatErrorRejectionIssue:hsw */
        I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
                        I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
@@ -5681,5 +5684,7 @@ void intel_pm_init(struct drm_device *dev)
 
        INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
                          intel_gen6_powersave_work);
+
+       INIT_DELAYED_WORK(&dev_priv->rps.vlv_work, vlv_rps_timer_work);
 }
 
index f2c6d79..dd6f84b 100644 (file)
@@ -916,6 +916,14 @@ intel_tv_compute_config(struct intel_encoder *encoder,
        DRM_DEBUG_KMS("forcing bpc to 8 for TV\n");
        pipe_config->pipe_bpp = 8*3;
 
+       /* TV has it's own notion of sync and other mode flags, so clear them. */
+       pipe_config->adjusted_mode.flags = 0;
+
+       /*
+        * FIXME: We don't check whether the input mode is actually what we want
+        * or whether userspace is doing something stupid.
+        */
+
        return true;
 }
 
index 5db5bba..bc7fd11 100644 (file)
@@ -19,8 +19,6 @@
 #include "msm_drv.h"
 #include "mdp4_kms.h"
 
-#include <mach/iommu.h>
-
 static struct mdp4_platform_config *mdp4_get_config(struct platform_device *dev);
 
 static int mdp4_hw_init(struct msm_kms *kms)
index 008d772..b3a2f16 100644 (file)
@@ -18,8 +18,6 @@
 #include "msm_drv.h"
 #include "msm_gpu.h"
 
-#include <mach/iommu.h>
-
 static void msm_fb_output_poll_changed(struct drm_device *dev)
 {
        struct msm_drm_private *priv = dev->dev_private;
@@ -62,6 +60,8 @@ int msm_iommu_attach(struct drm_device *dev, struct iommu_domain *iommu,
        int i, ret;
 
        for (i = 0; i < cnt; i++) {
+               /* TODO maybe some day msm iommu won't require this hack: */
+               struct device *msm_iommu_get_ctx(const char *ctx_name);
                struct device *ctx = msm_iommu_get_ctx(names[i]);
                if (!ctx)
                        continue;
@@ -199,7 +199,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags)
                 * imx drm driver on iMX5
                 */
                dev_err(dev->dev, "failed to load kms\n");
-               ret = PTR_ERR(priv->kms);
+               ret = PTR_ERR(kms);
                goto fail;
        }
 
@@ -697,7 +697,7 @@ static struct drm_driver msm_driver = {
        .gem_vm_ops         = &vm_ops,
        .dumb_create        = msm_gem_dumb_create,
        .dumb_map_offset    = msm_gem_dumb_map_offset,
-       .dumb_destroy       = msm_gem_dumb_destroy,
+       .dumb_destroy       = drm_gem_dumb_destroy,
 #ifdef CONFIG_DEBUG_FS
        .debugfs_init       = msm_debugfs_init,
        .debugfs_cleanup    = msm_debugfs_cleanup,
index 29eacfa..2bae46c 100644 (file)
@@ -319,13 +319,6 @@ int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
                        MSM_BO_SCANOUT | MSM_BO_WC, &args->handle);
 }
 
-int msm_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev,
-               uint32_t handle)
-{
-       /* No special work needed, drop the reference and see what falls out */
-       return drm_gem_handle_delete(file, handle);
-}
-
 int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
                uint32_t handle, uint64_t *offset)
 {
index 37712a6..e290cfa 100644 (file)
@@ -113,7 +113,7 @@ nouveau_mc_create_(struct nouveau_object *parent, struct nouveau_object *engine,
                pmc->use_msi = false;
                break;
        default:
-               pmc->use_msi = nouveau_boolopt(device->cfgopt, "NvMSI", true);
+               pmc->use_msi = nouveau_boolopt(device->cfgopt, "NvMSI", false);
                if (pmc->use_msi) {
                        pmc->use_msi = pci_enable_msi(device->pdev) == 0;
                        if (pmc->use_msi) {
index 05ff315..9b6950d 100644 (file)
@@ -1168,6 +1168,23 @@ static const struct radeon_blacklist_clocks btc_blacklist_clocks[] =
         { 25000, 30000, RADEON_SCLK_UP }
 };
 
+void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table,
+                                                    u32 *max_clock)
+{
+       u32 i, clock = 0;
+
+       if ((table == NULL) || (table->count == 0)) {
+               *max_clock = clock;
+               return;
+       }
+
+       for (i = 0; i < table->count; i++) {
+               if (clock < table->entries[i].clk)
+                       clock = table->entries[i].clk;
+       }
+       *max_clock = clock;
+}
+
 void btc_apply_voltage_dependency_rules(struct radeon_clock_voltage_dependency_table *table,
                                        u32 clock, u16 max_voltage, u16 *voltage)
 {
@@ -1913,7 +1930,7 @@ static int btc_set_mc_special_registers(struct radeon_device *rdev,
                        }
                        j++;
 
-                       if (j > SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE)
+                       if (j >= SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE)
                                return -EINVAL;
 
                        tmp = RREG32(MC_PMG_CMD_MRS);
@@ -1928,7 +1945,7 @@ static int btc_set_mc_special_registers(struct radeon_device *rdev,
                        }
                        j++;
 
-                       if (j > SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE)
+                       if (j >= SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE)
                                return -EINVAL;
                        break;
                case MC_SEQ_RESERVE_M >> 2:
@@ -1942,7 +1959,7 @@ static int btc_set_mc_special_registers(struct radeon_device *rdev,
                        }
                        j++;
 
-                       if (j > SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE)
+                       if (j >= SMC_EVERGREEN_MC_REGISTER_ARRAY_SIZE)
                                return -EINVAL;
                        break;
                default:
@@ -2080,6 +2097,7 @@ static void btc_apply_state_adjust_rules(struct radeon_device *rdev,
        bool disable_mclk_switching;
        u32 mclk, sclk;
        u16 vddc, vddci;
+       u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
 
        if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
            btc_dpm_vblank_too_short(rdev))
@@ -2121,6 +2139,39 @@ static void btc_apply_state_adjust_rules(struct radeon_device *rdev,
                        ps->low.vddci = max_limits->vddci;
        }
 
+       /* limit clocks to max supported clocks based on voltage dependency tables */
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
+                                                       &max_sclk_vddc);
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
+                                                       &max_mclk_vddci);
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
+                                                       &max_mclk_vddc);
+
+       if (max_sclk_vddc) {
+               if (ps->low.sclk > max_sclk_vddc)
+                       ps->low.sclk = max_sclk_vddc;
+               if (ps->medium.sclk > max_sclk_vddc)
+                       ps->medium.sclk = max_sclk_vddc;
+               if (ps->high.sclk > max_sclk_vddc)
+                       ps->high.sclk = max_sclk_vddc;
+       }
+       if (max_mclk_vddci) {
+               if (ps->low.mclk > max_mclk_vddci)
+                       ps->low.mclk = max_mclk_vddci;
+               if (ps->medium.mclk > max_mclk_vddci)
+                       ps->medium.mclk = max_mclk_vddci;
+               if (ps->high.mclk > max_mclk_vddci)
+                       ps->high.mclk = max_mclk_vddci;
+       }
+       if (max_mclk_vddc) {
+               if (ps->low.mclk > max_mclk_vddc)
+                       ps->low.mclk = max_mclk_vddc;
+               if (ps->medium.mclk > max_mclk_vddc)
+                       ps->medium.mclk = max_mclk_vddc;
+               if (ps->high.mclk > max_mclk_vddc)
+                       ps->high.mclk = max_mclk_vddc;
+       }
+
        /* XXX validate the min clocks required for display */
 
        if (disable_mclk_switching) {
index 1a15e0e..3b6f12b 100644 (file)
@@ -46,6 +46,8 @@ void btc_adjust_clock_combinations(struct radeon_device *rdev,
                                   struct rv7xx_pl *pl);
 void btc_apply_voltage_dependency_rules(struct radeon_clock_voltage_dependency_table *table,
                                        u32 clock, u16 max_voltage, u16 *voltage);
+void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table,
+                                                    u32 *max_clock);
 void btc_apply_voltage_delta_rules(struct radeon_device *rdev,
                                   u16 max_vddc, u16 max_vddci,
                                   u16 *vddc, u16 *vddci);
index 8996274..51e947a 100644 (file)
@@ -146,6 +146,8 @@ static const struct ci_pt_config_reg didt_config_ci[] =
 };
 
 extern u8 rv770_get_memory_module_index(struct radeon_device *rdev);
+extern void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table,
+                                                           u32 *max_clock);
 extern int ni_copy_and_switch_arb_sets(struct radeon_device *rdev,
                                       u32 arb_freq_src, u32 arb_freq_dest);
 extern u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock);
@@ -712,6 +714,7 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev,
        struct radeon_clock_and_voltage_limits *max_limits;
        bool disable_mclk_switching;
        u32 sclk, mclk;
+       u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
        int i;
 
        if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
@@ -739,6 +742,29 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev,
                }
        }
 
+       /* limit clocks to max supported clocks based on voltage dependency tables */
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
+                                                       &max_sclk_vddc);
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
+                                                       &max_mclk_vddci);
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
+                                                       &max_mclk_vddc);
+
+       for (i = 0; i < ps->performance_level_count; i++) {
+               if (max_sclk_vddc) {
+                       if (ps->performance_levels[i].sclk > max_sclk_vddc)
+                               ps->performance_levels[i].sclk = max_sclk_vddc;
+               }
+               if (max_mclk_vddci) {
+                       if (ps->performance_levels[i].mclk > max_mclk_vddci)
+                               ps->performance_levels[i].mclk = max_mclk_vddci;
+               }
+               if (max_mclk_vddc) {
+                       if (ps->performance_levels[i].mclk > max_mclk_vddc)
+                               ps->performance_levels[i].mclk = max_mclk_vddc;
+               }
+       }
+
        /* XXX validate the min clocks required for display */
 
        if (disable_mclk_switching) {
index adbdb65..b874ccd 100644 (file)
@@ -77,6 +77,8 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev);
 static void cik_program_aspm(struct radeon_device *rdev);
 static void cik_init_pg(struct radeon_device *rdev);
 static void cik_init_cg(struct radeon_device *rdev);
+static void cik_fini_pg(struct radeon_device *rdev);
+static void cik_fini_cg(struct radeon_device *rdev);
 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                          bool enable);
 
@@ -2845,10 +2847,8 @@ static void cik_gpu_init(struct radeon_device *rdev)
                rdev->config.cik.tile_config |= (3 << 0);
                break;
        }
-       if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
-               rdev->config.cik.tile_config |= 1 << 4;
-       else
-               rdev->config.cik.tile_config |= 0 << 4;
+       rdev->config.cik.tile_config |=
+               ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
        rdev->config.cik.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.cik.tile_config |=
@@ -4187,6 +4187,10 @@ static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
 
+       /* disable CG/PG */
+       cik_fini_pg(rdev);
+       cik_fini_cg(rdev);
+
        /* stop the rlc */
        cik_rlc_stop(rdev);
 
@@ -4456,8 +4460,8 @@ static int cik_mc_init(struct radeon_device *rdev)
        rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
        rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
        /* size in MB on si */
-       rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
-       rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+       rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+       rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
        rdev->mc.visible_vram_size = rdev->mc.aper_size;
        si_vram_gtt_location(rdev, &rdev->mc);
        radeon_update_bandwidth_info(rdev);
@@ -4735,12 +4739,13 @@ static void cik_vm_decode_fault(struct radeon_device *rdev,
        u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
        u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
        u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
-       char *block = (char *)&mc_client;
+       char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
+               (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
 
-       printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
+       printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
               protections, vmid, addr,
               (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
-              block, mc_id);
+              block, mc_client, mc_id);
 }
 
 /**
index 555164e..b5c67a9 100644 (file)
@@ -3131,7 +3131,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
                rdev->config.evergreen.sx_max_export_size = 256;
                rdev->config.evergreen.sx_max_export_pos_size = 64;
                rdev->config.evergreen.sx_max_export_smx_size = 192;
-               rdev->config.evergreen.max_hw_contexts = 8;
+               rdev->config.evergreen.max_hw_contexts = 4;
                rdev->config.evergreen.sq_num_cf_insts = 2;
 
                rdev->config.evergreen.sc_prim_fifo_size = 0x40;
index f71ce39..f815c20 100644 (file)
@@ -288,8 +288,7 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode
        /* fglrx clears sth in AFMT_AUDIO_PACKET_CONTROL2 here */
 
        WREG32(HDMI_ACR_PACKET_CONTROL + offset,
-              HDMI_ACR_AUTO_SEND | /* allow hw to sent ACR packets when required */
-              HDMI_ACR_SOURCE); /* select SW CTS value */
+              HDMI_ACR_AUTO_SEND); /* allow hw to sent ACR packets when required */
 
        evergreen_hdmi_update_ACR(encoder, mode->clock);
 
index 8768fd6..4f6d296 100644 (file)
  * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
  */
 #              define PACKET3_CP_DMA_DST_SEL(x)    ((x) << 20)
-                /* 0 - SRC_ADDR
+                /* 0 - DST_ADDR
                 * 1 - GDS
                 */
 #              define PACKET3_CP_DMA_ENGINE(x)     ((x) << 27)
 #              define PACKET3_CP_DMA_CP_SYNC       (1 << 31)
 /* COMMAND */
 #              define PACKET3_CP_DMA_DIS_WC        (1 << 21)
-#              define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+#              define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
                 /* 0 - none
                 * 1 - 8 in 16
                 * 2 - 8 in 32
index 6c398a4..f263390 100644 (file)
@@ -787,6 +787,7 @@ static void ni_apply_state_adjust_rules(struct radeon_device *rdev,
        bool disable_mclk_switching;
        u32 mclk, sclk;
        u16 vddc, vddci;
+       u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
        int i;
 
        if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
@@ -813,6 +814,29 @@ static void ni_apply_state_adjust_rules(struct radeon_device *rdev,
                }
        }
 
+       /* limit clocks to max supported clocks based on voltage dependency tables */
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
+                                                       &max_sclk_vddc);
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
+                                                       &max_mclk_vddci);
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
+                                                       &max_mclk_vddc);
+
+       for (i = 0; i < ps->performance_level_count; i++) {
+               if (max_sclk_vddc) {
+                       if (ps->performance_levels[i].sclk > max_sclk_vddc)
+                               ps->performance_levels[i].sclk = max_sclk_vddc;
+               }
+               if (max_mclk_vddci) {
+                       if (ps->performance_levels[i].mclk > max_mclk_vddci)
+                               ps->performance_levels[i].mclk = max_mclk_vddci;
+               }
+               if (max_mclk_vddc) {
+                       if (ps->performance_levels[i].mclk > max_mclk_vddc)
+                               ps->performance_levels[i].mclk = max_mclk_vddc;
+               }
+       }
+
        /* XXX validate the min clocks required for display */
 
        if (disable_mclk_switching) {
index 2417571..d713330 100644 (file)
@@ -2933,9 +2933,11 @@ static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
        seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
        seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
        seq_printf(m, "%u dwords in ring\n", count);
-       for (j = 0; j <= count; j++) {
-               i = (rdp + j) & ring->ptr_mask;
-               seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
+       if (ring->ready) {
+               for (j = 0; j <= count; j++) {
+                       i = (rdp + j) & ring->ptr_mask;
+                       seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
+               }
        }
        return 0;
 }
index e65f211..5513d8f 100644 (file)
@@ -1084,7 +1084,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
                                rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].dclk =
                                        le16_to_cpu(uvd_clk->usDClkLow) | (uvd_clk->ucDClkHigh << 16);
                                rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].v =
-                                       le16_to_cpu(limits->entries[i].usVoltage);
+                                       le16_to_cpu(entry->usVoltage);
                                entry = (ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record *)
                                        ((u8 *)entry + sizeof(ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record));
                        }
index f443010..5b72931 100644 (file)
@@ -57,15 +57,15 @@ enum r600_hdmi_iec_status_bits {
 static const struct radeon_hdmi_acr r600_hdmi_predefined_acr[] = {
     /*      32kHz        44.1kHz       48kHz    */
     /* Clock      N     CTS      N     CTS      N     CTS */
-    {  25174,  4576,  28125,  7007,  31250,  6864,  28125 }, /*  25,20/1.001 MHz */
+    {  25175,  4576,  28125,  7007,  31250,  6864,  28125 }, /*  25,20/1.001 MHz */
     {  25200,  4096,  25200,  6272,  28000,  6144,  25200 }, /*  25.20       MHz */
     {  27000,  4096,  27000,  6272,  30000,  6144,  27000 }, /*  27.00       MHz */
     {  27027,  4096,  27027,  6272,  30030,  6144,  27027 }, /*  27.00*1.001 MHz */
     {  54000,  4096,  54000,  6272,  60000,  6144,  54000 }, /*  54.00       MHz */
     {  54054,  4096,  54054,  6272,  60060,  6144,  54054 }, /*  54.00*1.001 MHz */
-    {  74175, 11648, 210937, 17836, 234375, 11648, 140625 }, /*  74.25/1.001 MHz */
+    {  74176, 11648, 210937, 17836, 234375, 11648, 140625 }, /*  74.25/1.001 MHz */
     {  74250,  4096,  74250,  6272,  82500,  6144,  74250 }, /*  74.25       MHz */
-    { 148351, 11648, 421875,  8918, 234375,  5824, 140625 }, /* 148.50/1.001 MHz */
+    { 148352, 11648, 421875,  8918, 234375,  5824, 140625 }, /* 148.50/1.001 MHz */
     { 148500,  4096, 148500,  6272, 165000,  6144, 148500 }, /* 148.50       MHz */
     {      0,  4096,      0,  6272,      0,  6144,      0 }  /* Other */
 };
@@ -75,8 +75,15 @@ static const struct radeon_hdmi_acr r600_hdmi_predefined_acr[] = {
  */
 static void r600_hdmi_calc_cts(uint32_t clock, int *CTS, int N, int freq)
 {
-       if (*CTS == 0)
-               *CTS = clock * N / (128 * freq) * 1000;
+       u64 n;
+       u32 d;
+
+       if (*CTS == 0) {
+               n = (u64)clock * (u64)N * 1000ULL;
+               d = 128 * freq;
+               do_div(n, d);
+               *CTS = n;
+       }
        DRM_DEBUG("Using ACR timing N=%d CTS=%d for frequency %d\n",
                  N, *CTS, freq);
 }
@@ -257,10 +264,7 @@ void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock)
         * number (coefficient of two integer numbers.  DCCG_AUDIO_DTOx_PHASE
         * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
         */
-       if (ASIC_IS_DCE3(rdev)) {
-               /* according to the reg specs, this should DCE3.2 only, but in
-                * practice it seems to cover DCE3.0 as well.
-                */
+       if (ASIC_IS_DCE32(rdev)) {
                if (dig->dig_encoder == 0) {
                        dto_cntl = RREG32(DCCG_AUDIO_DTO0_CNTL) & ~DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK;
                        dto_cntl |= DCCG_AUDIO_DTO_WALLCLOCK_RATIO(wallclock_ratio);
@@ -276,8 +280,21 @@ void r600_audio_set_dto(struct drm_encoder *encoder, u32 clock)
                        WREG32(DCCG_AUDIO_DTO1_MODULE, dto_modulo);
                        WREG32(DCCG_AUDIO_DTO_SELECT, 1); /* select DTO1 */
                }
+       } else if (ASIC_IS_DCE3(rdev)) {
+               /* according to the reg specs, this should DCE3.2 only, but in
+                * practice it seems to cover DCE3.0/3.1 as well.
+                */
+               if (dig->dig_encoder == 0) {
+                       WREG32(DCCG_AUDIO_DTO0_PHASE, base_rate * 100);
+                       WREG32(DCCG_AUDIO_DTO0_MODULE, clock * 100);
+                       WREG32(DCCG_AUDIO_DTO_SELECT, 0); /* select DTO0 */
+               } else {
+                       WREG32(DCCG_AUDIO_DTO1_PHASE, base_rate * 100);
+                       WREG32(DCCG_AUDIO_DTO1_MODULE, clock * 100);
+                       WREG32(DCCG_AUDIO_DTO_SELECT, 1); /* select DTO1 */
+               }
        } else {
-               /* according to the reg specs, this should be DCE2.0 and DCE3.0 */
+               /* according to the reg specs, this should be DCE2.0 and DCE3.0/3.1 */
                WREG32(AUDIO_DTO, AUDIO_DTO_PHASE(base_rate / 10) |
                       AUDIO_DTO_MODULE(clock / 10));
        }
@@ -434,8 +451,8 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod
        }
 
        WREG32(HDMI0_ACR_PACKET_CONTROL + offset,
-              HDMI0_ACR_AUTO_SEND | /* allow hw to sent ACR packets when required */
-              HDMI0_ACR_SOURCE); /* select SW CTS value */
+              HDMI0_ACR_SOURCE | /* select SW CTS value - XXX verify that hw CTS works on all families */
+              HDMI0_ACR_AUTO_SEND); /* allow hw to sent ACR packets when required */
 
        WREG32(HDMI0_VBI_PACKET_CONTROL + offset,
               HDMI0_NULL_SEND | /* send null packets when required */
index e673fe2..7b3c7b5 100644 (file)
  */
 #              define PACKET3_CP_DMA_CP_SYNC       (1 << 31)
 /* COMMAND */
-#              define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+#              define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
                 /* 0 - none
                 * 1 - 8 in 16
                 * 2 - 8 in 32
index 5003385..8f7e045 100644 (file)
@@ -1004,6 +1004,8 @@ static struct radeon_asic rv6xx_asic = {
                .wait_for_vblank = &avivo_wait_for_vblank,
                .set_backlight_level = &atombios_set_backlight_level,
                .get_backlight_level = &atombios_get_backlight_level,
+               .hdmi_enable = &r600_hdmi_enable,
+               .hdmi_setmode = &r600_hdmi_setmode,
        },
        .copy = {
                .blit = &r600_copy_cpdma,
index 404e25d..f79ee18 100644 (file)
@@ -1367,6 +1367,7 @@ bool radeon_atombios_get_ppll_ss_info(struct radeon_device *rdev,
        int index = GetIndexIntoMasterTable(DATA, PPLL_SS_Info);
        uint16_t data_offset, size;
        struct _ATOM_SPREAD_SPECTRUM_INFO *ss_info;
+       struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT *ss_assign;
        uint8_t frev, crev;
        int i, num_indices;
 
@@ -1378,18 +1379,21 @@ bool radeon_atombios_get_ppll_ss_info(struct radeon_device *rdev,
 
                num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
                        sizeof(ATOM_SPREAD_SPECTRUM_ASSIGNMENT);
-
+               ss_assign = (struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT*)
+                       ((u8 *)&ss_info->asSS_Info[0]);
                for (i = 0; i < num_indices; i++) {
-                       if (ss_info->asSS_Info[i].ucSS_Id == id) {
+                       if (ss_assign->ucSS_Id == id) {
                                ss->percentage =
-                                       le16_to_cpu(ss_info->asSS_Info[i].usSpreadSpectrumPercentage);
-                               ss->type = ss_info->asSS_Info[i].ucSpreadSpectrumType;
-                               ss->step = ss_info->asSS_Info[i].ucSS_Step;
-                               ss->delay = ss_info->asSS_Info[i].ucSS_Delay;
-                               ss->range = ss_info->asSS_Info[i].ucSS_Range;
-                               ss->refdiv = ss_info->asSS_Info[i].ucRecommendedRef_Div;
+                                       le16_to_cpu(ss_assign->usSpreadSpectrumPercentage);
+                               ss->type = ss_assign->ucSpreadSpectrumType;
+                               ss->step = ss_assign->ucSS_Step;
+                               ss->delay = ss_assign->ucSS_Delay;
+                               ss->range = ss_assign->ucSS_Range;
+                               ss->refdiv = ss_assign->ucRecommendedRef_Div;
                                return true;
                        }
+                       ss_assign = (struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT*)
+                               ((u8 *)ss_assign + sizeof(struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT));
                }
        }
        return false;
@@ -1477,6 +1481,12 @@ union asic_ss_info {
        struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 info_3;
 };
 
+union asic_ss_assignment {
+       struct _ATOM_ASIC_SS_ASSIGNMENT v1;
+       struct _ATOM_ASIC_SS_ASSIGNMENT_V2 v2;
+       struct _ATOM_ASIC_SS_ASSIGNMENT_V3 v3;
+};
+
 bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev,
                                      struct radeon_atom_ss *ss,
                                      int id, u32 clock)
@@ -1485,6 +1495,7 @@ bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev,
        int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
        uint16_t data_offset, size;
        union asic_ss_info *ss_info;
+       union asic_ss_assignment *ss_assign;
        uint8_t frev, crev;
        int i, num_indices;
 
@@ -1509,45 +1520,52 @@ bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev,
                        num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
                                sizeof(ATOM_ASIC_SS_ASSIGNMENT);
 
+                       ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info.asSpreadSpectrum[0]);
                        for (i = 0; i < num_indices; i++) {
-                               if ((ss_info->info.asSpreadSpectrum[i].ucClockIndication == id) &&
-                                   (clock <= le32_to_cpu(ss_info->info.asSpreadSpectrum[i].ulTargetClockRange))) {
+                               if ((ss_assign->v1.ucClockIndication == id) &&
+                                   (clock <= le32_to_cpu(ss_assign->v1.ulTargetClockRange))) {
                                        ss->percentage =
-                                               le16_to_cpu(ss_info->info.asSpreadSpectrum[i].usSpreadSpectrumPercentage);
-                                       ss->type = ss_info->info.asSpreadSpectrum[i].ucSpreadSpectrumMode;
-                                       ss->rate = le16_to_cpu(ss_info->info.asSpreadSpectrum[i].usSpreadRateInKhz);
+                                               le16_to_cpu(ss_assign->v1.usSpreadSpectrumPercentage);
+                                       ss->type = ss_assign->v1.ucSpreadSpectrumMode;
+                                       ss->rate = le16_to_cpu(ss_assign->v1.usSpreadRateInKhz);
                                        return true;
                                }
+                               ss_assign = (union asic_ss_assignment *)
+                                       ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT));
                        }
                        break;
                case 2:
                        num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
                                sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2);
+                       ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info_2.asSpreadSpectrum[0]);
                        for (i = 0; i < num_indices; i++) {
-                               if ((ss_info->info_2.asSpreadSpectrum[i].ucClockIndication == id) &&
-                                   (clock <= le32_to_cpu(ss_info->info_2.asSpreadSpectrum[i].ulTargetClockRange))) {
+                               if ((ss_assign->v2.ucClockIndication == id) &&
+                                   (clock <= le32_to_cpu(ss_assign->v2.ulTargetClockRange))) {
                                        ss->percentage =
-                                               le16_to_cpu(ss_info->info_2.asSpreadSpectrum[i].usSpreadSpectrumPercentage);
-                                       ss->type = ss_info->info_2.asSpreadSpectrum[i].ucSpreadSpectrumMode;
-                                       ss->rate = le16_to_cpu(ss_info->info_2.asSpreadSpectrum[i].usSpreadRateIn10Hz);
+                                               le16_to_cpu(ss_assign->v2.usSpreadSpectrumPercentage);
+                                       ss->type = ss_assign->v2.ucSpreadSpectrumMode;
+                                       ss->rate = le16_to_cpu(ss_assign->v2.usSpreadRateIn10Hz);
                                        if ((crev == 2) &&
                                            ((id == ASIC_INTERNAL_ENGINE_SS) ||
                                             (id == ASIC_INTERNAL_MEMORY_SS)))
                                                ss->rate /= 100;
                                        return true;
                                }
+                               ss_assign = (union asic_ss_assignment *)
+                                       ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2));
                        }
                        break;
                case 3:
                        num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
                                sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3);
+                       ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info_3.asSpreadSpectrum[0]);
                        for (i = 0; i < num_indices; i++) {
-                               if ((ss_info->info_3.asSpreadSpectrum[i].ucClockIndication == id) &&
-                                   (clock <= le32_to_cpu(ss_info->info_3.asSpreadSpectrum[i].ulTargetClockRange))) {
+                               if ((ss_assign->v3.ucClockIndication == id) &&
+                                   (clock <= le32_to_cpu(ss_assign->v3.ulTargetClockRange))) {
                                        ss->percentage =
-                                               le16_to_cpu(ss_info->info_3.asSpreadSpectrum[i].usSpreadSpectrumPercentage);
-                                       ss->type = ss_info->info_3.asSpreadSpectrum[i].ucSpreadSpectrumMode;
-                                       ss->rate = le16_to_cpu(ss_info->info_3.asSpreadSpectrum[i].usSpreadRateIn10Hz);
+                                               le16_to_cpu(ss_assign->v3.usSpreadSpectrumPercentage);
+                                       ss->type = ss_assign->v3.ucSpreadSpectrumMode;
+                                       ss->rate = le16_to_cpu(ss_assign->v3.usSpreadRateIn10Hz);
                                        if ((id == ASIC_INTERNAL_ENGINE_SS) ||
                                            (id == ASIC_INTERNAL_MEMORY_SS))
                                                ss->rate /= 100;
@@ -1555,6 +1573,8 @@ bool radeon_atombios_get_asic_ss_info(struct radeon_device *rdev,
                                                radeon_atombios_get_igp_ss_overrides(rdev, ss, id);
                                        return true;
                                }
+                               ss_assign = (union asic_ss_assignment *)
+                                       ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3));
                        }
                        break;
                default:
index ac6ece6..66c2228 100644 (file)
@@ -85,8 +85,9 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
                   VRAM, also but everything into VRAM on AGP cards to avoid
                   image corruptions */
                if (p->ring == R600_RING_TYPE_UVD_INDEX &&
-                   (i == 0 || p->rdev->flags & RADEON_IS_AGP)) {
-                       /* TODO: is this still needed for NI+ ? */
+                   p->rdev->family < CHIP_PALM &&
+                   (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
+
                        p->relocs[i].lobj.domain =
                                RADEON_GEM_DOMAIN_VRAM;
 
index e29faa7..841d0e0 100644 (file)
@@ -1320,13 +1320,22 @@ int radeon_device_init(struct radeon_device *rdev,
                        return r;
        }
        if ((radeon_testing & 1)) {
-               radeon_test_moves(rdev);
+               if (rdev->accel_working)
+                       radeon_test_moves(rdev);
+               else
+                       DRM_INFO("radeon: acceleration disabled, skipping move tests\n");
        }
        if ((radeon_testing & 2)) {
-               radeon_test_syncing(rdev);
+               if (rdev->accel_working)
+                       radeon_test_syncing(rdev);
+               else
+                       DRM_INFO("radeon: acceleration disabled, skipping sync tests\n");
        }
        if (radeon_benchmarking) {
-               radeon_benchmark(rdev, radeon_benchmarking);
+               if (rdev->accel_working)
+                       radeon_benchmark(rdev, radeon_benchmarking);
+               else
+                       DRM_INFO("radeon: acceleration disabled, skipping benchmarks\n");
        }
        return 0;
 }
index 87e1d69..4f6b7fc 100644 (file)
@@ -945,6 +945,8 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable)
                if (enable) {
                        mutex_lock(&rdev->pm.mutex);
                        rdev->pm.dpm.uvd_active = true;
+                       /* disable this for now */
+#if 0
                        if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0))
                                dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD;
                        else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0))
@@ -954,6 +956,7 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable)
                        else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2))
                                dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2;
                        else
+#endif
                                dpm_state = POWER_STATE_TYPE_INTERNAL_UVD;
                        rdev->pm.dpm.state = dpm_state;
                        mutex_unlock(&rdev->pm.mutex);
@@ -1002,7 +1005,7 @@ static void radeon_pm_resume_old(struct radeon_device *rdev)
 {
        /* set up the default clocks if the MC ucode is loaded */
        if ((rdev->family >= CHIP_BARTS) &&
-           (rdev->family <= CHIP_HAINAN) &&
+           (rdev->family <= CHIP_CAYMAN) &&
            rdev->mc_fw) {
                if (rdev->pm.default_vddc)
                        radeon_atom_set_voltage(rdev, rdev->pm.default_vddc,
@@ -1046,7 +1049,7 @@ static void radeon_pm_resume_dpm(struct radeon_device *rdev)
        if (ret) {
                DRM_ERROR("radeon: dpm resume failed\n");
                if ((rdev->family >= CHIP_BARTS) &&
-                   (rdev->family <= CHIP_HAINAN) &&
+                   (rdev->family <= CHIP_CAYMAN) &&
                    rdev->mc_fw) {
                        if (rdev->pm.default_vddc)
                                radeon_atom_set_voltage(rdev, rdev->pm.default_vddc,
@@ -1097,7 +1100,7 @@ static int radeon_pm_init_old(struct radeon_device *rdev)
                radeon_pm_init_profile(rdev);
                /* set up the default clocks if the MC ucode is loaded */
                if ((rdev->family >= CHIP_BARTS) &&
-                   (rdev->family <= CHIP_HAINAN) &&
+                   (rdev->family <= CHIP_CAYMAN) &&
                    rdev->mc_fw) {
                        if (rdev->pm.default_vddc)
                                radeon_atom_set_voltage(rdev, rdev->pm.default_vddc,
@@ -1183,7 +1186,7 @@ static int radeon_pm_init_dpm(struct radeon_device *rdev)
        if (ret) {
                rdev->pm.dpm_enabled = false;
                if ((rdev->family >= CHIP_BARTS) &&
-                   (rdev->family <= CHIP_HAINAN) &&
+                   (rdev->family <= CHIP_CAYMAN) &&
                    rdev->mc_fw) {
                        if (rdev->pm.default_vddc)
                                radeon_atom_set_voltage(rdev, rdev->pm.default_vddc,
index 46a25f0..18254e1 100644 (file)
@@ -839,9 +839,11 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
         * packet that is the root issue
         */
        i = (ring->rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
-       for (j = 0; j <= (count + 32); j++) {
-               seq_printf(m, "r[%5d]=0x%08x\n", i, ring->ring[i]);
-               i = (i + 1) & ring->ptr_mask;
+       if (ring->ready) {
+               for (j = 0; j <= (count + 32); j++) {
+                       seq_printf(m, "r[%5d]=0x%08x\n", i, ring->ring[i]);
+                       i = (i + 1) & ring->ptr_mask;
+               }
        }
        return 0;
 }
index f4d6bce..12e8099 100644 (file)
@@ -36,8 +36,8 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
        struct radeon_bo *vram_obj = NULL;
        struct radeon_bo **gtt_obj = NULL;
        uint64_t gtt_addr, vram_addr;
-       unsigned i, n, size;
-       int r, ring;
+       unsigned n, size;
+       int i, r, ring;
 
        switch (flag) {
        case RADEON_TEST_COPY_DMA:
index 1a01bbf..4f2e73f 100644 (file)
@@ -476,8 +476,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
                return -EINVAL;
        }
 
-       /* TODO: is this still necessary on NI+ ? */
-       if ((cmd == 0 || cmd == 0x3) &&
+       if (p->rdev->family < CHIP_PALM && (cmd == 0 || cmd == 0x3) &&
            (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
                DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
                          start, end);
@@ -799,7 +798,8 @@ void radeon_uvd_note_usage(struct radeon_device *rdev)
                    (rdev->pm.dpm.hd != hd)) {
                        rdev->pm.dpm.sd = sd;
                        rdev->pm.dpm.hd = hd;
-                       streams_changed = true;
+                       /* disable this for now */
+                       /*streams_changed = true;*/
                }
        }
 
index c354c10..d4652af 100644 (file)
@@ -85,6 +85,9 @@ extern void si_dma_vm_set_page(struct radeon_device *rdev,
                               uint32_t incr, uint32_t flags);
 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
                                         bool enable);
+static void si_fini_pg(struct radeon_device *rdev);
+static void si_fini_cg(struct radeon_device *rdev);
+static void si_rlc_stop(struct radeon_device *rdev);
 
 static const u32 verde_rlc_save_restore_register_list[] =
 {
@@ -3608,6 +3611,13 @@ static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
 
+       /* disable PG/CG */
+       si_fini_pg(rdev);
+       si_fini_cg(rdev);
+
+       /* stop the rlc */
+       si_rlc_stop(rdev);
+
        /* Disable CP parsing/prefetching */
        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
 
index cfe5d4d..2332aa1 100644 (file)
@@ -2910,6 +2910,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
        bool disable_sclk_switching = false;
        u32 mclk, sclk;
        u16 vddc, vddci;
+       u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
        int i;
 
        if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
@@ -2943,6 +2944,29 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
                }
        }
 
+       /* limit clocks to max supported clocks based on voltage dependency tables */
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
+                                                       &max_sclk_vddc);
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
+                                                       &max_mclk_vddci);
+       btc_get_max_clock_from_voltage_dependency_table(&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
+                                                       &max_mclk_vddc);
+
+       for (i = 0; i < ps->performance_level_count; i++) {
+               if (max_sclk_vddc) {
+                       if (ps->performance_levels[i].sclk > max_sclk_vddc)
+                               ps->performance_levels[i].sclk = max_sclk_vddc;
+               }
+               if (max_mclk_vddci) {
+                       if (ps->performance_levels[i].mclk > max_mclk_vddci)
+                               ps->performance_levels[i].mclk = max_mclk_vddci;
+               }
+               if (max_mclk_vddc) {
+                       if (ps->performance_levels[i].mclk > max_mclk_vddc)
+                               ps->performance_levels[i].mclk = max_mclk_vddc;
+               }
+       }
+
        /* XXX validate the min clocks required for display */
 
        if (disable_mclk_switching) {
@@ -5184,7 +5208,7 @@ static int si_set_mc_special_registers(struct radeon_device *rdev,
                                        table->mc_reg_table_entry[k].mc_data[j] |= 0x100;
                        }
                        j++;
-                       if (j > SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
+                       if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
                                return -EINVAL;
 
                        if (!pi->mem_gddr5) {
@@ -5194,7 +5218,7 @@ static int si_set_mc_special_registers(struct radeon_device *rdev,
                                        table->mc_reg_table_entry[k].mc_data[j] =
                                                (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16;
                                j++;
-                               if (j > SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
+                               if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
                                        return -EINVAL;
                        }
                        break;
@@ -5207,7 +5231,7 @@ static int si_set_mc_special_registers(struct radeon_device *rdev,
                                        (temp_reg & 0xffff0000) |
                                        (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff);
                        j++;
-                       if (j > SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
+                       if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
                                return -EINVAL;
                        break;
                default:
index 52d2ab6..7e2e0ea 100644 (file)
  * 6. COMMAND [30:21] | BYTE_COUNT [20:0]
  */
 #              define PACKET3_CP_DMA_DST_SEL(x)    ((x) << 20)
-                /* 0 - SRC_ADDR
+                /* 0 - DST_ADDR
                 * 1 - GDS
                 */
 #              define PACKET3_CP_DMA_ENGINE(x)     ((x) << 27)
 #              define PACKET3_CP_DMA_CP_SYNC       (1 << 31)
 /* COMMAND */
 #              define PACKET3_CP_DMA_DIS_WC        (1 << 21)
-#              define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+#              define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
                 /* 0 - none
                 * 1 - 8 in 16
                 * 2 - 8 in 32
index 7f998bf..9364129 100644 (file)
@@ -1868,7 +1868,7 @@ int trinity_dpm_init(struct radeon_device *rdev)
        for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++)
                pi->at[i] = TRINITY_AT_DFLT;
 
-       pi->enable_bapm = true;
+       pi->enable_bapm = false;
        pi->enable_nbps_policy = true;
        pi->enable_sclk_ds = true;
        pi->enable_gfx_power_gating = true;
index 7266805..3100fa9 100644 (file)
@@ -212,8 +212,8 @@ int uvd_v1_0_start(struct radeon_device *rdev)
        /* enable VCPU clock */
        WREG32(UVD_VCPU_CNTL,  1 << 9);
 
-       /* enable UMC */
-       WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+       /* enable UMC and NC0 */
+       WREG32_P(UVD_LMI_CTRL2, 1 << 13, ~((1 << 8) | (1 << 13)));
 
        /* boot up the VCPU */
        WREG32(UVD_SOFT_RESET, 0);
index 71b70e3..c91d547 100644 (file)
@@ -241,6 +241,7 @@ config HID_HOLTEK
          - Sharkoon Drakonia / Perixx MX-2000 gaming mice
          - Tracer Sniper TRM-503 / NOVA Gaming Slider X200 /
            Zalman ZM-GM1
+         - SHARKOON DarkGlider Gaming mouse
 
 config HOLTEK_FF
        bool "Holtek On Line Grip force feedback support"
index b8470b1..5a8c011 100644 (file)
@@ -1715,6 +1715,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_580) },
        { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
index 7e6db3c..e696566 100644 (file)
@@ -27,6 +27,7 @@
  * - USB ID 04d9:a067, sold as Sharkoon Drakonia and Perixx MX-2000
  * - USB ID 04d9:a04a, sold as Tracer Sniper TRM-503, NOVA Gaming Slider X200
  *   and Zalman ZM-GM1
+ * - USB ID 04d9:a081, sold as SHARKOON DarkGlider Gaming mouse
  */
 
 static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
@@ -46,6 +47,7 @@ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                        }
                        break;
                case USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A:
+               case USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081:
                        if (*rsize >= 113 && rdesc[106] == 0xff && rdesc[107] == 0x7f
                                        && rdesc[111] == 0xff && rdesc[112] == 0x7f) {
                                hid_info(hdev, "Fixing up report descriptor\n");
@@ -63,6 +65,8 @@ static const struct hid_device_id holtek_mouse_devices[] = {
                        USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT,
                        USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT,
+                       USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
        { }
 };
 MODULE_DEVICE_TABLE(hid, holtek_mouse_devices);
index e60e8d5..9cbc7ab 100644 (file)
 #define USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD      0xa055
 #define USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067    0xa067
 #define USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A    0xa04a
+#define USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081    0xa081
 
 #define USB_VENDOR_ID_IMATION          0x0718
 #define USB_DEVICE_ID_DISC_STAKKA      0xd000
index 602c188..6101816 100644 (file)
@@ -382,7 +382,7 @@ static ssize_t kone_sysfs_write_profilex(struct file *fp,
 }
 #define PROFILE_ATTR(number)                                   \
 static struct bin_attribute bin_attr_profile##number = {       \
-       .attr = { .name = "profile##number", .mode = 0660 },    \
+       .attr = { .name = "profile" #number, .mode = 0660 },    \
        .size = sizeof(struct kone_profile),                    \
        .read = kone_sysfs_read_profilex,                       \
        .write = kone_sysfs_write_profilex,                     \
index 5ddf605..5e99fcd 100644 (file)
@@ -229,13 +229,13 @@ static ssize_t koneplus_sysfs_read_profilex_buttons(struct file *fp,
 
 #define PROFILE_ATTR(number)                                           \
 static struct bin_attribute bin_attr_profile##number##_settings = {    \
-       .attr = { .name = "profile##number##_settings", .mode = 0440 }, \
+       .attr = { .name = "profile" #number "_settings", .mode = 0440 },        \
        .size = KONEPLUS_SIZE_PROFILE_SETTINGS,                         \
        .read = koneplus_sysfs_read_profilex_settings,                  \
        .private = &profile_numbers[number-1],                          \
 };                                                                     \
 static struct bin_attribute bin_attr_profile##number##_buttons = {     \
-       .attr = { .name = "profile##number##_buttons", .mode = 0440 },  \
+       .attr = { .name = "profile" #number "_buttons", .mode = 0440 }, \
        .size = KONEPLUS_SIZE_PROFILE_BUTTONS,                          \
        .read = koneplus_sysfs_read_profilex_buttons,                   \
        .private = &profile_numbers[number-1],                          \
index 515bc03..0c8e1ef 100644 (file)
@@ -257,13 +257,13 @@ static ssize_t kovaplus_sysfs_read_profilex_buttons(struct file *fp,
 
 #define PROFILE_ATTR(number)                                           \
 static struct bin_attribute bin_attr_profile##number##_settings = {    \
-       .attr = { .name = "profile##number##_settings", .mode = 0440 }, \
+       .attr = { .name = "profile" #number "_settings", .mode = 0440 },        \
        .size = KOVAPLUS_SIZE_PROFILE_SETTINGS,                         \
        .read = kovaplus_sysfs_read_profilex_settings,                  \
        .private = &profile_numbers[number-1],                          \
 };                                                                     \
 static struct bin_attribute bin_attr_profile##number##_buttons = {     \
-       .attr = { .name = "profile##number##_buttons", .mode = 0440 },  \
+       .attr = { .name = "profile" #number "_buttons", .mode = 0440 }, \
        .size = KOVAPLUS_SIZE_PROFILE_BUTTONS,                          \
        .read = kovaplus_sysfs_read_profilex_buttons,                   \
        .private = &profile_numbers[number-1],                          \
index 5a6dbbe..1a07e07 100644 (file)
@@ -225,13 +225,13 @@ static ssize_t pyra_sysfs_read_profilex_buttons(struct file *fp,
 
 #define PROFILE_ATTR(number)                                           \
 static struct bin_attribute bin_attr_profile##number##_settings = {    \
-       .attr = { .name = "profile##number##_settings", .mode = 0440 }, \
+       .attr = { .name = "profile" #number "_settings", .mode = 0440 },        \
        .size = PYRA_SIZE_PROFILE_SETTINGS,                             \
        .read = pyra_sysfs_read_profilex_settings,                      \
        .private = &profile_numbers[number-1],                          \
 };                                                                     \
 static struct bin_attribute bin_attr_profile##number##_buttons = {     \
-       .attr = { .name = "profile##number##_buttons", .mode = 0440 },  \
+       .attr = { .name = "profile" #number "_buttons", .mode = 0440 }, \
        .size = PYRA_SIZE_PROFILE_BUTTONS,                              \
        .read = pyra_sysfs_read_profilex_buttons,                       \
        .private = &profile_numbers[number-1],                          \
index 2e7d644..71adf9e 100644 (file)
@@ -119,12 +119,22 @@ static const struct wiimod_ops wiimod_keys = {
  * the rumble motor, this flag shouldn't be set.
  */
 
+/* used by wiimod_rumble and wiipro_rumble */
+static void wiimod_rumble_worker(struct work_struct *work)
+{
+       struct wiimote_data *wdata = container_of(work, struct wiimote_data,
+                                                 rumble_worker);
+
+       spin_lock_irq(&wdata->state.lock);
+       wiiproto_req_rumble(wdata, wdata->state.cache_rumble);
+       spin_unlock_irq(&wdata->state.lock);
+}
+
 static int wiimod_rumble_play(struct input_dev *dev, void *data,
                              struct ff_effect *eff)
 {
        struct wiimote_data *wdata = input_get_drvdata(dev);
        __u8 value;
-       unsigned long flags;
 
        /*
         * The wiimote supports only a single rumble motor so if any magnitude
@@ -137,9 +147,10 @@ static int wiimod_rumble_play(struct input_dev *dev, void *data,
        else
                value = 0;
 
-       spin_lock_irqsave(&wdata->state.lock, flags);
-       wiiproto_req_rumble(wdata, value);
-       spin_unlock_irqrestore(&wdata->state.lock, flags);
+       /* Locking state.lock here might deadlock with input_event() calls.
+        * schedule_work acts as barrier. Merging multiple changes is fine. */
+       wdata->state.cache_rumble = value;
+       schedule_work(&wdata->rumble_worker);
 
        return 0;
 }
@@ -147,6 +158,8 @@ static int wiimod_rumble_play(struct input_dev *dev, void *data,
 static int wiimod_rumble_probe(const struct wiimod_ops *ops,
                               struct wiimote_data *wdata)
 {
+       INIT_WORK(&wdata->rumble_worker, wiimod_rumble_worker);
+
        set_bit(FF_RUMBLE, wdata->input->ffbit);
        if (input_ff_create_memless(wdata->input, NULL, wiimod_rumble_play))
                return -ENOMEM;
@@ -159,6 +172,8 @@ static void wiimod_rumble_remove(const struct wiimod_ops *ops,
 {
        unsigned long flags;
 
+       cancel_work_sync(&wdata->rumble_worker);
+
        spin_lock_irqsave(&wdata->state.lock, flags);
        wiiproto_req_rumble(wdata, 0);
        spin_unlock_irqrestore(&wdata->state.lock, flags);
@@ -1731,7 +1746,6 @@ static int wiimod_pro_play(struct input_dev *dev, void *data,
 {
        struct wiimote_data *wdata = input_get_drvdata(dev);
        __u8 value;
-       unsigned long flags;
 
        /*
         * The wiimote supports only a single rumble motor so if any magnitude
@@ -1744,9 +1758,10 @@ static int wiimod_pro_play(struct input_dev *dev, void *data,
        else
                value = 0;
 
-       spin_lock_irqsave(&wdata->state.lock, flags);
-       wiiproto_req_rumble(wdata, value);
-       spin_unlock_irqrestore(&wdata->state.lock, flags);
+       /* Locking state.lock here might deadlock with input_event() calls.
+        * schedule_work acts as barrier. Merging multiple changes is fine. */
+       wdata->state.cache_rumble = value;
+       schedule_work(&wdata->rumble_worker);
 
        return 0;
 }
@@ -1756,6 +1771,8 @@ static int wiimod_pro_probe(const struct wiimod_ops *ops,
 {
        int ret, i;
 
+       INIT_WORK(&wdata->rumble_worker, wiimod_rumble_worker);
+
        wdata->extension.input = input_allocate_device();
        if (!wdata->extension.input)
                return -ENOMEM;
@@ -1817,12 +1834,13 @@ static void wiimod_pro_remove(const struct wiimod_ops *ops,
        if (!wdata->extension.input)
                return;
 
+       input_unregister_device(wdata->extension.input);
+       wdata->extension.input = NULL;
+       cancel_work_sync(&wdata->rumble_worker);
+
        spin_lock_irqsave(&wdata->state.lock, flags);
        wiiproto_req_rumble(wdata, 0);
        spin_unlock_irqrestore(&wdata->state.lock, flags);
-
-       input_unregister_device(wdata->extension.input);
-       wdata->extension.input = NULL;
 }
 
 static const struct wiimod_ops wiimod_pro = {
index f1474f3..75db0c4 100644 (file)
@@ -133,13 +133,15 @@ struct wiimote_state {
        __u8 *cmd_read_buf;
        __u8 cmd_read_size;
 
-       /* calibration data */
+       /* calibration/cache data */
        __u16 calib_bboard[4][3];
+       __u8 cache_rumble;
 };
 
 struct wiimote_data {
        struct hid_device *hdev;
        struct input_dev *input;
+       struct work_struct rumble_worker;
        struct led_classdev *leds[4];
        struct input_dev *accel;
        struct input_dev *ir;
index 8918dd1..6a6dd5c 100644 (file)
@@ -308,18 +308,25 @@ static int hidraw_fasync(int fd, struct file *file, int on)
 static void drop_ref(struct hidraw *hidraw, int exists_bit)
 {
        if (exists_bit) {
-               hid_hw_close(hidraw->hid);
                hidraw->exist = 0;
-               if (hidraw->open)
+               if (hidraw->open) {
+                       hid_hw_close(hidraw->hid);
                        wake_up_interruptible(&hidraw->wait);
+               }
        } else {
                --hidraw->open;
        }
-
-       if (!hidraw->open && !hidraw->exist) {
-               device_destroy(hidraw_class, MKDEV(hidraw_major, hidraw->minor));
-               hidraw_table[hidraw->minor] = NULL;
-               kfree(hidraw);
+       if (!hidraw->open) {
+               if (!hidraw->exist) {
+                       device_destroy(hidraw_class,
+                                       MKDEV(hidraw_major, hidraw->minor));
+                       hidraw_table[hidraw->minor] = NULL;
+                       kfree(hidraw);
+               } else {
+                       /* close device for last reader */
+                       hid_hw_power(hidraw->hid, PM_HINT_NORMAL);
+                       hid_hw_close(hidraw->hid);
+               }
        }
 }
 
index 5bf2fb7..93b00d7 100644 (file)
@@ -615,7 +615,7 @@ static const struct file_operations uhid_fops = {
 
 static struct miscdevice uhid_misc = {
        .fops           = &uhid_fops,
-       .minor          = MISC_DYNAMIC_MINOR,
+       .minor          = UHID_MINOR,
        .name           = UHID_NAME,
 };
 
@@ -634,4 +634,5 @@ module_exit(uhid_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("David Herrmann <dh.herrmann@gmail.com>");
 MODULE_DESCRIPTION("User-space I/O driver support for HID subsystem");
+MODULE_ALIAS_MISCDEV(UHID_MINOR);
 MODULE_ALIAS("devname:" UHID_NAME);
index 8f4743a..936093e 100644 (file)
@@ -195,7 +195,7 @@ int vmbus_connect(void)
 
        do {
                ret = vmbus_negotiate_version(msginfo, version);
-               if (ret)
+               if (ret == -ETIMEDOUT)
                        goto cleanup;
 
                if (vmbus_connection.conn_state == CONNECTED)
index 28b0332..09988b2 100644 (file)
 /*
  * Pre win8 version numbers used in ws2008 and ws 2008 r2 (win7)
  */
+#define WS2008_SRV_MAJOR       1
+#define WS2008_SRV_MINOR       0
+#define WS2008_SRV_VERSION     (WS2008_SRV_MAJOR << 16 | WS2008_SRV_MINOR)
+
 #define WIN7_SRV_MAJOR   3
 #define WIN7_SRV_MINOR   0
-#define WIN7_SRV_MAJOR_MINOR     (WIN7_SRV_MAJOR << 16 | WIN7_SRV_MINOR)
+#define WIN7_SRV_VERSION     (WIN7_SRV_MAJOR << 16 | WIN7_SRV_MINOR)
 
 #define WIN8_SRV_MAJOR   4
 #define WIN8_SRV_MINOR   0
-#define WIN8_SRV_MAJOR_MINOR     (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR)
+#define WIN8_SRV_VERSION     (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR)
 
 /*
  * Global state maintained for transaction that is being processed.
@@ -587,6 +591,8 @@ void hv_kvp_onchannelcallback(void *context)
 
        struct icmsg_hdr *icmsghdrp;
        struct icmsg_negotiate *negop = NULL;
+       int util_fw_version;
+       int kvp_srv_version;
 
        if (kvp_transaction.active) {
                /*
@@ -606,17 +612,26 @@ void hv_kvp_onchannelcallback(void *context)
 
                if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
                        /*
-                        * We start with win8 version and if the host cannot
-                        * support that we use the previous version.
+                        * Based on the host, select appropriate
+                        * framework and service versions we will
+                        * negotiate.
                         */
-                       if (vmbus_prep_negotiate_resp(icmsghdrp, negop,
-                                recv_buffer, UTIL_FW_MAJOR_MINOR,
-                                WIN8_SRV_MAJOR_MINOR))
-                               goto done;
-
+                       switch (vmbus_proto_version) {
+                       case (VERSION_WS2008):
+                               util_fw_version = UTIL_WS2K8_FW_VERSION;
+                               kvp_srv_version = WS2008_SRV_VERSION;
+                               break;
+                       case (VERSION_WIN7):
+                               util_fw_version = UTIL_FW_VERSION;
+                               kvp_srv_version = WIN7_SRV_VERSION;
+                               break;
+                       default:
+                               util_fw_version = UTIL_FW_VERSION;
+                               kvp_srv_version = WIN8_SRV_VERSION;
+                       }
                        vmbus_prep_negotiate_resp(icmsghdrp, negop,
-                                recv_buffer, UTIL_FW_MAJOR_MINOR,
-                                WIN7_SRV_MAJOR_MINOR);
+                                recv_buffer, util_fw_version,
+                                kvp_srv_version);
 
                } else {
                        kvp_msg = (struct hv_kvp_msg *)&recv_buffer[
@@ -649,7 +664,6 @@ void hv_kvp_onchannelcallback(void *context)
                        return;
 
                }
-done:
 
                icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
                        | ICMSGHDRFLAG_RESPONSE;
index e4572f3..0c35462 100644 (file)
@@ -26,7 +26,7 @@
 
 #define VSS_MAJOR  5
 #define VSS_MINOR  0
-#define VSS_MAJOR_MINOR    (VSS_MAJOR << 16 | VSS_MINOR)
+#define VSS_VERSION    (VSS_MAJOR << 16 | VSS_MINOR)
 
 
 
@@ -190,8 +190,8 @@ void hv_vss_onchannelcallback(void *context)
 
                if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
                        vmbus_prep_negotiate_resp(icmsghdrp, negop,
-                                recv_buffer, UTIL_FW_MAJOR_MINOR,
-                                VSS_MAJOR_MINOR);
+                                recv_buffer, UTIL_FW_VERSION,
+                                VSS_VERSION);
                } else {
                        vss_msg = (struct hv_vss_msg *)&recv_buffer[
                                sizeof(struct vmbuspipe_hdr) +
index cb82233..273e3dd 100644 (file)
 #include <linux/reboot.h>
 #include <linux/hyperv.h>
 
-#define SHUTDOWN_MAJOR 3
-#define SHUTDOWN_MINOR  0
-#define SHUTDOWN_MAJOR_MINOR   (SHUTDOWN_MAJOR << 16 | SHUTDOWN_MINOR)
 
-#define TIMESYNCH_MAJOR        3
-#define TIMESYNCH_MINOR 0
-#define TIMESYNCH_MAJOR_MINOR  (TIMESYNCH_MAJOR << 16 | TIMESYNCH_MINOR)
+#define SD_MAJOR       3
+#define SD_MINOR       0
+#define SD_VERSION     (SD_MAJOR << 16 | SD_MINOR)
 
-#define HEARTBEAT_MAJOR        3
-#define HEARTBEAT_MINOR 0
-#define HEARTBEAT_MAJOR_MINOR  (HEARTBEAT_MAJOR << 16 | HEARTBEAT_MINOR)
+#define SD_WS2008_MAJOR                1
+#define SD_WS2008_VERSION      (SD_WS2008_MAJOR << 16 | SD_MINOR)
+
+#define TS_MAJOR       3
+#define TS_MINOR       0
+#define TS_VERSION     (TS_MAJOR << 16 | TS_MINOR)
+
+#define TS_WS2008_MAJOR                1
+#define TS_WS2008_VERSION      (TS_WS2008_MAJOR << 16 | TS_MINOR)
+
+#define HB_MAJOR       3
+#define HB_MINOR 0
+#define HB_VERSION     (HB_MAJOR << 16 | HB_MINOR)
+
+#define HB_WS2008_MAJOR        1
+#define HB_WS2008_VERSION      (HB_WS2008_MAJOR << 16 | HB_MINOR)
+
+static int sd_srv_version;
+static int ts_srv_version;
+static int hb_srv_version;
+static int util_fw_version;
 
 static void shutdown_onchannelcallback(void *context);
 static struct hv_util_service util_shutdown = {
@@ -99,8 +114,8 @@ static void shutdown_onchannelcallback(void *context)
 
                if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
                        vmbus_prep_negotiate_resp(icmsghdrp, negop,
-                                       shut_txf_buf, UTIL_FW_MAJOR_MINOR,
-                                       SHUTDOWN_MAJOR_MINOR);
+                                       shut_txf_buf, util_fw_version,
+                                       sd_srv_version);
                } else {
                        shutdown_msg =
                                (struct shutdown_msg_data *)&shut_txf_buf[
@@ -216,6 +231,7 @@ static void timesync_onchannelcallback(void *context)
        struct icmsg_hdr *icmsghdrp;
        struct ictimesync_data *timedatap;
        u8 *time_txf_buf = util_timesynch.recv_buffer;
+       struct icmsg_negotiate *negop = NULL;
 
        vmbus_recvpacket(channel, time_txf_buf,
                         PAGE_SIZE, &recvlen, &requestid);
@@ -225,9 +241,10 @@ static void timesync_onchannelcallback(void *context)
                                sizeof(struct vmbuspipe_hdr)];
 
                if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
-                       vmbus_prep_negotiate_resp(icmsghdrp, NULL, time_txf_buf,
-                                               UTIL_FW_MAJOR_MINOR,
-                                               TIMESYNCH_MAJOR_MINOR);
+                       vmbus_prep_negotiate_resp(icmsghdrp, negop,
+                                               time_txf_buf,
+                                               util_fw_version,
+                                               ts_srv_version);
                } else {
                        timedatap = (struct ictimesync_data *)&time_txf_buf[
                                sizeof(struct vmbuspipe_hdr) +
@@ -257,6 +274,7 @@ static void heartbeat_onchannelcallback(void *context)
        struct icmsg_hdr *icmsghdrp;
        struct heartbeat_msg_data *heartbeat_msg;
        u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
+       struct icmsg_negotiate *negop = NULL;
 
        vmbus_recvpacket(channel, hbeat_txf_buf,
                         PAGE_SIZE, &recvlen, &requestid);
@@ -266,9 +284,9 @@ static void heartbeat_onchannelcallback(void *context)
                                sizeof(struct vmbuspipe_hdr)];
 
                if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
-                       vmbus_prep_negotiate_resp(icmsghdrp, NULL,
-                               hbeat_txf_buf, UTIL_FW_MAJOR_MINOR,
-                               HEARTBEAT_MAJOR_MINOR);
+                       vmbus_prep_negotiate_resp(icmsghdrp, negop,
+                               hbeat_txf_buf, util_fw_version,
+                               hb_srv_version);
                } else {
                        heartbeat_msg =
                                (struct heartbeat_msg_data *)&hbeat_txf_buf[
@@ -321,6 +339,25 @@ static int util_probe(struct hv_device *dev,
                goto error;
 
        hv_set_drvdata(dev, srv);
+       /*
+        * Based on the host; initialize the framework and
+        * service version numbers we will negotiate.
+        */
+       switch (vmbus_proto_version) {
+       case (VERSION_WS2008):
+               util_fw_version = UTIL_WS2K8_FW_VERSION;
+               sd_srv_version = SD_WS2008_VERSION;
+               ts_srv_version = TS_WS2008_VERSION;
+               hb_srv_version = HB_WS2008_VERSION;
+               break;
+
+       default:
+               util_fw_version = UTIL_FW_VERSION;
+               sd_srv_version = SD_VERSION;
+               ts_srv_version = TS_VERSION;
+               hb_srv_version = HB_VERSION;
+       }
+
        return 0;
 
 error:
index 62c2e32..3288f13 100644 (file)
@@ -230,6 +230,7 @@ static int send_argument(const char *key)
 
 static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
 {
+       u8 status, data = 0;
        int i;
 
        if (send_command(cmd) || send_argument(key)) {
@@ -237,6 +238,7 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
                return -EIO;
        }
 
+       /* This has no effect on newer (2012) SMCs */
        if (send_byte(len, APPLESMC_DATA_PORT)) {
                pr_warn("%.4s: read len fail\n", key);
                return -EIO;
@@ -250,6 +252,17 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len)
                buffer[i] = inb(APPLESMC_DATA_PORT);
        }
 
+       /* Read the data port until bit0 is cleared */
+       for (i = 0; i < 16; i++) {
+               udelay(APPLESMC_MIN_WAIT);
+               status = inb(APPLESMC_CMD_PORT);
+               if (!(status & 0x01))
+                       break;
+               data = inb(APPLESMC_DATA_PORT);
+       }
+       if (i)
+               pr_warn("flushed %d bytes, last value is: %d\n", i, data);
+
        return 0;
 }
 
@@ -525,16 +538,25 @@ static int applesmc_init_smcreg_try(void)
 {
        struct applesmc_registers *s = &smcreg;
        bool left_light_sensor, right_light_sensor;
+       unsigned int count;
        u8 tmp[1];
        int ret;
 
        if (s->init_complete)
                return 0;
 
-       ret = read_register_count(&s->key_count);
+       ret = read_register_count(&count);
        if (ret)
                return ret;
 
+       if (s->cache && s->key_count != count) {
+               pr_warn("key count changed from %d to %d\n",
+                       s->key_count, count);
+               kfree(s->cache);
+               s->cache = NULL;
+       }
+       s->key_count = count;
+
        if (!s->cache)
                s->cache = kcalloc(s->key_count, sizeof(*s->cache), GFP_KERNEL);
        if (!s->cache)
index dbecf08..5888fee 100644 (file)
@@ -98,6 +98,8 @@
 
 #define DW_IC_ERR_TX_ABRT      0x1
 
+#define DW_IC_TAR_10BITADDR_MASTER BIT(12)
+
 /*
  * status codes
  */
@@ -388,22 +390,34 @@ static int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev)
 static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 {
        struct i2c_msg *msgs = dev->msgs;
-       u32 ic_con;
+       u32 ic_con, ic_tar = 0;
 
        /* Disable the adapter */
        __i2c_dw_enable(dev, false);
 
-       /* set the slave (target) address */
-       dw_writel(dev, msgs[dev->msg_write_idx].addr, DW_IC_TAR);
-
        /* if the slave address is ten bit address, enable 10BITADDR */
        ic_con = dw_readl(dev, DW_IC_CON);
-       if (msgs[dev->msg_write_idx].flags & I2C_M_TEN)
+       if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) {
                ic_con |= DW_IC_CON_10BITADDR_MASTER;
-       else
+               /*
+                * If I2C_DYNAMIC_TAR_UPDATE is set, the 10-bit addressing
+                * mode has to be enabled via bit 12 of IC_TAR register.
+                * We set it always as I2C_DYNAMIC_TAR_UPDATE can't be
+                * detected from registers.
+                */
+               ic_tar = DW_IC_TAR_10BITADDR_MASTER;
+       } else {
                ic_con &= ~DW_IC_CON_10BITADDR_MASTER;
+       }
+
        dw_writel(dev, ic_con, DW_IC_CON);
 
+       /*
+        * Set the slave (target) address and enable 10-bit addressing mode
+        * if applicable.
+        */
+       dw_writel(dev, msgs[dev->msg_write_idx].addr | ic_tar, DW_IC_TAR);
+
        /* Enable the adapter */
        __i2c_dw_enable(dev, true);
 
index 4c1b605..0aa0113 100644 (file)
@@ -270,7 +270,8 @@ static SIMPLE_DEV_PM_OPS(dw_i2c_dev_pm_ops, dw_i2c_suspend, dw_i2c_resume);
 MODULE_ALIAS("platform:i2c_designware");
 
 static struct platform_driver dw_i2c_driver = {
-       .remove         = dw_i2c_remove,
+       .probe = dw_i2c_probe,
+       .remove = dw_i2c_remove,
        .driver         = {
                .name   = "i2c_designware",
                .owner  = THIS_MODULE,
@@ -282,7 +283,7 @@ static struct platform_driver dw_i2c_driver = {
 
 static int __init dw_i2c_init_driver(void)
 {
-       return platform_driver_probe(&dw_i2c_driver, dw_i2c_probe);
+       return platform_driver_register(&dw_i2c_driver);
 }
 subsys_initcall(dw_i2c_init_driver);
 
index ccf4665..1d7efa3 100644 (file)
@@ -365,7 +365,7 @@ static void i2c_imx_stop(struct imx_i2c_struct *i2c_imx)
        clk_disable_unprepare(i2c_imx->clk);
 }
 
-static void __init i2c_imx_set_clk(struct imx_i2c_struct *i2c_imx,
+static void i2c_imx_set_clk(struct imx_i2c_struct *i2c_imx,
                                                        unsigned int rate)
 {
        struct imx_i2c_clk_pair *i2c_clk_div = i2c_imx->hwdata->clk_div;
@@ -589,7 +589,7 @@ static struct i2c_algorithm i2c_imx_algo = {
        .functionality  = i2c_imx_func,
 };
 
-static int __init i2c_imx_probe(struct platform_device *pdev)
+static int i2c_imx_probe(struct platform_device *pdev)
 {
        const struct of_device_id *of_id = of_match_device(i2c_imx_dt_ids,
                                                           &pdev->dev);
@@ -697,7 +697,7 @@ static int __init i2c_imx_probe(struct platform_device *pdev)
        return 0;   /* Return OK */
 }
 
-static int __exit i2c_imx_remove(struct platform_device *pdev)
+static int i2c_imx_remove(struct platform_device *pdev)
 {
        struct imx_i2c_struct *i2c_imx = platform_get_drvdata(pdev);
 
@@ -715,7 +715,8 @@ static int __exit i2c_imx_remove(struct platform_device *pdev)
 }
 
 static struct platform_driver i2c_imx_driver = {
-       .remove         = __exit_p(i2c_imx_remove),
+       .probe = i2c_imx_probe,
+       .remove = i2c_imx_remove,
        .driver = {
                .name   = DRIVER_NAME,
                .owner  = THIS_MODULE,
@@ -726,7 +727,7 @@ static struct platform_driver i2c_imx_driver = {
 
 static int __init i2c_adap_imx_init(void)
 {
-       return platform_driver_probe(&i2c_imx_driver, i2c_imx_probe);
+       return platform_driver_register(&i2c_imx_driver);
 }
 subsys_initcall(i2c_adap_imx_init);
 
index 8ed79a0..1672eff 100644 (file)
@@ -393,6 +393,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
 
        desc = &priv->hw[priv->head];
 
+       /* Initialize the DMA buffer */
+       memset(priv->dma_buffer, 0, sizeof(priv->dma_buffer));
+
        /* Initialize the descriptor */
        memset(desc, 0, sizeof(struct ismt_desc));
        desc->tgtaddr_rw = ISMT_DESC_ADDR_RW(addr, read_write);
index 7f3a474..d3e9cc3 100644 (file)
@@ -234,9 +234,9 @@ static int mv64xxx_i2c_offload_msg(struct mv64xxx_i2c_data *drv_data)
                ctrl_reg |= MV64XXX_I2C_BRIDGE_CONTROL_WR |
                    (msg->len - 1) << MV64XXX_I2C_BRIDGE_CONTROL_TX_SIZE_SHIFT;
 
-               writel_relaxed(data_reg_lo,
+               writel(data_reg_lo,
                        drv_data->reg_base + MV64XXX_I2C_REG_TX_DATA_LO);
-               writel_relaxed(data_reg_hi,
+               writel(data_reg_hi,
                        drv_data->reg_base + MV64XXX_I2C_REG_TX_DATA_HI);
 
        } else {
@@ -697,6 +697,7 @@ static const struct of_device_id mv64xxx_i2c_of_match_table[] = {
 MODULE_DEVICE_TABLE(of, mv64xxx_i2c_of_match_table);
 
 #ifdef CONFIG_OF
+#ifdef CONFIG_HAVE_CLK
 static int
 mv64xxx_calc_freq(const int tclk, const int n, const int m)
 {
@@ -726,16 +727,12 @@ mv64xxx_find_baud_factors(const u32 req_freq, const u32 tclk, u32 *best_n,
                return false;
        return true;
 }
+#endif /* CONFIG_HAVE_CLK */
 
 static int
 mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
                  struct device *dev)
 {
-       const struct of_device_id *device;
-       struct device_node *np = dev->of_node;
-       u32 bus_freq, tclk;
-       int rc = 0;
-
        /* CLK is mandatory when using DT to describe the i2c bus. We
         * need to know tclk in order to calculate bus clock
         * factors.
@@ -744,6 +741,11 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
        /* Have OF but no CLK */
        return -ENODEV;
 #else
+       const struct of_device_id *device;
+       struct device_node *np = dev->of_node;
+       u32 bus_freq, tclk;
+       int rc = 0;
+
        if (IS_ERR(drv_data->clk)) {
                rc = -ENODEV;
                goto out;
index f4a0167..b7c8577 100644 (file)
@@ -780,12 +780,13 @@ static struct platform_driver mxs_i2c_driver = {
                   .owner = THIS_MODULE,
                   .of_match_table = mxs_i2c_dt_ids,
                   },
+       .probe = mxs_i2c_probe,
        .remove = mxs_i2c_remove,
 };
 
 static int __init mxs_i2c_init(void)
 {
-       return platform_driver_probe(&mxs_i2c_driver, mxs_i2c_probe);
+       return platform_driver_register(&mxs_i2c_driver);
 }
 subsys_initcall(mxs_i2c_init);
 
index 6d8308d..9967a6f 100644 (file)
@@ -939,6 +939,9 @@ omap_i2c_isr_thread(int this_irq, void *dev_id)
                /*
                 * ProDB0017052: Clear ARDY bit twice
                 */
+               if (stat & OMAP_I2C_STAT_ARDY)
+                       omap_i2c_ack_stat(dev, OMAP_I2C_STAT_ARDY);
+
                if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK |
                                        OMAP_I2C_STAT_AL)) {
                        omap_i2c_ack_stat(dev, (OMAP_I2C_STAT_RRDY |
index 3535f3c..3747b9b 100644 (file)
@@ -1178,8 +1178,6 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev)
 
        i2c_del_adapter(&i2c->adap);
 
-       clk_disable_unprepare(i2c->clk);
-
        if (pdev->dev.of_node && IS_ERR(i2c->pctrl))
                s3c24xx_i2c_dt_gpio_free(i2c);
 
index f8f6f2e..04a17b9 100644 (file)
@@ -859,8 +859,7 @@ static const struct i2c_algorithm stu300_algo = {
        .functionality  = stu300_func,
 };
 
-static int __init
-stu300_probe(struct platform_device *pdev)
+static int stu300_probe(struct platform_device *pdev)
 {
        struct stu300_dev *dev;
        struct i2c_adapter *adap;
@@ -966,8 +965,7 @@ static SIMPLE_DEV_PM_OPS(stu300_pm, stu300_suspend, stu300_resume);
 #define STU300_I2C_PM  NULL
 #endif
 
-static int __exit
-stu300_remove(struct platform_device *pdev)
+static int stu300_remove(struct platform_device *pdev)
 {
        struct stu300_dev *dev = platform_get_drvdata(pdev);
 
@@ -989,13 +987,14 @@ static struct platform_driver stu300_i2c_driver = {
                .pm     = STU300_I2C_PM,
                .of_match_table = stu300_dt_match,
        },
-       .remove         = __exit_p(stu300_remove),
+       .probe = stu300_probe,
+       .remove = stu300_remove,
 
 };
 
 static int __init stu300_init(void)
 {
-       return platform_driver_probe(&stu300_i2c_driver, stu300_probe);
+       return platform_driver_register(&stu300_i2c_driver);
 }
 
 static void __exit stu300_exit(void)
index 29d3f04..3be58f8 100644 (file)
@@ -1134,6 +1134,9 @@ static void acpi_i2c_register_devices(struct i2c_adapter *adap)
        acpi_handle handle;
        acpi_status status;
 
+       if (!adap->dev.parent)
+               return;
+
        handle = ACPI_HANDLE(adap->dev.parent);
        if (!handle)
                return;
index 74b41ae..928656e 100644 (file)
@@ -200,7 +200,7 @@ static int i2c_arbitrator_probe(struct platform_device *pdev)
        arb->parent = of_find_i2c_adapter_by_node(parent_np);
        if (!arb->parent) {
                dev_err(dev, "Cannot find parent bus\n");
-               return -EINVAL;
+               return -EPROBE_DEFER;
        }
 
        /* Actually add the mux adapter */
index 5d4a99b..a764da7 100644 (file)
@@ -66,7 +66,7 @@ static int i2c_mux_gpio_probe_dt(struct gpiomux *mux,
        struct device_node *adapter_np, *child;
        struct i2c_adapter *adapter;
        unsigned *values, *gpios;
-       int i = 0;
+       int i = 0, ret;
 
        if (!np)
                return -ENODEV;
@@ -79,7 +79,7 @@ static int i2c_mux_gpio_probe_dt(struct gpiomux *mux,
        adapter = of_find_i2c_adapter_by_node(adapter_np);
        if (!adapter) {
                dev_err(&pdev->dev, "Cannot find parent bus\n");
-               return -ENODEV;
+               return -EPROBE_DEFER;
        }
        mux->data.parent = i2c_adapter_id(adapter);
        put_device(&adapter->dev);
@@ -116,8 +116,12 @@ static int i2c_mux_gpio_probe_dt(struct gpiomux *mux,
                return -ENOMEM;
        }
 
-       for (i = 0; i < mux->data.n_gpios; i++)
-               gpios[i] = of_get_named_gpio(np, "mux-gpios", i);
+       for (i = 0; i < mux->data.n_gpios; i++) {
+               ret = of_get_named_gpio(np, "mux-gpios", i);
+               if (ret < 0)
+                       return ret;
+               gpios[i] = ret;
+       }
 
        mux->data.gpios = gpios;
 
@@ -177,7 +181,7 @@ static int i2c_mux_gpio_probe(struct platform_device *pdev)
        if (!parent) {
                dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
                        mux->data.parent);
-               return -ENODEV;
+               return -EPROBE_DEFER;
        }
 
        mux->parent = parent;
index 69a9173..68a3715 100644 (file)
@@ -113,7 +113,7 @@ static int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux,
        adapter = of_find_i2c_adapter_by_node(adapter_np);
        if (!adapter) {
                dev_err(mux->dev, "Cannot find parent bus\n");
-               return -ENODEV;
+               return -EPROBE_DEFER;
        }
        mux->pdata->parent_bus_num = i2c_adapter_id(adapter);
        put_device(&adapter->dev);
@@ -211,7 +211,7 @@ static int i2c_mux_pinctrl_probe(struct platform_device *pdev)
        if (!mux->parent) {
                dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
                        mux->pdata->parent_bus_num);
-               ret = -ENODEV;
+               ret = -EPROBE_DEFER;
                goto err;
        }
 
index d0a79a4..ba6f6a9 100644 (file)
@@ -185,10 +185,8 @@ static int ad8366_remove(struct spi_device *spi)
 
        iio_device_unregister(indio_dev);
 
-       if (!IS_ERR(reg)) {
+       if (!IS_ERR(reg))
                regulator_disable(reg);
-               regulator_put(reg);
-       }
 
        return 0;
 }
index 8e84cd5..f95c697 100644 (file)
@@ -852,7 +852,6 @@ static void iio_dev_release(struct device *device)
                iio_device_unregister_trigger_consumer(indio_dev);
        iio_device_unregister_eventset(indio_dev);
        iio_device_unregister_sysfs(indio_dev);
-       iio_device_unregister_debugfs(indio_dev);
 
        ida_simple_remove(&iio_ida, indio_dev->id);
        kfree(indio_dev);
@@ -1087,6 +1086,7 @@ void iio_device_unregister(struct iio_dev *indio_dev)
 
        if (indio_dev->chrdev.dev)
                cdev_del(&indio_dev->chrdev);
+       iio_device_unregister_debugfs(indio_dev);
 
        iio_disable_all_buffers(indio_dev);
 
index e8d2849..cab3bc7 100644 (file)
@@ -29,9 +29,9 @@
 #define ST_MAGN_NUMBER_DATA_CHANNELS           3
 
 /* DEFAULT VALUE FOR SENSORS */
-#define ST_MAGN_DEFAULT_OUT_X_L_ADDR           0X04
-#define ST_MAGN_DEFAULT_OUT_Y_L_ADDR           0X08
-#define ST_MAGN_DEFAULT_OUT_Z_L_ADDR           0X06
+#define ST_MAGN_DEFAULT_OUT_X_H_ADDR           0X03
+#define ST_MAGN_DEFAULT_OUT_Y_H_ADDR           0X07
+#define ST_MAGN_DEFAULT_OUT_Z_H_ADDR           0X05
 
 /* FULLSCALE */
 #define ST_MAGN_FS_AVL_1300MG                  1300
 static const struct iio_chan_spec st_magn_16bit_channels[] = {
        ST_SENSORS_LSM_CHANNELS(IIO_MAGN,
                        BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE),
-                       ST_SENSORS_SCAN_X, 1, IIO_MOD_X, 's', IIO_LE, 16, 16,
-                       ST_MAGN_DEFAULT_OUT_X_L_ADDR),
+                       ST_SENSORS_SCAN_X, 1, IIO_MOD_X, 's', IIO_BE, 16, 16,
+                       ST_MAGN_DEFAULT_OUT_X_H_ADDR),
        ST_SENSORS_LSM_CHANNELS(IIO_MAGN,
                        BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE),
-                       ST_SENSORS_SCAN_Y, 1, IIO_MOD_Y, 's', IIO_LE, 16, 16,
-                       ST_MAGN_DEFAULT_OUT_Y_L_ADDR),
+                       ST_SENSORS_SCAN_Y, 1, IIO_MOD_Y, 's', IIO_BE, 16, 16,
+                       ST_MAGN_DEFAULT_OUT_Y_H_ADDR),
        ST_SENSORS_LSM_CHANNELS(IIO_MAGN,
                        BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE),
-                       ST_SENSORS_SCAN_Z, 1, IIO_MOD_Z, 's', IIO_LE, 16, 16,
-                       ST_MAGN_DEFAULT_OUT_Z_L_ADDR),
+                       ST_SENSORS_SCAN_Z, 1, IIO_MOD_Z, 's', IIO_BE, 16, 16,
+                       ST_MAGN_DEFAULT_OUT_Z_H_ADDR),
        IIO_CHAN_SOFT_TIMESTAMP(3)
 };
 
index d5d1929..cedda25 100644 (file)
@@ -141,7 +141,7 @@ static const char *to_qp_state_str(int state)
                return "C2_QP_STATE_ERROR";
        default:
                return "<invalid QP state>";
-       };
+       }
 }
 
 void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
index 3f831de..b1a6cb3 100644 (file)
@@ -164,6 +164,7 @@ int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
 static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
 {
        struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+       char name[MLX5_MAX_EQ_NAME];
        struct mlx5_eq *eq, *n;
        int ncomp_vec;
        int nent;
@@ -180,11 +181,10 @@ static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
                        goto clean;
                }
 
-               snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
+               snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
                err = mlx5_create_map_eq(&dev->mdev, eq,
                                         i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
-                                        eq->name,
-                                        &dev->mdev.priv.uuari.uars[0]);
+                                        name, &dev->mdev.priv.uuari.uars[0]);
                if (err) {
                        kfree(eq);
                        goto clean;
@@ -301,9 +301,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        props->max_srq_sge         = max_rq_sg - 1;
        props->max_fast_reg_page_list_len = (unsigned int)-1;
        props->local_ca_ack_delay  = dev->mdev.caps.local_ca_ack_delay;
-       props->atomic_cap          = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ?
-               IB_ATOMIC_HCA : IB_ATOMIC_NONE;
-       props->masked_atomic_cap   = IB_ATOMIC_HCA;
+       props->atomic_cap          = IB_ATOMIC_NONE;
+       props->masked_atomic_cap   = IB_ATOMIC_NONE;
        props->max_pkeys           = be16_to_cpup((__be16 *)(out_mad->data + 28));
        props->max_mcast_grp       = 1 << dev->mdev.caps.log_max_mcg;
        props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg;
@@ -1006,6 +1005,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
        ibev.device           = &ibdev->ib_dev;
        ibev.element.port_num = port;
 
+       if (port < 1 || port > ibdev->num_ports) {
+               mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
+               return;
+       }
+
        if (ibdev->ib_active)
                ib_dispatch_event(&ibev);
 }
index bd41df9..3453580 100644 (file)
@@ -42,6 +42,10 @@ enum {
        DEF_CACHE_SIZE  = 10,
 };
 
+enum {
+       MLX5_UMR_ALIGN  = 2048
+};
+
 static __be64 *mr_align(__be64 *ptr, int align)
 {
        unsigned long mask = align - 1;
@@ -61,13 +65,11 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
 
 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 {
-       struct device *ddev = dev->ib_dev.dma_device;
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int npages = 1 << ent->order;
-       int size = sizeof(u64) * npages;
        int err = 0;
        int i;
 
@@ -83,21 +85,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
                }
                mr->order = ent->order;
                mr->umred = 1;
-               mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
-               if (!mr->pas) {
-                       kfree(mr);
-                       err = -ENOMEM;
-                       goto out;
-               }
-               mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
-                                        DMA_TO_DEVICE);
-               if (dma_mapping_error(ddev, mr->dma)) {
-                       kfree(mr->pas);
-                       kfree(mr);
-                       err = -ENOMEM;
-                       goto out;
-               }
-
                in->seg.status = 1 << 6;
                in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
                in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
@@ -108,8 +95,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
                                            sizeof(*in));
                if (err) {
                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
-                       dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
-                       kfree(mr->pas);
                        kfree(mr);
                        goto out;
                }
@@ -129,11 +114,9 @@ out:
 
 static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 {
-       struct device *ddev = dev->ib_dev.dma_device;
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
-       int size;
        int err;
        int i;
 
@@ -149,14 +132,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
                ent->size--;
                spin_unlock(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
-               if (err) {
+               if (err)
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
-               } else {
-                       size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
-                       dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
-                       kfree(mr->pas);
+               else
                        kfree(mr);
-               }
        }
 }
 
@@ -408,13 +387,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 
 static void clean_keys(struct mlx5_ib_dev *dev, int c)
 {
-       struct device *ddev = dev->ib_dev.dma_device;
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
-       int size;
        int err;
 
+       cancel_delayed_work(&ent->dwork);
        while (1) {
                spin_lock(&ent->lock);
                if (list_empty(&ent->head)) {
@@ -427,14 +405,10 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
                ent->size--;
                spin_unlock(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
-               if (err) {
+               if (err)
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
-               } else {
-                       size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
-                       dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
-                       kfree(mr->pas);
+               else
                        kfree(mr);
-               }
        }
 }
 
@@ -540,13 +514,15 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
        int i;
 
        dev->cache.stopped = 1;
-       destroy_workqueue(dev->cache.wq);
+       flush_workqueue(dev->cache.wq);
 
        mlx5_mr_cache_debugfs_cleanup(dev);
 
        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
                clean_keys(dev, i);
 
+       destroy_workqueue(dev->cache.wq);
+
        return 0;
 }
 
@@ -675,10 +651,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
                                  int page_shift, int order, int access_flags)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct device *ddev = dev->ib_dev.dma_device;
        struct umr_common *umrc = &dev->umrc;
        struct ib_send_wr wr, *bad;
        struct mlx5_ib_mr *mr;
        struct ib_sge sg;
+       int size = sizeof(u64) * npages;
        int err;
        int i;
 
@@ -697,7 +675,22 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
        if (!mr)
                return ERR_PTR(-EAGAIN);
 
-       mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);
+       mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+       if (!mr->pas) {
+               err = -ENOMEM;
+               goto error;
+       }
+
+       mlx5_ib_populate_pas(dev, umem, page_shift,
+                            mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
+
+       mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
+                                DMA_TO_DEVICE);
+       if (dma_mapping_error(ddev, mr->dma)) {
+               kfree(mr->pas);
+               err = -ENOMEM;
+               goto error;
+       }
 
        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)mr;
@@ -718,6 +711,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
        wait_for_completion(&mr->done);
        up(&umrc->sem);
 
+       dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+       kfree(mr->pas);
+
        if (mr->status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "reg umr failed\n");
                err = -EFAULT;
index 045f8cd..5659ea8 100644 (file)
@@ -203,7 +203,7 @@ static int sq_overhead(enum ib_qp_type qp_type)
 
        switch (qp_type) {
        case IB_QPT_XRC_INI:
-               size = sizeof(struct mlx5_wqe_xrc_seg);
+               size += sizeof(struct mlx5_wqe_xrc_seg);
                /* fall through */
        case IB_QPT_RC:
                size += sizeof(struct mlx5_wqe_ctrl_seg) +
@@ -211,20 +211,23 @@ static int sq_overhead(enum ib_qp_type qp_type)
                        sizeof(struct mlx5_wqe_raddr_seg);
                break;
 
+       case IB_QPT_XRC_TGT:
+               return 0;
+
        case IB_QPT_UC:
-               size = sizeof(struct mlx5_wqe_ctrl_seg) +
+               size += sizeof(struct mlx5_wqe_ctrl_seg) +
                        sizeof(struct mlx5_wqe_raddr_seg);
                break;
 
        case IB_QPT_UD:
        case IB_QPT_SMI:
        case IB_QPT_GSI:
-               size = sizeof(struct mlx5_wqe_ctrl_seg) +
+               size += sizeof(struct mlx5_wqe_ctrl_seg) +
                        sizeof(struct mlx5_wqe_datagram_seg);
                break;
 
        case MLX5_IB_QPT_REG_UMR:
-               size = sizeof(struct mlx5_wqe_ctrl_seg) +
+               size += sizeof(struct mlx5_wqe_ctrl_seg) +
                        sizeof(struct mlx5_wqe_umr_ctrl_seg) +
                        sizeof(struct mlx5_mkey_seg);
                break;
@@ -270,7 +273,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
                return wqe_size;
 
        if (wqe_size > dev->mdev.caps.max_sq_desc_sz) {
-               mlx5_ib_dbg(dev, "\n");
+               mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
+                           wqe_size, dev->mdev.caps.max_sq_desc_sz);
                return -EINVAL;
        }
 
@@ -280,9 +284,15 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
 
        wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
        qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
+       if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
+               mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
+                           qp->sq.wqe_cnt, dev->mdev.caps.max_wqes);
+               return -ENOMEM;
+       }
        qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
        qp->sq.max_gs = attr->cap.max_send_sge;
-       qp->sq.max_post = 1 << ilog2(wq_size / wqe_size);
+       qp->sq.max_post = wq_size / wqe_size;
+       attr->cap.max_send_wr = qp->sq.max_post;
 
        return wq_size;
 }
@@ -1280,6 +1290,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
                                          MLX5_QP_OPTPAR_Q_KEY,
                        [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX    |
                                           MLX5_QP_OPTPAR_Q_KEY,
+                       [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
+                                         MLX5_QP_OPTPAR_RRE            |
+                                         MLX5_QP_OPTPAR_RAE            |
+                                         MLX5_QP_OPTPAR_RWE            |
+                                         MLX5_QP_OPTPAR_PKEY_INDEX,
                },
        },
        [MLX5_QP_STATE_RTR] = {
@@ -1314,6 +1329,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
                [MLX5_QP_STATE_RTS] = {
                        [MLX5_QP_ST_UD]  = MLX5_QP_OPTPAR_Q_KEY,
                        [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
+                       [MLX5_QP_ST_UC]  = MLX5_QP_OPTPAR_RWE,
+                       [MLX5_QP_ST_RC]  = MLX5_QP_OPTPAR_RNR_TIMEOUT   |
+                                          MLX5_QP_OPTPAR_RWE           |
+                                          MLX5_QP_OPTPAR_RAE           |
+                                          MLX5_QP_OPTPAR_RRE,
                },
        },
 };
@@ -1651,29 +1671,6 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
        rseg->reserved = 0;
 }
 
-static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
-{
-       if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
-               aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
-       } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
-               aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add_mask);
-       } else {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
-               aseg->compare  = 0;
-       }
-}
-
-static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg,
-                                 struct ib_send_wr *wr)
-{
-       aseg->swap_add          = cpu_to_be64(wr->wr.atomic.swap);
-       aseg->swap_add_mask     = cpu_to_be64(wr->wr.atomic.swap_mask);
-       aseg->compare           = cpu_to_be64(wr->wr.atomic.compare_add);
-       aseg->compare_mask      = cpu_to_be64(wr->wr.atomic.compare_add_mask);
-}
-
 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
                             struct ib_send_wr *wr)
 {
@@ -2063,28 +2060,11 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
-                               set_raddr_seg(seg, wr->wr.atomic.remote_addr,
-                                             wr->wr.atomic.rkey);
-                               seg  += sizeof(struct mlx5_wqe_raddr_seg);
-
-                               set_atomic_seg(seg, wr);
-                               seg  += sizeof(struct mlx5_wqe_atomic_seg);
-
-                               size += (sizeof(struct mlx5_wqe_raddr_seg) +
-                                        sizeof(struct mlx5_wqe_atomic_seg)) / 16;
-                               break;
-
                        case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
-                               set_raddr_seg(seg, wr->wr.atomic.remote_addr,
-                                             wr->wr.atomic.rkey);
-                               seg  += sizeof(struct mlx5_wqe_raddr_seg);
-
-                               set_masked_atomic_seg(seg, wr);
-                               seg  += sizeof(struct mlx5_wqe_masked_atomic_seg);
-
-                               size += (sizeof(struct mlx5_wqe_raddr_seg) +
-                                        sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16;
-                               break;
+                               mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
+                               err = -ENOSYS;
+                               *bad_wr = wr;
+                               goto out;
 
                        case IB_WR_LOCAL_INV:
                                next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
index 84d297a..0aa478b 100644 (file)
@@ -295,7 +295,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
        mlx5_vfree(in);
        if (err) {
                mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
-               goto err_srq;
+               goto err_usr_kern_srq;
        }
 
        mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn);
@@ -316,6 +316,8 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 
 err_core:
        mlx5_core_destroy_srq(&dev->mdev, &srq->msrq);
+
+err_usr_kern_srq:
        if (pd->uobject)
                destroy_srq_user(pd, srq);
        else
index 7c9d35f..6902017 100644 (file)
@@ -357,7 +357,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
                        mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
                                   eqe->type, eqe->subtype, eq->eqn);
                        break;
-               };
+               }
 
                set_eqe_hw(eqe);
                ++eq->cons_index;
index 4ed8235..50219ab 100644 (file)
@@ -150,7 +150,7 @@ enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps)
                return IB_QPS_SQE;
        case OCRDMA_QPS_ERR:
                return IB_QPS_ERR;
-       };
+       }
        return IB_QPS_ERR;
 }
 
@@ -171,7 +171,7 @@ static enum ocrdma_qp_state get_ocrdma_qp_state(enum ib_qp_state qps)
                return OCRDMA_QPS_SQE;
        case IB_QPS_ERR:
                return OCRDMA_QPS_ERR;
-       };
+       }
        return OCRDMA_QPS_ERR;
 }
 
@@ -1982,7 +1982,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
                break;
        default:
                return -EINVAL;
-       };
+       }
 
        cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_QP, sizeof(*cmd));
        if (!cmd)
index 56e0049..0ce7674 100644 (file)
@@ -531,7 +531,7 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
        case BE_DEV_DOWN:
                ocrdma_close(dev);
                break;
-       };
+       }
 }
 
 static struct ocrdma_driver ocrdma_drv = {
index 6e982bb..69f1d12 100644 (file)
@@ -141,7 +141,7 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
                /* Unsupported */
                *ib_speed = IB_SPEED_SDR;
                *ib_width = IB_WIDTH_1X;
-       };
+       }
 }
 
 
@@ -2331,7 +2331,7 @@ static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
        default:
                ibwc_status = IB_WC_GENERAL_ERR;
                break;
-       };
+       }
        return ibwc_status;
 }
 
@@ -2370,7 +2370,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
                pr_err("%s() invalid opcode received = 0x%x\n",
                       __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
                break;
-       };
+       }
 }
 
 static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
index 653ac6b..6c923c7 100644 (file)
@@ -1588,7 +1588,7 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
        int resp_data_len;
        int resp_len;
 
-       resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
+       resp_data_len = 4;
        resp_len = sizeof(*srp_rsp) + resp_data_len;
 
        srp_rsp = ioctx->ioctx.buf;
@@ -1600,11 +1600,9 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                    + atomic_xchg(&ch->req_lim_delta, 0));
        srp_rsp->tag = tag;
 
-       if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
-               srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
-               srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
-               srp_rsp->data[3] = rsp_code;
-       }
+       srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
+       srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
+       srp_rsp->data[3] = rsp_code;
 
        return resp_len;
 }
@@ -2358,6 +2356,8 @@ static void srpt_release_channel_work(struct work_struct *w)
        transport_deregister_session(se_sess);
        ch->sess = NULL;
 
+       ib_destroy_cm_id(ch->cm_id);
+
        srpt_destroy_ch_ib(ch);
 
        srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
@@ -2368,8 +2368,6 @@ static void srpt_release_channel_work(struct work_struct *w)
        list_del(&ch->list);
        spin_unlock_irq(&sdev->spinlock);
 
-       ib_destroy_cm_id(ch->cm_id);
-
        if (ch->release_done)
                complete(ch->release_done);
 
index fe302e3..c880eba 100644 (file)
@@ -52,7 +52,7 @@ config AMD_IOMMU
        select PCI_PRI
        select PCI_PASID
        select IOMMU_API
-       depends on X86_64 && PCI && ACPI && X86_IO_APIC
+       depends on X86_64 && PCI && ACPI
        ---help---
          With this option you can enable support for AMD IOMMU hardware in
          your system. An IOMMU is a hardware component which provides
index f417e89..181c9ba 100644 (file)
@@ -377,6 +377,7 @@ struct arm_smmu_cfg {
        u32                             cbar;
        pgd_t                           *pgd;
 };
+#define INVALID_IRPTNDX                        0xff
 
 #define ARM_SMMU_CB_ASID(cfg)          ((cfg)->cbndx)
 #define ARM_SMMU_CB_VMID(cfg)          ((cfg)->cbndx + 1)
@@ -840,7 +841,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
        if (IS_ERR_VALUE(ret)) {
                dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
                        root_cfg->irptndx, irq);
-               root_cfg->irptndx = -1;
+               root_cfg->irptndx = INVALID_IRPTNDX;
                goto out_free_context;
        }
 
@@ -869,7 +870,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
        writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
        arm_smmu_tlb_inv_context(root_cfg);
 
-       if (root_cfg->irptndx != -1) {
+       if (root_cfg->irptndx != INVALID_IRPTNDX) {
                irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx];
                free_irq(irq, domain);
        }
@@ -1857,8 +1858,6 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
                goto out_put_parent;
        }
 
-       arm_smmu_device_reset(smmu);
-
        for (i = 0; i < smmu->num_global_irqs; ++i) {
                err = request_irq(smmu->irqs[i],
                                  arm_smmu_global_fault,
@@ -1876,6 +1875,8 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
        spin_lock(&arm_smmu_devices_lock);
        list_add(&smmu->list, &arm_smmu_devices);
        spin_unlock(&arm_smmu_devices_lock);
+
+       arm_smmu_device_reset(smmu);
        return 0;
 
 out_free_irqs:
@@ -1966,10 +1967,10 @@ static int __init arm_smmu_init(void)
                return ret;
 
        /* Oh, for a proper bus abstraction */
-       if (!iommu_present(&platform_bus_type));
+       if (!iommu_present(&platform_bus_type))
                bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
 
-       if (!iommu_present(&amba_bustype));
+       if (!iommu_present(&amba_bustype))
                bus_set_iommu(&amba_bustype, &arm_smmu_ops);
 
        return 0;
index b39f6f0..0f12382 100644 (file)
@@ -498,7 +498,7 @@ struct cached_dev {
         */
        atomic_t                has_dirty;
 
-       struct ratelimit        writeback_rate;
+       struct bch_ratelimit    writeback_rate;
        struct delayed_work     writeback_rate_update;
 
        /*
@@ -507,10 +507,9 @@ struct cached_dev {
         */
        sector_t                last_read;
 
-       /* Number of writeback bios in flight */
-       atomic_t                in_flight;
+       /* Limit number of writeback bios in flight */
+       struct semaphore        in_flight;
        struct closure_with_timer writeback;
-       struct closure_waitlist writeback_wait;
 
        struct keybuf           writeback_keys;
 
index 8010eed..22d1ae7 100644 (file)
@@ -926,28 +926,45 @@ struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)
 
 /* Mergesort */
 
+static void sort_key_next(struct btree_iter *iter,
+                         struct btree_iter_set *i)
+{
+       i->k = bkey_next(i->k);
+
+       if (i->k == i->end)
+               *i = iter->data[--iter->used];
+}
+
 static void btree_sort_fixup(struct btree_iter *iter)
 {
        while (iter->used > 1) {
                struct btree_iter_set *top = iter->data, *i = top + 1;
-               struct bkey *k;
 
                if (iter->used > 2 &&
                    btree_iter_cmp(i[0], i[1]))
                        i++;
 
-               for (k = i->k;
-                    k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0;
-                    k = bkey_next(k))
-                       if (top->k > i->k)
-                               __bch_cut_front(top->k, k);
-                       else if (KEY_SIZE(k))
-                               bch_cut_back(&START_KEY(k), top->k);
-
-               if (top->k < i->k || k == i->k)
+               if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
                        break;
 
-               heap_sift(iter, i - top, btree_iter_cmp);
+               if (!KEY_SIZE(i->k)) {
+                       sort_key_next(iter, i);
+                       heap_sift(iter, i - top, btree_iter_cmp);
+                       continue;
+               }
+
+               if (top->k > i->k) {
+                       if (bkey_cmp(top->k, i->k) >= 0)
+                               sort_key_next(iter, i);
+                       else
+                               bch_cut_front(top->k, i->k);
+
+                       heap_sift(iter, i - top, btree_iter_cmp);
+               } else {
+                       /* can't happen because of comparison func */
+                       BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
+                       bch_cut_back(&START_KEY(i->k), top->k);
+               }
        }
 }
 
index f9764e6..f42fc7e 100644 (file)
@@ -255,7 +255,7 @@ void bch_btree_node_read(struct btree *b)
 
        return;
 err:
-       bch_cache_set_error(b->c, "io error reading bucket %lu",
+       bch_cache_set_error(b->c, "io error reading bucket %zu",
                            PTR_BUCKET_NR(b->c, &b->key, 0));
 }
 
@@ -612,7 +612,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
                return SHRINK_STOP;
 
        /* Return -1 if we can't do anything right now */
-       if (sc->gfp_mask & __GFP_WAIT)
+       if (sc->gfp_mask & __GFP_IO)
                mutex_lock(&c->bucket_lock);
        else if (!mutex_trylock(&c->bucket_lock))
                return -1;
index ba95ab8..8435f81 100644 (file)
@@ -153,7 +153,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
                bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
                pr_debug("%u journal buckets", ca->sb.njournal_buckets);
 
-               /* Read journal buckets ordered by golden ratio hash to quickly
+               /*
+                * Read journal buckets ordered by golden ratio hash to quickly
                 * find a sequence of buckets with valid journal entries
                 */
                for (i = 0; i < ca->sb.njournal_buckets; i++) {
@@ -166,18 +167,20 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
                                goto bsearch;
                }
 
-               /* If that fails, check all the buckets we haven't checked
+               /*
+                * If that fails, check all the buckets we haven't checked
                 * already
                 */
                pr_debug("falling back to linear search");
 
-               for (l = 0; l < ca->sb.njournal_buckets; l++) {
-                       if (test_bit(l, bitmap))
-                               continue;
-
+               for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
+                    l < ca->sb.njournal_buckets;
+                    l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
                        if (read_bucket(l))
                                goto bsearch;
-               }
+
+               if (list_empty(list))
+                       continue;
 bsearch:
                /* Binary search */
                m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
@@ -197,10 +200,12 @@ bsearch:
                                r = m;
                }
 
-               /* Read buckets in reverse order until we stop finding more
+               /*
+                * Read buckets in reverse order until we stop finding more
                 * journal entries
                 */
-               pr_debug("finishing up");
+               pr_debug("finishing up: m %u njournal_buckets %u",
+                        m, ca->sb.njournal_buckets);
                l = m;
 
                while (1) {
@@ -228,9 +233,10 @@ bsearch:
                        }
        }
 
-       c->journal.seq = list_entry(list->prev,
-                                   struct journal_replay,
-                                   list)->j.seq;
+       if (!list_empty(list))
+               c->journal.seq = list_entry(list->prev,
+                                           struct journal_replay,
+                                           list)->j.seq;
 
        return 0;
 #undef read_bucket
@@ -428,7 +434,7 @@ static void do_journal_discard(struct cache *ca)
                return;
        }
 
-       switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) {
+       switch (atomic_read(&ja->discard_in_flight)) {
        case DISCARD_IN_FLIGHT:
                return;
 
@@ -689,6 +695,7 @@ void bch_journal_meta(struct cache_set *c, struct closure *cl)
                if (cl)
                        BUG_ON(!closure_wait(&w->wait, cl));
 
+               closure_flush(&c->journal.io);
                __journal_try_write(c, true);
        }
 }
index 786a1a4..b6a74bc 100644 (file)
@@ -996,17 +996,19 @@ static void request_write(struct cached_dev *dc, struct search *s)
                closure_bio_submit(bio, cl, s->d);
        } else {
                bch_writeback_add(dc);
+               s->op.cache_bio = bio;
 
-               if (s->op.flush_journal) {
+               if (bio->bi_rw & REQ_FLUSH) {
                        /* Also need to send a flush to the backing device */
-                       s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
-                                                          dc->disk.bio_split);
-
-                       bio->bi_size = 0;
-                       bio->bi_vcnt = 0;
-                       closure_bio_submit(bio, cl, s->d);
-               } else {
-                       s->op.cache_bio = bio;
+                       struct bio *flush = bio_alloc_bioset(0, GFP_NOIO,
+                                                            dc->disk.bio_split);
+
+                       flush->bi_rw    = WRITE_FLUSH;
+                       flush->bi_bdev  = bio->bi_bdev;
+                       flush->bi_end_io = request_endio;
+                       flush->bi_private = cl;
+
+                       closure_bio_submit(flush, cl, s->d);
                }
        }
 out:
index 4fe6ab2..924dcfd 100644 (file)
@@ -223,8 +223,13 @@ STORE(__cached_dev)
        }
 
        if (attr == &sysfs_label) {
-               /* note: endlines are preserved */
-               memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
+               if (size > SB_LABEL_SIZE)
+                       return -EINVAL;
+               memcpy(dc->sb.label, buf, size);
+               if (size < SB_LABEL_SIZE)
+                       dc->sb.label[size] = '\0';
+               if (size && dc->sb.label[size - 1] == '\n')
+                       dc->sb.label[size - 1] = '\0';
                bch_write_bdev_super(dc, NULL);
                if (dc->disk.c) {
                        memcpy(dc->disk.c->uuids[dc->disk.id].label,
index 98eb811..420dad5 100644 (file)
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
        stats->last = now ?: 1;
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
+/**
+ * bch_next_delay() - increment @d by the amount of work done, and return how
+ * long to delay until the next time to do some work.
+ *
+ * @d - the struct bch_ratelimit to update
+ * @done - the amount of work done, in arbitrary units
+ *
+ * Returns the amount of time to delay by, in jiffies
+ */
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
 {
        uint64_t now = local_clock();
 
index 1ae2a73..ea345c6 100644 (file)
@@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units)
        (ewma) >> factor;                                               \
 })
 
-struct ratelimit {
+struct bch_ratelimit {
+       /* Next time we want to do some work, in nanoseconds */
        uint64_t                next;
+
+       /*
+        * Rate at which we want to do work, in units per nanosecond
+        * The units here correspond to the units passed to bch_next_delay()
+        */
        unsigned                rate;
 };
 
-static inline void ratelimit_reset(struct ratelimit *d)
+static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
 {
        d->next = local_clock();
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
 
 #define __DIV_SAFE(n, d, zero)                                         \
 ({                                                                     \
index 22cbff5..ba3ee48 100644 (file)
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)
 
 static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
 {
+       uint64_t ret;
+
        if (atomic_read(&dc->disk.detaching) ||
            !dc->writeback_percent)
                return 0;
 
-       return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+       ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
+
+       return min_t(uint64_t, ret, HZ);
 }
 
 /* Background writeback */
@@ -208,7 +212,7 @@ normal_refill:
 
        up_write(&dc->writeback_lock);
 
-       ratelimit_reset(&dc->writeback_rate);
+       bch_ratelimit_reset(&dc->writeback_rate);
 
        /* Punt to workqueue only so we don't recurse and blow the stack */
        continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
        }
 
        bch_keybuf_del(&dc->writeback_keys, w);
-       atomic_dec_bug(&dc->in_flight);
-
-       closure_wake_up(&dc->writeback_wait);
+       up(&dc->in_flight);
 
        closure_return_with_destructor(cl, dirty_io_destructor);
 }
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl)
 
        closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-       continue_at(cl, write_dirty_finish, dirty_wq);
+       continue_at(cl, write_dirty_finish, system_wq);
 }
 
 static void read_dirty_endio(struct bio *bio, int error)
@@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)
 
        closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
-       continue_at(cl, write_dirty, dirty_wq);
+       continue_at(cl, write_dirty, system_wq);
 }
 
 static void read_dirty(struct closure *cl)
@@ -394,12 +396,8 @@ static void read_dirty(struct closure *cl)
 
                if (delay > 0 &&
                    (KEY_START(&w->key) != dc->last_read ||
-                    jiffies_to_msecs(delay) > 50)) {
-                       w->private = NULL;
-
-                       closure_delay(&dc->writeback, delay);
-                       continue_at(cl, read_dirty, dirty_wq);
-               }
+                    jiffies_to_msecs(delay) > 50))
+                       delay = schedule_timeout_uninterruptible(delay);
 
                dc->last_read   = KEY_OFFSET(&w->key);
 
@@ -424,15 +422,10 @@ static void read_dirty(struct closure *cl)
 
                trace_bcache_writeback(&w->key);
 
-               closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
+               down(&dc->in_flight);
+               closure_call(&io->cl, read_dirty_submit, NULL, cl);
 
                delay = writeback_delay(dc, KEY_SIZE(&w->key));
-
-               atomic_inc(&dc->in_flight);
-
-               if (!closure_wait_event(&dc->writeback_wait, cl,
-                                       atomic_read(&dc->in_flight) < 64))
-                       continue_at(cl, read_dirty, dirty_wq);
        }
 
        if (0) {
@@ -442,7 +435,11 @@ err:
                bch_keybuf_del(&dc->writeback_keys, w);
        }
 
-       refill_dirty(cl);
+       /*
+        * Wait for outstanding writeback IOs to finish (and keybuf slots to be
+        * freed) before refilling again
+        */
+       continue_at(cl, refill_dirty, dirty_wq);
 }
 
 /* Init */
@@ -484,6 +481,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
 
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
 {
+       sema_init(&dc->in_flight, 64);
        closure_init_unlocked(&dc->writeback);
        init_rwsem(&dc->writeback_lock);
 
@@ -513,7 +511,7 @@ void bch_writeback_exit(void)
 
 int __init bch_writeback_init(void)
 {
-       dirty_wq = create_singlethread_workqueue("bcache_writeback");
+       dirty_wq = create_workqueue("bcache_writeback");
        if (!dirty_wq)
                return -ENOMEM;
 
index ea49834..2a20986 100644 (file)
@@ -19,8 +19,6 @@
 #define DM_MSG_PREFIX "io"
 
 #define DM_IO_MAX_REGIONS      BITS_PER_LONG
-#define MIN_IOS                16
-#define MIN_BIOS       16
 
 struct dm_io_client {
        mempool_t *pool;
@@ -50,16 +48,17 @@ static struct kmem_cache *_dm_io_cache;
 struct dm_io_client *dm_io_client_create(void)
 {
        struct dm_io_client *client;
+       unsigned min_ios = dm_get_reserved_bio_based_ios();
 
        client = kmalloc(sizeof(*client), GFP_KERNEL);
        if (!client)
                return ERR_PTR(-ENOMEM);
 
-       client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache);
+       client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache);
        if (!client->pool)
                goto bad;
 
-       client->bios = bioset_create(MIN_BIOS, 0);
+       client->bios = bioset_create(min_ios, 0);
        if (!client->bios)
                goto bad;
 
index b759a12..de570a5 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/device-mapper.h>
 
+#include "dm.h"
 #include "dm-path-selector.h"
 #include "dm-uevent.h"
 
@@ -116,8 +117,6 @@ struct dm_mpath_io {
 
 typedef int (*action_fn) (struct pgpath *pgpath);
 
-#define MIN_IOS 256    /* Mempool size */
-
 static struct kmem_cache *_mpio_cache;
 
 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
@@ -190,6 +189,7 @@ static void free_priority_group(struct priority_group *pg,
 static struct multipath *alloc_multipath(struct dm_target *ti)
 {
        struct multipath *m;
+       unsigned min_ios = dm_get_reserved_rq_based_ios();
 
        m = kzalloc(sizeof(*m), GFP_KERNEL);
        if (m) {
@@ -202,7 +202,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
                INIT_WORK(&m->trigger_event, trigger_event);
                init_waitqueue_head(&m->pg_init_wait);
                mutex_init(&m->work_mutex);
-               m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
+               m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
                if (!m->mpio_pool) {
                        kfree(m);
                        return NULL;
@@ -1268,6 +1268,7 @@ static int noretry_error(int error)
        case -EREMOTEIO:
        case -EILSEQ:
        case -ENODATA:
+       case -ENOSPC:
                return 1;
        }
 
@@ -1298,8 +1299,17 @@ static int do_end_io(struct multipath *m, struct request *clone,
        if (!error && !clone->errors)
                return 0;       /* I/O complete */
 
-       if (noretry_error(error))
+       if (noretry_error(error)) {
+               if ((clone->cmd_flags & REQ_WRITE_SAME) &&
+                   !clone->q->limits.max_write_same_sectors) {
+                       struct queue_limits *limits;
+
+                       /* device doesn't really support WRITE SAME, disable it */
+                       limits = dm_get_queue_limits(dm_table_get_md(m->ti->table));
+                       limits->max_write_same_sectors = 0;
+               }
                return error;
+       }
 
        if (mpio->pgpath)
                fail_path(mpio->pgpath);
index 3ac4156..4caa8e6 100644 (file)
@@ -256,7 +256,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
         */
        INIT_WORK_ONSTACK(&req.work, do_metadata);
        queue_work(ps->metadata_wq, &req.work);
-       flush_work(&req.work);
+       flush_workqueue(ps->metadata_wq);
 
        return req.result;
 }
index c434e5a..aec57d7 100644 (file)
@@ -725,17 +725,16 @@ static int calc_max_buckets(void)
  */
 static int init_hash_tables(struct dm_snapshot *s)
 {
-       sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
+       sector_t hash_size, cow_dev_size, max_buckets;
 
        /*
         * Calculate based on the size of the original volume or
         * the COW volume...
         */
        cow_dev_size = get_dev_size(s->cow->bdev);
-       origin_dev_size = get_dev_size(s->origin->bdev);
        max_buckets = calc_max_buckets();
 
-       hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
+       hash_size = cow_dev_size >> s->store->chunk_shift;
        hash_size = min(hash_size, max_buckets);
 
        if (hash_size < 64)
index 8ae31e8..3d404c1 100644 (file)
@@ -451,19 +451,26 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
        struct dm_stat_percpu *p;
 
        /*
-        * For strict correctness we should use local_irq_disable/enable
+        * For strict correctness we should use local_irq_save/restore
         * instead of preempt_disable/enable.
         *
-        * This is racy if the driver finishes bios from non-interrupt
-        * context as well as from interrupt context or from more different
-        * interrupts.
+        * preempt_disable/enable is racy if the driver finishes bios
+        * from non-interrupt context as well as from interrupt context
+        * or from more different interrupts.
         *
-        * However, the race only results in not counting some events,
-        * so it is acceptable.
+        * On 64-bit architectures the race only results in not counting some
+        * events, so it is acceptable.  On 32-bit architectures the race could
+        * cause the counter going off by 2^32, so we need to do proper locking
+        * there.
         *
         * part_stat_lock()/part_stat_unlock() have this race too.
         */
+#if BITS_PER_LONG == 32
+       unsigned long flags;
+       local_irq_save(flags);
+#else
        preempt_disable();
+#endif
        p = &s->stat_percpu[smp_processor_id()][entry];
 
        if (!end) {
@@ -478,7 +485,11 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
                p->ticks[idx] += duration;
        }
 
+#if BITS_PER_LONG == 32
+       local_irq_restore(flags);
+#else
        preempt_enable();
+#endif
 }
 
 static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
index ed06342..2c0cf51 100644 (file)
@@ -2095,6 +2095,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
         * them down to the data device.  The thin device's discard
         * processing will cause mappings to be removed from the btree.
         */
+       ti->discard_zeroes_data_unsupported = true;
        if (pf.discard_enabled && pf.discard_passdown) {
                ti->num_discard_bios = 1;
 
@@ -2104,7 +2105,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
                 * thin devices' discard limits consistent).
                 */
                ti->discards_supported = true;
-               ti->discard_zeroes_data_unsupported = true;
        }
        ti->private = pt;
 
@@ -2689,8 +2689,16 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
         * They get transferred to the live pool in bind_control_target()
         * called from pool_preresume().
         */
-       if (!pt->adjusted_pf.discard_enabled)
+       if (!pt->adjusted_pf.discard_enabled) {
+               /*
+                * Must explicitly disallow stacking discard limits otherwise the
+                * block layer will stack them if pool's data device has support.
+                * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
+                * user to see that, so make sure to set all discard limits to 0.
+                */
+               limits->discard_granularity = 0;
                return;
+       }
 
        disable_passdown_if_not_supported(pt);
 
@@ -2826,10 +2834,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
        ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
 
        /* In case the pool supports discards, pass them on. */
+       ti->discard_zeroes_data_unsupported = true;
        if (tc->pool->pf.discard_enabled) {
                ti->discards_supported = true;
                ti->num_discard_bios = 1;
-               ti->discard_zeroes_data_unsupported = true;
                /* Discard bios must be split on a block boundary */
                ti->split_discard_bios = true;
        }
index 6a5e9ed..b3e26c7 100644 (file)
@@ -211,10 +211,55 @@ struct dm_md_mempools {
        struct bio_set *bs;
 };
 
-#define MIN_IOS 256
+#define RESERVED_BIO_BASED_IOS         16
+#define RESERVED_REQUEST_BASED_IOS     256
+#define RESERVED_MAX_IOS               1024
 static struct kmem_cache *_io_cache;
 static struct kmem_cache *_rq_tio_cache;
 
+/*
+ * Bio-based DM's mempools' reserved IOs set by the user.
+ */
+static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
+
+/*
+ * Request-based DM's mempools' reserved IOs set by the user.
+ */
+static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
+
+static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
+                                     unsigned def, unsigned max)
+{
+       unsigned ios = ACCESS_ONCE(*reserved_ios);
+       unsigned modified_ios = 0;
+
+       if (!ios)
+               modified_ios = def;
+       else if (ios > max)
+               modified_ios = max;
+
+       if (modified_ios) {
+               (void)cmpxchg(reserved_ios, ios, modified_ios);
+               ios = modified_ios;
+       }
+
+       return ios;
+}
+
+unsigned dm_get_reserved_bio_based_ios(void)
+{
+       return __dm_get_reserved_ios(&reserved_bio_based_ios,
+                                    RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
+}
+EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
+
+unsigned dm_get_reserved_rq_based_ios(void)
+{
+       return __dm_get_reserved_ios(&reserved_rq_based_ios,
+                                    RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
+}
+EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
+
 static int __init local_init(void)
 {
        int r = -ENOMEM;
@@ -2278,6 +2323,17 @@ struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
 }
 
 /*
+ * The queue_limits are only valid as long as you have a reference
+ * count on 'md'.
+ */
+struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
+{
+       BUG_ON(!atomic_read(&md->holders));
+       return &md->queue->limits;
+}
+EXPORT_SYMBOL_GPL(dm_get_queue_limits);
+
+/*
  * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
  */
 static int dm_init_request_based_queue(struct mapped_device *md)
@@ -2862,18 +2918,18 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
 
        if (type == DM_TYPE_BIO_BASED) {
                cachep = _io_cache;
-               pool_size = 16;
+               pool_size = dm_get_reserved_bio_based_ios();
                front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
        } else if (type == DM_TYPE_REQUEST_BASED) {
                cachep = _rq_tio_cache;
-               pool_size = MIN_IOS;
+               pool_size = dm_get_reserved_rq_based_ios();
                front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
                /* per_bio_data_size is not used. See __bind_mempools(). */
                WARN_ON(per_bio_data_size != 0);
        } else
                goto out;
 
-       pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep);
+       pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
        if (!pools->io_pool)
                goto out;
 
@@ -2924,6 +2980,13 @@ module_exit(dm_exit);
 
 module_param(major, uint, 0);
 MODULE_PARM_DESC(major, "The major number of the device mapper");
+
+module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
+
+module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
+
 MODULE_DESCRIPTION(DM_NAME " driver");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");
index 5e604cc..1d1ad7b 100644 (file)
@@ -184,6 +184,9 @@ void dm_free_md_mempools(struct dm_md_mempools *pools);
 /*
  * Helpers that are used by DM core
  */
+unsigned dm_get_reserved_bio_based_ios(void);
+unsigned dm_get_reserved_rq_based_ios(void);
+
 static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
 {
        return !maxlen || strlen(result) + 1 >= maxlen;
index d0fdc13..f6ff711 100644 (file)
@@ -57,6 +57,7 @@ void mei_amthif_reset_params(struct mei_device *dev)
        dev->iamthif_ioctl = false;
        dev->iamthif_state = MEI_IAMTHIF_IDLE;
        dev->iamthif_timer = 0;
+       dev->iamthif_stall_timer = 0;
 }
 
 /**
index 6d0282c..cd2033c 100644 (file)
@@ -297,10 +297,13 @@ int __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length)
 
        if (cl->reading_state != MEI_READ_COMPLETE &&
            !waitqueue_active(&cl->rx_wait)) {
+
                mutex_unlock(&dev->device_lock);
 
                if (wait_event_interruptible(cl->rx_wait,
-                               (MEI_READ_COMPLETE == cl->reading_state))) {
+                               cl->reading_state == MEI_READ_COMPLETE  ||
+                               mei_cl_is_transitioning(cl))) {
+
                        if (signal_pending(current))
                                return -EINTR;
                        return -ERESTARTSYS;
index 9eb031e..892cc42 100644 (file)
@@ -90,6 +90,12 @@ static inline bool mei_cl_is_connected(struct mei_cl *cl)
                cl->dev->dev_state == MEI_DEV_ENABLED &&
                cl->state == MEI_FILE_CONNECTED);
 }
+static inline bool mei_cl_is_transitioning(struct mei_cl *cl)
+{
+       return (MEI_FILE_INITIALIZING == cl->state ||
+               MEI_FILE_DISCONNECTED == cl->state ||
+               MEI_FILE_DISCONNECTING == cl->state);
+}
 
 bool mei_cl_is_other_connecting(struct mei_cl *cl);
 int mei_cl_disconnect(struct mei_cl *cl);
index 6127ab6..0a04483 100644 (file)
@@ -35,11 +35,15 @@ static void mei_hbm_me_cl_allocate(struct mei_device *dev)
        struct mei_me_client *clients;
        int b;
 
+       dev->me_clients_num = 0;
+       dev->me_client_presentation_num = 0;
+       dev->me_client_index = 0;
+
        /* count how many ME clients we have */
        for_each_set_bit(b, dev->me_clients_map, MEI_CLIENTS_MAX)
                dev->me_clients_num++;
 
-       if (dev->me_clients_num <= 0)
+       if (dev->me_clients_num == 0)
                return;
 
        kfree(dev->me_clients);
@@ -221,7 +225,7 @@ static int mei_hbm_prop_req(struct mei_device *dev)
        struct hbm_props_request *prop_req;
        const size_t len = sizeof(struct hbm_props_request);
        unsigned long next_client_index;
-       u8 client_num;
+       unsigned long client_num;
 
 
        client_num = dev->me_client_presentation_num;
@@ -677,8 +681,6 @@ void mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
                if (dev->dev_state == MEI_DEV_INIT_CLIENTS &&
                    dev->hbm_state == MEI_HBM_ENUM_CLIENTS) {
                                dev->init_clients_timer = 0;
-                               dev->me_client_presentation_num = 0;
-                               dev->me_client_index = 0;
                                mei_hbm_me_cl_allocate(dev);
                                dev->hbm_state = MEI_HBM_CLIENT_PROPERTIES;
 
index 92c7311..6197018 100644 (file)
@@ -175,6 +175,9 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled)
                memset(&dev->wr_ext_msg, 0, sizeof(dev->wr_ext_msg));
        }
 
+       /* we're already in reset, cancel the init timer */
+       dev->init_clients_timer = 0;
+
        dev->me_clients_num = 0;
        dev->rd_msg_hdr = 0;
        dev->wd_pending = false;
index 173ff09..cabeddd 100644 (file)
@@ -249,19 +249,16 @@ static ssize_t mei_read(struct file *file, char __user *ubuf,
                mutex_unlock(&dev->device_lock);
 
                if (wait_event_interruptible(cl->rx_wait,
-                       (MEI_READ_COMPLETE == cl->reading_state ||
-                        MEI_FILE_INITIALIZING == cl->state ||
-                        MEI_FILE_DISCONNECTED == cl->state ||
-                        MEI_FILE_DISCONNECTING == cl->state))) {
+                               MEI_READ_COMPLETE == cl->reading_state ||
+                               mei_cl_is_transitioning(cl))) {
+
                        if (signal_pending(current))
                                return -EINTR;
                        return -ERESTARTSYS;
                }
 
                mutex_lock(&dev->device_lock);
-               if (MEI_FILE_INITIALIZING == cl->state ||
-                   MEI_FILE_DISCONNECTED == cl->state ||
-                   MEI_FILE_DISCONNECTING == cl->state) {
+               if (mei_cl_is_transitioning(cl)) {
                        rets = -EBUSY;
                        goto out;
                }
index 7b918b2..456b322 100644 (file)
@@ -396,9 +396,9 @@ struct mei_device {
        struct mei_me_client *me_clients; /* Note: memory has to be allocated */
        DECLARE_BITMAP(me_clients_map, MEI_CLIENTS_MAX);
        DECLARE_BITMAP(host_clients_map, MEI_CLIENTS_MAX);
-       u8 me_clients_num;
-       u8 me_client_presentation_num;
-       u8 me_client_index;
+       unsigned long me_clients_num;
+       unsigned long me_client_presentation_num;
+       unsigned long me_client_index;
 
        struct mei_cl wd_cl;
        enum mei_wd_states wd_state;
index 87ed3fb..f344659 100644 (file)
@@ -113,14 +113,14 @@ static const struct sh_mobile_sdhi_ops sdhi_ops = {
 };
 
 static const struct of_device_id sh_mobile_sdhi_of_match[] = {
-       { .compatible = "renesas,shmobile-sdhi" },
-       { .compatible = "renesas,sh7372-sdhi" },
-       { .compatible = "renesas,sh73a0-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
-       { .compatible = "renesas,r8a73a4-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
-       { .compatible = "renesas,r8a7740-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
-       { .compatible = "renesas,r8a7778-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
-       { .compatible = "renesas,r8a7779-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
-       { .compatible = "renesas,r8a7790-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
+       { .compatible = "renesas,sdhi-shmobile" },
+       { .compatible = "renesas,sdhi-sh7372" },
+       { .compatible = "renesas,sdhi-sh73a0", .data = &sh_mobile_sdhi_of_cfg[0], },
+       { .compatible = "renesas,sdhi-r8a73a4", .data = &sh_mobile_sdhi_of_cfg[0], },
+       { .compatible = "renesas,sdhi-r8a7740", .data = &sh_mobile_sdhi_of_cfg[0], },
+       { .compatible = "renesas,sdhi-r8a7778", .data = &sh_mobile_sdhi_of_cfg[0], },
+       { .compatible = "renesas,sdhi-r8a7779", .data = &sh_mobile_sdhi_of_cfg[0], },
+       { .compatible = "renesas,sdhi-r8a7790", .data = &sh_mobile_sdhi_of_cfg[0], },
        {},
 };
 MODULE_DEVICE_TABLE(of, sh_mobile_sdhi_of_match);
index 26b14f9..6bc9618 100644 (file)
@@ -168,12 +168,25 @@ static inline int write_disable(struct m25p *flash)
  */
 static inline int set_4byte(struct m25p *flash, u32 jedec_id, int enable)
 {
+       int status;
+       bool need_wren = false;
+
        switch (JEDEC_MFR(jedec_id)) {
-       case CFI_MFR_MACRONIX:
        case CFI_MFR_ST: /* Micron, actually */
+               /* Some Micron need WREN command; all will accept it */
+               need_wren = true;
+       case CFI_MFR_MACRONIX:
        case 0xEF /* winbond */:
+               if (need_wren)
+                       write_enable(flash);
+
                flash->command[0] = enable ? OPCODE_EN4B : OPCODE_EX4B;
-               return spi_write(flash->spi, flash->command, 1);
+               status = spi_write(flash->spi, flash->command, 1);
+
+               if (need_wren)
+                       write_disable(flash);
+
+               return status;
        default:
                /* Spansion style */
                flash->command[0] = OPCODE_BRWR;
index 7ed4841..d340b2f 100644 (file)
@@ -2869,10 +2869,8 @@ static int nand_flash_detect_ext_param_page(struct mtd_info *mtd,
 
        len = le16_to_cpu(p->ext_param_page_length) * 16;
        ep = kmalloc(len, GFP_KERNEL);
-       if (!ep) {
-               ret = -ENOMEM;
-               goto ext_out;
-       }
+       if (!ep)
+               return -ENOMEM;
 
        /* Send our own NAND_CMD_PARAM. */
        chip->cmdfunc(mtd, NAND_CMD_PARAM, 0, -1);
@@ -2920,7 +2918,7 @@ static int nand_flash_detect_ext_param_page(struct mtd_info *mtd,
        }
 
        pr_info("ONFI extended param page detected.\n");
-       return 0;
+       ret = 0;
 
 ext_out:
        kfree(ep);
index 55bbb8b..e883bfe 100644 (file)
@@ -1724,6 +1724,7 @@ static int __bond_release_one(struct net_device *bond_dev,
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave, *oldcurrent;
        struct sockaddr addr;
+       int old_flags = bond_dev->flags;
        netdev_features_t old_features = bond_dev->features;
 
        /* slave is not a slave or master is not master of this slave */
@@ -1855,12 +1856,18 @@ static int __bond_release_one(struct net_device *bond_dev,
         * bond_change_active_slave(..., NULL)
         */
        if (!USES_PRIMARY(bond->params.mode)) {
-               /* unset promiscuity level from slave */
-               if (bond_dev->flags & IFF_PROMISC)
+               /* unset promiscuity level from slave
+                * NOTE: The NETDEV_CHANGEADDR call above may change the value
+                * of the IFF_PROMISC flag in the bond_dev, but we need the
+                * value of that flag before that change, as that was the value
+                * when this slave was attached, so we cache at the start of the
+                * function and use it here. Same goes for ALLMULTI below
+                */
+               if (old_flags & IFF_PROMISC)
                        dev_set_promiscuity(slave_dev, -1);
 
                /* unset allmulti level from slave */
-               if (bond_dev->flags & IFF_ALLMULTI)
+               if (old_flags & IFF_ALLMULTI)
                        dev_set_allmulti(slave_dev, -1);
 
                bond_hw_addr_flush(bond_dev, slave_dev);
index 71c677e..3f21142 100644 (file)
@@ -702,7 +702,6 @@ static int flexcan_chip_start(struct net_device *dev)
 {
        struct flexcan_priv *priv = netdev_priv(dev);
        struct flexcan_regs __iomem *regs = priv->base;
-       unsigned int i;
        int err;
        u32 reg_mcr, reg_ctrl;
 
@@ -772,17 +771,6 @@ static int flexcan_chip_start(struct net_device *dev)
        netdev_dbg(dev, "%s: writing ctrl=0x%08x", __func__, reg_ctrl);
        flexcan_write(reg_ctrl, &regs->ctrl);
 
-       for (i = 0; i < ARRAY_SIZE(regs->cantxfg); i++) {
-               flexcan_write(0, &regs->cantxfg[i].can_ctrl);
-               flexcan_write(0, &regs->cantxfg[i].can_id);
-               flexcan_write(0, &regs->cantxfg[i].data[0]);
-               flexcan_write(0, &regs->cantxfg[i].data[1]);
-
-               /* put MB into rx queue */
-               flexcan_write(FLEXCAN_MB_CNT_CODE(0x4),
-                       &regs->cantxfg[i].can_ctrl);
-       }
-
        /* acceptance mask/acceptance code (accept everything) */
        flexcan_write(0x0, &regs->rxgmask);
        flexcan_write(0x0, &regs->rx14mask);
index 874188b..25377e5 100644 (file)
@@ -76,6 +76,10 @@ MODULE_PARM_DESC(maxdev, "Maximum number of slcan interfaces");
 /* maximum rx buffer len: extended CAN frame with timestamp */
 #define SLC_MTU (sizeof("T1111222281122334455667788EA5F\r")+1)
 
+#define SLC_CMD_LEN 1
+#define SLC_SFF_ID_LEN 3
+#define SLC_EFF_ID_LEN 8
+
 struct slcan {
        int                     magic;
 
@@ -142,47 +146,63 @@ static void slc_bump(struct slcan *sl)
 {
        struct sk_buff *skb;
        struct can_frame cf;
-       int i, dlc_pos, tmp;
-       unsigned long ultmp;
-       char cmd = sl->rbuff[0];
-
-       if ((cmd != 't') && (cmd != 'T') && (cmd != 'r') && (cmd != 'R'))
+       int i, tmp;
+       u32 tmpid;
+       char *cmd = sl->rbuff;
+
+       cf.can_id = 0;
+
+       switch (*cmd) {
+       case 'r':
+               cf.can_id = CAN_RTR_FLAG;
+               /* fallthrough */
+       case 't':
+               /* store dlc ASCII value and terminate SFF CAN ID string */
+               cf.can_dlc = sl->rbuff[SLC_CMD_LEN + SLC_SFF_ID_LEN];
+               sl->rbuff[SLC_CMD_LEN + SLC_SFF_ID_LEN] = 0;
+               /* point to payload data behind the dlc */
+               cmd += SLC_CMD_LEN + SLC_SFF_ID_LEN + 1;
+               break;
+       case 'R':
+               cf.can_id = CAN_RTR_FLAG;
+               /* fallthrough */
+       case 'T':
+               cf.can_id |= CAN_EFF_FLAG;
+               /* store dlc ASCII value and terminate EFF CAN ID string */
+               cf.can_dlc = sl->rbuff[SLC_CMD_LEN + SLC_EFF_ID_LEN];
+               sl->rbuff[SLC_CMD_LEN + SLC_EFF_ID_LEN] = 0;
+               /* point to payload data behind the dlc */
+               cmd += SLC_CMD_LEN + SLC_EFF_ID_LEN + 1;
+               break;
+       default:
                return;
+       }
 
-       if (cmd & 0x20) /* tiny chars 'r' 't' => standard frame format */
-               dlc_pos = 4; /* dlc position tiiid */
-       else
-               dlc_pos = 9; /* dlc position Tiiiiiiiid */
-
-       if (!((sl->rbuff[dlc_pos] >= '0') && (sl->rbuff[dlc_pos] < '9')))
+       if (kstrtou32(sl->rbuff + SLC_CMD_LEN, 16, &tmpid))
                return;
 
-       cf.can_dlc = sl->rbuff[dlc_pos] - '0'; /* get can_dlc from ASCII val */
+       cf.can_id |= tmpid;
 
-       sl->rbuff[dlc_pos] = 0; /* terminate can_id string */
-
-       if (kstrtoul(sl->rbuff+1, 16, &ultmp))
+       /* get can_dlc from sanitized ASCII value */
+       if (cf.can_dlc >= '0' && cf.can_dlc < '9')
+               cf.can_dlc -= '0';
+       else
                return;
 
-       cf.can_id = ultmp;
-
-       if (!(cmd & 0x20)) /* NO tiny chars => extended frame format */
-               cf.can_id |= CAN_EFF_FLAG;
-
-       if ((cmd | 0x20) == 'r') /* RTR frame */
-               cf.can_id |= CAN_RTR_FLAG;
-
        *(u64 *) (&cf.data) = 0; /* clear payload */
 
-       for (i = 0, dlc_pos++; i < cf.can_dlc; i++) {
-               tmp = hex_to_bin(sl->rbuff[dlc_pos++]);
-               if (tmp < 0)
-                       return;
-               cf.data[i] = (tmp << 4);
-               tmp = hex_to_bin(sl->rbuff[dlc_pos++]);
-               if (tmp < 0)
-                       return;
-               cf.data[i] |= tmp;
+       /* RTR frames may have a dlc > 0 but they never have any data bytes */
+       if (!(cf.can_id & CAN_RTR_FLAG)) {
+               for (i = 0; i < cf.can_dlc; i++) {
+                       tmp = hex_to_bin(*cmd++);
+                       if (tmp < 0)
+                               return;
+                       cf.data[i] = (tmp << 4);
+                       tmp = hex_to_bin(*cmd++);
+                       if (tmp < 0)
+                               return;
+                       cf.data[i] |= tmp;
+               }
        }
 
        skb = dev_alloc_skb(sizeof(struct can_frame) +
@@ -209,7 +229,6 @@ static void slc_bump(struct slcan *sl)
 /* parse tty input stream */
 static void slcan_unesc(struct slcan *sl, unsigned char s)
 {
-
        if ((s == '\r') || (s == '\a')) { /* CR or BEL ends the pdu */
                if (!test_and_clear_bit(SLF_ERROR, &sl->flags) &&
                    (sl->rcount > 4))  {
@@ -236,27 +255,46 @@ static void slcan_unesc(struct slcan *sl, unsigned char s)
 /* Encapsulate one can_frame and stuff into a TTY queue. */
 static void slc_encaps(struct slcan *sl, struct can_frame *cf)
 {
-       int actual, idx, i;
-       char cmd;
+       int actual, i;
+       unsigned char *pos;
+       unsigned char *endpos;
+       canid_t id = cf->can_id;
+
+       pos = sl->xbuff;
 
        if (cf->can_id & CAN_RTR_FLAG)
-               cmd = 'R'; /* becomes 'r' in standard frame format */
+               *pos = 'R'; /* becomes 'r' in standard frame format (SFF) */
        else
-               cmd = 'T'; /* becomes 't' in standard frame format */
+               *pos = 'T'; /* becomes 't' in standard frame format (SSF) */
 
-       if (cf->can_id & CAN_EFF_FLAG)
-               sprintf(sl->xbuff, "%c%08X%d", cmd,
-                       cf->can_id & CAN_EFF_MASK, cf->can_dlc);
-       else
-               sprintf(sl->xbuff, "%c%03X%d", cmd | 0x20,
-                       cf->can_id & CAN_SFF_MASK, cf->can_dlc);
+       /* determine number of chars for the CAN-identifier */
+       if (cf->can_id & CAN_EFF_FLAG) {
+               id &= CAN_EFF_MASK;
+               endpos = pos + SLC_EFF_ID_LEN;
+       } else {
+               *pos |= 0x20; /* convert R/T to lower case for SFF */
+               id &= CAN_SFF_MASK;
+               endpos = pos + SLC_SFF_ID_LEN;
+       }
 
-       idx = strlen(sl->xbuff);
+       /* build 3 (SFF) or 8 (EFF) digit CAN identifier */
+       pos++;
+       while (endpos >= pos) {
+               *endpos-- = hex_asc_upper[id & 0xf];
+               id >>= 4;
+       }
+
+       pos += (cf->can_id & CAN_EFF_FLAG) ? SLC_EFF_ID_LEN : SLC_SFF_ID_LEN;
 
-       for (i = 0; i < cf->can_dlc; i++)
-               sprintf(&sl->xbuff[idx + 2*i], "%02X", cf->data[i]);
+       *pos++ = cf->can_dlc + '0';
+
+       /* RTR frames may have a dlc > 0 but they never have any data bytes */
+       if (!(cf->can_id & CAN_RTR_FLAG)) {
+               for (i = 0; i < cf->can_dlc; i++)
+                       pos = hex_byte_pack_upper(pos, cf->data[i]);
+       }
 
-       strcat(sl->xbuff, "\r"); /* add terminating character */
+       *pos++ = '\r';
 
        /* Order of next two lines is *very* important.
         * When we are sending a little amount of data,
@@ -267,8 +305,8 @@ static void slc_encaps(struct slcan *sl, struct can_frame *cf)
         *       14 Oct 1994  Dmitry Gorodchanin.
         */
        set_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags);
-       actual = sl->tty->ops->write(sl->tty, sl->xbuff, strlen(sl->xbuff));
-       sl->xleft = strlen(sl->xbuff) - actual;
+       actual = sl->tty->ops->write(sl->tty, sl->xbuff, pos - sl->xbuff);
+       sl->xleft = (pos - sl->xbuff) - actual;
        sl->xhead = sl->xbuff + actual;
        sl->dev->stats.tx_bytes += cf->can_dlc;
 }
@@ -286,11 +324,13 @@ static void slcan_write_wakeup(struct tty_struct *tty)
        if (!sl || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev))
                return;
 
+       spin_lock(&sl->lock);
        if (sl->xleft <= 0)  {
                /* Now serial buffer is almost free & we can start
                 * transmission of another packet */
                sl->dev->stats.tx_packets++;
                clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+               spin_unlock(&sl->lock);
                netif_wake_queue(sl->dev);
                return;
        }
@@ -298,6 +338,7 @@ static void slcan_write_wakeup(struct tty_struct *tty)
        actual = tty->ops->write(tty, sl->xhead, sl->xleft);
        sl->xleft -= actual;
        sl->xhead += actual;
+       spin_unlock(&sl->lock);
 }
 
 /* Send a can_frame to a TTY queue. */
index a0f647f..0b7a4c3 100644 (file)
@@ -463,7 +463,7 @@ static int peak_usb_start(struct peak_usb_device *dev)
        if (i < PCAN_USB_MAX_TX_URBS) {
                if (i == 0) {
                        netdev_err(netdev, "couldn't setup any tx URB\n");
-                       return err;
+                       goto err_tx;
                }
 
                netdev_warn(netdev, "tx performance may be slow\n");
@@ -472,7 +472,7 @@ static int peak_usb_start(struct peak_usb_device *dev)
        if (dev->adapter->dev_start) {
                err = dev->adapter->dev_start(dev);
                if (err)
-                       goto failed;
+                       goto err_adapter;
        }
 
        dev->state |= PCAN_USB_STATE_STARTED;
@@ -481,19 +481,26 @@ static int peak_usb_start(struct peak_usb_device *dev)
        if (dev->adapter->dev_set_bus) {
                err = dev->adapter->dev_set_bus(dev, 1);
                if (err)
-                       goto failed;
+                       goto err_adapter;
        }
 
        dev->can.state = CAN_STATE_ERROR_ACTIVE;
 
        return 0;
 
-failed:
+err_adapter:
        if (err == -ENODEV)
                netif_device_detach(dev->netdev);
 
        netdev_warn(netdev, "couldn't submit control: %d\n", err);
 
+       for (i = 0; i < PCAN_USB_MAX_TX_URBS; i++) {
+               usb_free_urb(dev->tx_contexts[i].urb);
+               dev->tx_contexts[i].urb = NULL;
+       }
+err_tx:
+       usb_kill_anchored_urbs(&dev->rx_submitted);
+
        return err;
 }
 
index 61726af..e66beff 100644 (file)
@@ -2481,8 +2481,7 @@ load_error_cnic2:
 load_error_cnic1:
        bnx2x_napi_disable_cnic(bp);
        /* Update the number of queues without the cnic queues */
-       rc = bnx2x_set_real_num_queues(bp, 0);
-       if (rc)
+       if (bnx2x_set_real_num_queues(bp, 0))
                BNX2X_ERR("Unable to set real_num_queues not including cnic\n");
 load_error_cnic0:
        BNX2X_ERR("CNIC-related load failed\n");
index d60a2ea..5146822 100644 (file)
@@ -175,6 +175,7 @@ typedef int (*read_sfp_module_eeprom_func_p)(struct bnx2x_phy *phy,
 #define EDC_MODE_LINEAR                                0x0022
 #define EDC_MODE_LIMITING                              0x0044
 #define EDC_MODE_PASSIVE_DAC                   0x0055
+#define EDC_MODE_ACTIVE_DAC                    0x0066
 
 /* ETS defines*/
 #define DCBX_INVALID_COS                                       (0xFF)
@@ -3684,6 +3685,41 @@ static void bnx2x_warpcore_enable_AN_KR2(struct bnx2x_phy *phy,
        bnx2x_update_link_attr(params, vars->link_attr_sync);
 }
 
+static void bnx2x_disable_kr2(struct link_params *params,
+                             struct link_vars *vars,
+                             struct bnx2x_phy *phy)
+{
+       struct bnx2x *bp = params->bp;
+       int i;
+       static struct bnx2x_reg_set reg_set[] = {
+               /* Step 1 - Program the TX/RX alignment markers */
+               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL5, 0x7690},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL7, 0xe647},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL6, 0xc4f0},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL9, 0x7690},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL11, 0xe647},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL10, 0xc4f0},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_USERB0_CTRL, 0x000c},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL1, 0x6000},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL3, 0x0000},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CODE_FIELD, 0x0002},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI1, 0x0000},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI2, 0x0af7},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI3, 0x0af7},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_BAM_CODE, 0x0002},
+               {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_UD_CODE, 0x0000}
+       };
+       DP(NETIF_MSG_LINK, "Disabling 20G-KR2\n");
+
+       for (i = 0; i < ARRAY_SIZE(reg_set); i++)
+               bnx2x_cl45_write(bp, phy, reg_set[i].devad, reg_set[i].reg,
+                                reg_set[i].val);
+       vars->link_attr_sync &= ~LINK_ATTR_SYNC_KR2_ENABLE;
+       bnx2x_update_link_attr(params, vars->link_attr_sync);
+
+       vars->check_kr2_recovery_cnt = CHECK_KR2_RECOVERY_CNT;
+}
+
 static void bnx2x_warpcore_set_lpi_passthrough(struct bnx2x_phy *phy,
                                               struct link_params *params)
 {
@@ -3715,7 +3751,6 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
                                        struct link_params *params,
                                        struct link_vars *vars) {
        u16 lane, i, cl72_ctrl, an_adv = 0;
-       u16 ucode_ver;
        struct bnx2x *bp = params->bp;
        static struct bnx2x_reg_set reg_set[] = {
                {MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x7},
@@ -3806,15 +3841,7 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
 
        /* Advertise pause */
        bnx2x_ext_phy_set_pause(params, phy, vars);
-       /* Set KR Autoneg Work-Around flag for Warpcore version older than D108
-        */
-       bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD,
-                       MDIO_WC_REG_UC_INFO_B1_VERSION, &ucode_ver);
-       if (ucode_ver < 0xd108) {
-               DP(NETIF_MSG_LINK, "Enable AN KR work-around. WC ver:0x%x\n",
-                              ucode_ver);
-               vars->rx_tx_asic_rst = MAX_KR_LINK_RETRY;
-       }
+       vars->rx_tx_asic_rst = MAX_KR_LINK_RETRY;
        bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD,
                                 MDIO_WC_REG_DIGITAL5_MISC7, 0x100);
 
@@ -3838,6 +3865,8 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
                bnx2x_set_aer_mmd(params, phy);
 
                bnx2x_warpcore_enable_AN_KR2(phy, params, vars);
+       } else {
+               bnx2x_disable_kr2(params, vars, phy);
        }
 
        /* Enable Autoneg: only on the main lane */
@@ -4347,20 +4376,14 @@ static void bnx2x_warpcore_config_runtime(struct bnx2x_phy *phy,
        struct bnx2x *bp = params->bp;
        u32 serdes_net_if;
        u16 gp_status1 = 0, lnkup = 0, lnkup_kr = 0;
-       u16 lane = bnx2x_get_warpcore_lane(phy, params);
 
        vars->turn_to_run_wc_rt = vars->turn_to_run_wc_rt ? 0 : 1;
 
        if (!vars->turn_to_run_wc_rt)
                return;
 
-       /* Return if there is no link partner */
-       if (!(bnx2x_warpcore_get_sigdet(phy, params))) {
-               DP(NETIF_MSG_LINK, "bnx2x_warpcore_get_sigdet false\n");
-               return;
-       }
-
        if (vars->rx_tx_asic_rst) {
+               u16 lane = bnx2x_get_warpcore_lane(phy, params);
                serdes_net_if = (REG_RD(bp, params->shmem_base +
                                offsetof(struct shmem_region, dev_info.
                                port_hw_config[params->port].default_cfg)) &
@@ -4375,14 +4398,8 @@ static void bnx2x_warpcore_config_runtime(struct bnx2x_phy *phy,
                                /*10G KR*/
                        lnkup_kr = (gp_status1 >> (12+lane)) & 0x1;
 
-                       DP(NETIF_MSG_LINK,
-                               "gp_status1 0x%x\n", gp_status1);
-
                        if (lnkup_kr || lnkup) {
-                                       vars->rx_tx_asic_rst = 0;
-                                       DP(NETIF_MSG_LINK,
-                                       "link up, rx_tx_asic_rst 0x%x\n",
-                                       vars->rx_tx_asic_rst);
+                               vars->rx_tx_asic_rst = 0;
                        } else {
                                /* Reset the lane to see if link comes up.*/
                                bnx2x_warpcore_reset_lane(bp, phy, 1);
@@ -4507,10 +4524,14 @@ static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy,
                         * enabled transmitter to avoid current leakage in case
                         * no module is connected
                         */
-                       if (bnx2x_is_sfp_module_plugged(phy, params))
-                               bnx2x_sfp_module_detection(phy, params);
-                       else
-                               bnx2x_sfp_e3_set_transmitter(params, phy, 1);
+                       if ((params->loopback_mode == LOOPBACK_NONE) ||
+                           (params->loopback_mode == LOOPBACK_EXT)) {
+                               if (bnx2x_is_sfp_module_plugged(phy, params))
+                                       bnx2x_sfp_module_detection(phy, params);
+                               else
+                                       bnx2x_sfp_e3_set_transmitter(params,
+                                                                    phy, 1);
+                       }
 
                        bnx2x_warpcore_config_sfi(phy, params);
                        break;
@@ -5757,6 +5778,11 @@ static int bnx2x_warpcore_read_status(struct bnx2x_phy *phy,
        rc = bnx2x_get_link_speed_duplex(phy, params, vars, link_up, gp_speed,
                                         duplex);
 
+       /* In case of KR link down, start up the recovering procedure */
+       if ((!link_up) && (phy->media_type == ETH_PHY_KR) &&
+           (!(phy->flags & FLAGS_WC_DUAL_MODE)))
+               vars->rx_tx_asic_rst = MAX_KR_LINK_RETRY;
+
        DP(NETIF_MSG_LINK, "duplex %x  flow_ctrl 0x%x link_status 0x%x\n",
                   vars->duplex, vars->flow_ctrl, vars->link_status);
        return rc;
@@ -6507,6 +6533,11 @@ static int bnx2x_link_initialize(struct link_params *params,
                        params->phy[INT_PHY].config_init(phy, params, vars);
        }
 
+       /* Re-read this value in case it was changed inside config_init due to
+        * limitations of optic module
+        */
+       vars->line_speed = params->phy[INT_PHY].req_line_speed;
+
        /* Init external phy*/
        if (non_ext_phy) {
                if (params->phy[INT_PHY].supported &
@@ -8080,7 +8111,10 @@ static int bnx2x_get_edc_mode(struct bnx2x_phy *phy,
                if (copper_module_type &
                    SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_ACTIVE) {
                        DP(NETIF_MSG_LINK, "Active Copper cable detected\n");
-                       check_limiting_mode = 1;
+                       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT)
+                               *edc_mode = EDC_MODE_ACTIVE_DAC;
+                       else
+                               check_limiting_mode = 1;
                } else if (copper_module_type &
                        SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_PASSIVE) {
                                DP(NETIF_MSG_LINK,
@@ -8555,6 +8589,7 @@ static void bnx2x_warpcore_set_limiting_mode(struct link_params *params,
                mode = MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_DEFAULT;
                break;
        case EDC_MODE_PASSIVE_DAC:
+       case EDC_MODE_ACTIVE_DAC:
                mode = MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_SFP_DAC;
                break;
        default:
@@ -9730,32 +9765,41 @@ static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy,
                         MDIO_AN_DEVAD, MDIO_AN_REG_8481_1000T_CTRL,
                         an_1000_val);
 
-       /* set 100 speed advertisement */
-       if ((phy->req_line_speed == SPEED_AUTO_NEG) &&
-            (phy->speed_cap_mask &
-             (PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL |
-              PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF))) {
-               an_10_100_val |= (1<<7);
-               /* Enable autoneg and restart autoneg for legacy speeds */
-               autoneg_val |= (1<<9 | 1<<12);
-
-               if (phy->req_duplex == DUPLEX_FULL)
+       /* Set 10/100 speed advertisement */
+       if (phy->req_line_speed == SPEED_AUTO_NEG) {
+               if (phy->speed_cap_mask &
+                   PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL) {
+                       /* Enable autoneg and restart autoneg for legacy speeds
+                        */
+                       autoneg_val |= (1<<9 | 1<<12);
                        an_10_100_val |= (1<<8);
-               DP(NETIF_MSG_LINK, "Advertising 100M\n");
-       }
-       /* set 10 speed advertisement */
-       if (((phy->req_line_speed == SPEED_AUTO_NEG) &&
-            (phy->speed_cap_mask &
-             (PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL |
-              PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF)) &&
-            (phy->supported &
-             (SUPPORTED_10baseT_Half |
-              SUPPORTED_10baseT_Full)))) {
-               an_10_100_val |= (1<<5);
-               autoneg_val |= (1<<9 | 1<<12);
-               if (phy->req_duplex == DUPLEX_FULL)
+                       DP(NETIF_MSG_LINK, "Advertising 100M-FD\n");
+               }
+
+               if (phy->speed_cap_mask &
+                   PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF) {
+                       /* Enable autoneg and restart autoneg for legacy speeds
+                        */
+                       autoneg_val |= (1<<9 | 1<<12);
+                       an_10_100_val |= (1<<7);
+                       DP(NETIF_MSG_LINK, "Advertising 100M-HD\n");
+               }
+
+               if ((phy->speed_cap_mask &
+                    PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL) &&
+                   (phy->supported & SUPPORTED_10baseT_Full)) {
                        an_10_100_val |= (1<<6);
-               DP(NETIF_MSG_LINK, "Advertising 10M\n");
+                       autoneg_val |= (1<<9 | 1<<12);
+                       DP(NETIF_MSG_LINK, "Advertising 10M-FD\n");
+               }
+
+               if ((phy->speed_cap_mask &
+                    PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF) &&
+                   (phy->supported & SUPPORTED_10baseT_Half)) {
+                       an_10_100_val |= (1<<5);
+                       autoneg_val |= (1<<9 | 1<<12);
+                       DP(NETIF_MSG_LINK, "Advertising 10M-HD\n");
+               }
        }
 
        /* Only 10/100 are allowed to work in FORCE mode */
@@ -13432,43 +13476,6 @@ static void bnx2x_sfp_tx_fault_detection(struct bnx2x_phy *phy,
                }
        }
 }
-static void bnx2x_disable_kr2(struct link_params *params,
-                             struct link_vars *vars,
-                             struct bnx2x_phy *phy)
-{
-       struct bnx2x *bp = params->bp;
-       int i;
-       static struct bnx2x_reg_set reg_set[] = {
-               /* Step 1 - Program the TX/RX alignment markers */
-               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL5, 0x7690},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL7, 0xe647},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL6, 0xc4f0},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL9, 0x7690},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL11, 0xe647},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_RX_CTRL10, 0xc4f0},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_USERB0_CTRL, 0x000c},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL1, 0x6000},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CTRL3, 0x0000},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_CL73_BAM_CODE_FIELD, 0x0002},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI1, 0x0000},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI2, 0x0af7},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_OUI3, 0x0af7},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_BAM_CODE, 0x0002},
-               {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_UD_CODE, 0x0000}
-       };
-       DP(NETIF_MSG_LINK, "Disabling 20G-KR2\n");
-
-       for (i = 0; i < ARRAY_SIZE(reg_set); i++)
-               bnx2x_cl45_write(bp, phy, reg_set[i].devad, reg_set[i].reg,
-                                reg_set[i].val);
-       vars->link_attr_sync &= ~LINK_ATTR_SYNC_KR2_ENABLE;
-       bnx2x_update_link_attr(params, vars->link_attr_sync);
-
-       vars->check_kr2_recovery_cnt = CHECK_KR2_RECOVERY_CNT;
-       /* Restart AN on leading lane */
-       bnx2x_warpcore_restart_AN_KR(phy, params);
-}
-
 static void bnx2x_kr2_recovery(struct link_params *params,
                               struct link_vars *vars,
                               struct bnx2x_phy *phy)
@@ -13546,6 +13553,8 @@ static void bnx2x_check_kr2_wa(struct link_params *params,
                /* Disable KR2 on both lanes */
                DP(NETIF_MSG_LINK, "BP=0x%x, NP=0x%x\n", base_page, next_page);
                bnx2x_disable_kr2(params, vars, phy);
+               /* Restart AN on leading lane */
+               bnx2x_warpcore_restart_AN_KR(phy, params);
                return;
        }
 }
index a6704b5..82b658d 100644 (file)
@@ -4703,6 +4703,14 @@ bool bnx2x_chk_parity_attn(struct bnx2x *bp, bool *global, bool print)
        attn.sig[3] = REG_RD(bp,
                MISC_REG_AEU_AFTER_INVERT_4_FUNC_0 +
                             port*4);
+       /* Since MCP attentions can't be disabled inside the block, we need to
+        * read AEU registers to see whether they're currently disabled
+        */
+       attn.sig[3] &= ((REG_RD(bp,
+                               !port ? MISC_REG_AEU_ENABLE4_FUNC_0_OUT_0
+                                     : MISC_REG_AEU_ENABLE4_FUNC_1_OUT_0) &
+                        MISC_AEU_ENABLE_MCP_PRTY_BITS) |
+                       ~MISC_AEU_ENABLE_MCP_PRTY_BITS);
 
        if (!CHIP_IS_E1x(bp))
                attn.sig[4] = REG_RD(bp,
@@ -5447,26 +5455,24 @@ static void bnx2x_timer(unsigned long data)
        if (IS_PF(bp) &&
            !BP_NOMCP(bp)) {
                int mb_idx = BP_FW_MB_IDX(bp);
-               u32 drv_pulse;
-               u32 mcp_pulse;
+               u16 drv_pulse;
+               u16 mcp_pulse;
 
                ++bp->fw_drv_pulse_wr_seq;
                bp->fw_drv_pulse_wr_seq &= DRV_PULSE_SEQ_MASK;
-               /* TBD - add SYSTEM_TIME */
                drv_pulse = bp->fw_drv_pulse_wr_seq;
                bnx2x_drv_pulse(bp);
 
                mcp_pulse = (SHMEM_RD(bp, func_mb[mb_idx].mcp_pulse_mb) &
                             MCP_PULSE_SEQ_MASK);
                /* The delta between driver pulse and mcp response
-                * should be 1 (before mcp response) or 0 (after mcp response)
+                * should not get too big. If the MFW is more than 5 pulses
+                * behind, we should worry about it enough to generate an error
+                * log.
                 */
-               if ((drv_pulse != mcp_pulse) &&
-                   (drv_pulse != ((mcp_pulse + 1) & MCP_PULSE_SEQ_MASK))) {
-                       /* someone lost a heartbeat... */
-                       BNX2X_ERR("drv_pulse (0x%x) != mcp_pulse (0x%x)\n",
+               if (((drv_pulse - mcp_pulse) & MCP_PULSE_SEQ_MASK) > 5)
+                       BNX2X_ERR("MFW seems hanged: drv_pulse (0x%x) != mcp_pulse (0x%x)\n",
                                  drv_pulse, mcp_pulse);
-               }
        }
 
        if (bp->state == BNX2X_STATE_OPEN)
index 2604b62..9ad012b 100644 (file)
@@ -1819,7 +1819,7 @@ bnx2x_get_vf_igu_cam_info(struct bnx2x *bp)
                fid = GET_FIELD((val), IGU_REG_MAPPING_MEMORY_FID);
                if (fid & IGU_FID_ENCODE_IS_PF)
                        current_pf = fid & IGU_FID_PF_NUM_MASK;
-               else if (current_pf == BP_ABS_FUNC(bp))
+               else if (current_pf == BP_FUNC(bp))
                        bnx2x_vf_set_igu_info(bp, sb_id,
                                              (fid & IGU_FID_VF_NUM_MASK));
                DP(BNX2X_MSG_IOV, "%s[%d], igu_sb_id=%d, msix=%d\n",
@@ -3180,6 +3180,7 @@ int bnx2x_enable_sriov(struct bnx2x *bp)
                /* set local queue arrays */
                vf->vfqs = &bp->vfdb->vfqs[qcount];
                qcount += vf_sb_count(vf);
+               bnx2x_iov_static_resc(bp, vf);
        }
 
        /* prepare msix vectors in VF configuration space */
@@ -3187,6 +3188,8 @@ int bnx2x_enable_sriov(struct bnx2x *bp)
                bnx2x_pretend_func(bp, HW_VF_HANDLE(bp, vf_idx));
                REG_WR(bp, PCICFG_OFFSET + GRC_CONFIG_REG_VF_MSIX_CONTROL,
                       num_vf_queues);
+               DP(BNX2X_MSG_IOV, "set msix vec num in VF %d cfg space to %d\n",
+                  vf_idx, num_vf_queues);
        }
        bnx2x_pretend_func(bp, BP_ABS_FUNC(bp));
 
index 6cfb887..da16953 100644 (file)
@@ -1765,28 +1765,28 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
                switch (mbx->first_tlv.tl.type) {
                case CHANNEL_TLV_ACQUIRE:
                        bnx2x_vf_mbx_acquire(bp, vf, mbx);
-                       break;
+                       return;
                case CHANNEL_TLV_INIT:
                        bnx2x_vf_mbx_init_vf(bp, vf, mbx);
-                       break;
+                       return;
                case CHANNEL_TLV_SETUP_Q:
                        bnx2x_vf_mbx_setup_q(bp, vf, mbx);
-                       break;
+                       return;
                case CHANNEL_TLV_SET_Q_FILTERS:
                        bnx2x_vf_mbx_set_q_filters(bp, vf, mbx);
-                       break;
+                       return;
                case CHANNEL_TLV_TEARDOWN_Q:
                        bnx2x_vf_mbx_teardown_q(bp, vf, mbx);
-                       break;
+                       return;
                case CHANNEL_TLV_CLOSE:
                        bnx2x_vf_mbx_close_vf(bp, vf, mbx);
-                       break;
+                       return;
                case CHANNEL_TLV_RELEASE:
                        bnx2x_vf_mbx_release_vf(bp, vf, mbx);
-                       break;
+                       return;
                case CHANNEL_TLV_UPDATE_RSS:
                        bnx2x_vf_mbx_update_rss(bp, vf, mbx);
-                       break;
+                       return;
                }
 
        } else {
@@ -1802,26 +1802,24 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
                for (i = 0; i < 20; i++)
                        DP_CONT(BNX2X_MSG_IOV, "%x ",
                                mbx->msg->req.tlv_buf_size.tlv_buffer[i]);
+       }
 
-               /* test whether we can respond to the VF (do we have an address
-                * for it?)
-                */
-               if (vf->state == VF_ACQUIRED || vf->state == VF_ENABLED) {
-                       /* mbx_resp uses the op_rc of the VF */
-                       vf->op_rc = PFVF_STATUS_NOT_SUPPORTED;
+       /* can we respond to VF (do we have an address for it?) */
+       if (vf->state == VF_ACQUIRED || vf->state == VF_ENABLED) {
+               /* mbx_resp uses the op_rc of the VF */
+               vf->op_rc = PFVF_STATUS_NOT_SUPPORTED;
 
-                       /* notify the VF that we do not support this request */
-                       bnx2x_vf_mbx_resp(bp, vf);
-               } else {
-                       /* can't send a response since this VF is unknown to us
-                        * just ack the FW to release the mailbox and unlock
-                        * the channel.
-                        */
-                       storm_memset_vf_mbx_ack(bp, vf->abs_vfid);
-                       mmiowb();
-                       bnx2x_unlock_vf_pf_channel(bp, vf,
-                                                  mbx->first_tlv.tl.type);
-               }
+               /* notify the VF that we do not support this request */
+               bnx2x_vf_mbx_resp(bp, vf);
+       } else {
+               /* can't send a response since this VF is unknown to us
+                * just ack the FW to release the mailbox and unlock
+                * the channel.
+                */
+               storm_memset_vf_mbx_ack(bp, vf->abs_vfid);
+               /* Firmware ack should be written before unlocking channel */
+               mmiowb();
+               bnx2x_unlock_vf_pf_channel(bp, vf, mbx->first_tlv.tl.type);
        }
 }
 
index ace5050..db02023 100644 (file)
@@ -88,6 +88,7 @@ static inline char *nic_name(struct pci_dev *pdev)
 #define BE_MIN_MTU             256
 
 #define BE_NUM_VLANS_SUPPORTED 64
+#define BE_UMC_NUM_VLANS_SUPPORTED     15
 #define BE_MAX_EQD             96u
 #define        BE_MAX_TX_FRAG_COUNT    30
 
@@ -333,6 +334,7 @@ enum vf_state {
 
 #define BE_FLAGS_LINK_STATUS_INIT              1
 #define BE_FLAGS_WORKER_SCHEDULED              (1 << 3)
+#define BE_FLAGS_VLAN_PROMISC                  (1 << 4)
 #define BE_FLAGS_NAPI_ENABLED                  (1 << 9)
 #define BE_UC_PMAC_COUNT               30
 #define BE_VF_UC_PMAC_COUNT            2
index 1ab5dab..bd0e0c0 100644 (file)
@@ -180,6 +180,9 @@ static int be_mcc_compl_process(struct be_adapter *adapter,
                        dev_err(&adapter->pdev->dev,
                                "opcode %d-%d failed:status %d-%d\n",
                                opcode, subsystem, compl_status, extd_status);
+
+                       if (extd_status == MCC_ADDL_STS_INSUFFICIENT_RESOURCES)
+                               return extd_status;
                }
        }
 done:
@@ -1812,6 +1815,12 @@ int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value)
        } else if (flags & IFF_ALLMULTI) {
                req->if_flags_mask = req->if_flags =
                                cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS);
+       } else if (flags & BE_FLAGS_VLAN_PROMISC) {
+               req->if_flags_mask = cpu_to_le32(BE_IF_FLAGS_VLAN_PROMISCUOUS);
+
+               if (value == ON)
+                       req->if_flags =
+                               cpu_to_le32(BE_IF_FLAGS_VLAN_PROMISCUOUS);
        } else {
                struct netdev_hw_addr *ha;
                int i = 0;
index d026226..108ca8a 100644 (file)
@@ -60,6 +60,8 @@ enum {
        MCC_STATUS_NOT_SUPPORTED = 66
 };
 
+#define MCC_ADDL_STS_INSUFFICIENT_RESOURCES    0x16
+
 #define CQE_STATUS_COMPL_MASK          0xFFFF
 #define CQE_STATUS_COMPL_SHIFT         0       /* bits 0 - 15 */
 #define CQE_STATUS_EXTD_MASK           0xFFFF
@@ -1791,7 +1793,7 @@ struct be_nic_res_desc {
        u8 acpi_params;
        u8 wol_param;
        u16 rsvd7;
-       u32 rsvd8[3];
+       u32 rsvd8[7];
 } __packed;
 
 struct be_cmd_req_get_func_config {
index 100b528..2c38cc4 100644 (file)
@@ -855,11 +855,11 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
        unsigned int eth_hdr_len;
        struct iphdr *ip;
 
-       /* Lancer ASIC has a bug wherein packets that are 32 bytes or less
+       /* Lancer, SH-R ASICs have a bug wherein Packets that are 32 bytes or less
         * may cause a transmit stall on that port. So the work-around is to
-        * pad such packets to a 36-byte length.
+        * pad short packets (<= 32 bytes) to a 36-byte length.
         */
-       if (unlikely(lancer_chip(adapter) && skb->len <= 32)) {
+       if (unlikely(!BEx_chip(adapter) && skb->len <= 32)) {
                if (skb_padto(skb, 36))
                        goto tx_drop;
                skb->len = 36;
@@ -1013,18 +1013,40 @@ static int be_vid_config(struct be_adapter *adapter)
        status = be_cmd_vlan_config(adapter, adapter->if_handle,
                                    vids, num, 1, 0);
 
-       /* Set to VLAN promisc mode as setting VLAN filter failed */
        if (status) {
-               dev_info(&adapter->pdev->dev, "Exhausted VLAN HW filters.\n");
-               dev_info(&adapter->pdev->dev, "Disabling HW VLAN filtering.\n");
-               goto set_vlan_promisc;
+               /* Set to VLAN promisc mode as setting VLAN filter failed */
+               if (status == MCC_ADDL_STS_INSUFFICIENT_RESOURCES)
+                       goto set_vlan_promisc;
+               dev_err(&adapter->pdev->dev,
+                       "Setting HW VLAN filtering failed.\n");
+       } else {
+               if (adapter->flags & BE_FLAGS_VLAN_PROMISC) {
+                       /* hw VLAN filtering re-enabled. */
+                       status = be_cmd_rx_filter(adapter,
+                                                 BE_FLAGS_VLAN_PROMISC, OFF);
+                       if (!status) {
+                               dev_info(&adapter->pdev->dev,
+                                        "Disabling VLAN Promiscuous mode.\n");
+                               adapter->flags &= ~BE_FLAGS_VLAN_PROMISC;
+                               dev_info(&adapter->pdev->dev,
+                                        "Re-Enabling HW VLAN filtering\n");
+                       }
+               }
        }
 
        return status;
 
 set_vlan_promisc:
-       status = be_cmd_vlan_config(adapter, adapter->if_handle,
-                                   NULL, 0, 1, 1);
+       dev_warn(&adapter->pdev->dev, "Exhausted VLAN HW filters.\n");
+
+       status = be_cmd_rx_filter(adapter, BE_FLAGS_VLAN_PROMISC, ON);
+       if (!status) {
+               dev_info(&adapter->pdev->dev, "Enable VLAN Promiscuous mode\n");
+               dev_info(&adapter->pdev->dev, "Disabling HW VLAN filtering\n");
+               adapter->flags |= BE_FLAGS_VLAN_PROMISC;
+       } else
+               dev_err(&adapter->pdev->dev,
+                       "Failed to enable VLAN Promiscuous mode.\n");
        return status;
 }
 
@@ -1033,10 +1055,6 @@ static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
        struct be_adapter *adapter = netdev_priv(netdev);
        int status = 0;
 
-       if (!lancer_chip(adapter) && !be_physfn(adapter)) {
-               status = -EINVAL;
-               goto ret;
-       }
 
        /* Packets with VID 0 are always received by Lancer by default */
        if (lancer_chip(adapter) && vid == 0)
@@ -1059,11 +1077,6 @@ static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
        struct be_adapter *adapter = netdev_priv(netdev);
        int status = 0;
 
-       if (!lancer_chip(adapter) && !be_physfn(adapter)) {
-               status = -EINVAL;
-               goto ret;
-       }
-
        /* Packets with VID 0 are always received by Lancer by default */
        if (lancer_chip(adapter) && vid == 0)
                goto ret;
@@ -1188,8 +1201,8 @@ static int be_get_vf_config(struct net_device *netdev, int vf,
 
        vi->vf = vf;
        vi->tx_rate = vf_cfg->tx_rate;
-       vi->vlan = vf_cfg->vlan_tag;
-       vi->qos = 0;
+       vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
+       vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
        memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
 
        return 0;
@@ -1199,28 +1212,29 @@ static int be_set_vf_vlan(struct net_device *netdev,
                        int vf, u16 vlan, u8 qos)
 {
        struct be_adapter *adapter = netdev_priv(netdev);
+       struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
        int status = 0;
 
        if (!sriov_enabled(adapter))
                return -EPERM;
 
-       if (vf >= adapter->num_vfs || vlan > 4095)
+       if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
                return -EINVAL;
 
-       if (vlan) {
-               if (adapter->vf_cfg[vf].vlan_tag != vlan) {
+       if (vlan || qos) {
+               vlan |= qos << VLAN_PRIO_SHIFT;
+               if (vf_cfg->vlan_tag != vlan) {
                        /* If this is new value, program it. Else skip. */
-                       adapter->vf_cfg[vf].vlan_tag = vlan;
-
-                       status = be_cmd_set_hsw_config(adapter, vlan,
-                               vf + 1, adapter->vf_cfg[vf].if_handle, 0);
+                       vf_cfg->vlan_tag = vlan;
+                       status = be_cmd_set_hsw_config(adapter, vlan, vf + 1,
+                                                      vf_cfg->if_handle, 0);
                }
        } else {
                /* Reset Transparent Vlan Tagging. */
-               adapter->vf_cfg[vf].vlan_tag = 0;
-               vlan = adapter->vf_cfg[vf].def_vid;
+               vf_cfg->vlan_tag = 0;
+               vlan = vf_cfg->def_vid;
                status = be_cmd_set_hsw_config(adapter, vlan, vf + 1,
-                       adapter->vf_cfg[vf].if_handle, 0);
+                                              vf_cfg->if_handle, 0);
        }
 
 
@@ -2963,6 +2977,8 @@ static void BEx_get_resources(struct be_adapter *adapter,
 
        if (adapter->function_mode & FLEX10_MODE)
                res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
+       else if (adapter->function_mode & UMC_ENABLED)
+               res->max_vlans = BE_UMC_NUM_VLANS_SUPPORTED;
        else
                res->max_vlans = BE_NUM_VLANS_SUPPORTED;
        res->max_mcast_mac = BE_MAX_MC;
index 098f133..e006a09 100644 (file)
@@ -452,7 +452,9 @@ static int gianfar_ptp_probe(struct platform_device *dev)
        err = -ENODEV;
 
        etsects->caps = ptp_gianfar_caps;
-       etsects->cksel = DEFAULT_CKSEL;
+
+       if (get_of_u32(node, "fsl,cksel", &etsects->cksel))
+               etsects->cksel = DEFAULT_CKSEL;
 
        if (get_of_u32(node, "fsl,tclk-period", &etsects->tclk_period) ||
            get_of_u32(node, "fsl,tmr-prsc", &etsects->tmr_prsc) ||
index 0c524fa..cfef7fc 100644 (file)
@@ -701,8 +701,7 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw,
 
        details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use);
        if (cmd_details) {
-               memcpy(details, cmd_details,
-                      sizeof(struct i40e_asq_cmd_details));
+               *details = *cmd_details;
 
                /* If the cmd_details are defined copy the cookie.  The
                 * cpu_to_le32 is not needed here because the data is ignored
@@ -760,7 +759,7 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw,
        desc_on_ring = I40E_ADMINQ_DESC(hw->aq.asq, hw->aq.asq.next_to_use);
 
        /* if the desc is available copy the temp desc to the right place */
-       memcpy(desc_on_ring, desc, sizeof(struct i40e_aq_desc));
+       *desc_on_ring = *desc;
 
        /* if buff is not NULL assume indirect command */
        if (buff != NULL) {
@@ -807,7 +806,7 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw,
 
        /* if ready, copy the desc back to temp */
        if (i40e_asq_done(hw)) {
-               memcpy(desc, desc_on_ring, sizeof(struct i40e_aq_desc));
+               *desc = *desc_on_ring;
                if (buff != NULL)
                        memcpy(buff, dma_buff->va, buff_size);
                retval = le16_to_cpu(desc->retval);
index c21df7b..1e4ea13 100644 (file)
@@ -507,7 +507,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
 
        /* save link status information */
        if (link)
-               memcpy(link, hw_link_info, sizeof(struct i40e_link_status));
+               *link = *hw_link_info;
 
        /* flag cleared so helper functions don't call AQ again */
        hw->phy.get_link_info = false;
index 601d482..221aa47 100644 (file)
@@ -101,10 +101,10 @@ int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem,
        mem->size = ALIGN(size, alignment);
        mem->va = dma_zalloc_coherent(&pf->pdev->dev, mem->size,
                                      &mem->pa, GFP_KERNEL);
-       if (mem->va)
-               return 0;
+       if (!mem->va)
+               return -ENOMEM;
 
-       return -ENOMEM;
+       return 0;
 }
 
 /**
@@ -136,10 +136,10 @@ int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem,
        mem->size = size;
        mem->va = kzalloc(size, GFP_KERNEL);
 
-       if (mem->va)
-               return 0;
+       if (!mem->va)
+               return -ENOMEM;
 
-       return -ENOMEM;
+       return 0;
 }
 
 /**
@@ -174,8 +174,7 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
                         u16 needed, u16 id)
 {
        int ret = -ENOMEM;
-       int i = 0;
-       int j = 0;
+       int i, j;
 
        if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
                dev_info(&pf->pdev->dev,
@@ -186,7 +185,7 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
 
        /* start the linear search with an imperfect hint */
        i = pile->search_hint;
-       while (i < pile->num_entries && ret < 0) {
+       while (i < pile->num_entries) {
                /* skip already allocated entries */
                if (pile->list[i] & I40E_PILE_VALID_BIT) {
                        i++;
@@ -205,6 +204,7 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
                                pile->list[i+j] = id | I40E_PILE_VALID_BIT;
                        ret = i;
                        pile->search_hint = i + j;
+                       break;
                } else {
                        /* not enough, so skip over it and continue looking */
                        i += j;
@@ -1388,7 +1388,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
        bool add_happened = false;
        int filter_list_len = 0;
        u32 changed_flags = 0;
-       i40e_status ret = 0;
+       i40e_status aq_ret = 0;
        struct i40e_pf *pf;
        int num_add = 0;
        int num_del = 0;
@@ -1449,28 +1449,28 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 
                        /* flush a full buffer */
                        if (num_del == filter_list_len) {
-                               ret = i40e_aq_remove_macvlan(&pf->hw,
+                               aq_ret = i40e_aq_remove_macvlan(&pf->hw,
                                            vsi->seid, del_list, num_del,
                                            NULL);
                                num_del = 0;
                                memset(del_list, 0, sizeof(*del_list));
 
-                               if (ret)
+                               if (aq_ret)
                                        dev_info(&pf->pdev->dev,
                                                 "ignoring delete macvlan error, err %d, aq_err %d while flushing a full buffer\n",
-                                                ret,
+                                                aq_ret,
                                                 pf->hw.aq.asq_last_status);
                        }
                }
                if (num_del) {
-                       ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid,
+                       aq_ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid,
                                                     del_list, num_del, NULL);
                        num_del = 0;
 
-                       if (ret)
+                       if (aq_ret)
                                dev_info(&pf->pdev->dev,
                                         "ignoring delete macvlan error, err %d, aq_err %d\n",
-                                        ret, pf->hw.aq.asq_last_status);
+                                        aq_ret, pf->hw.aq.asq_last_status);
                }
 
                kfree(del_list);
@@ -1515,32 +1515,30 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 
                        /* flush a full buffer */
                        if (num_add == filter_list_len) {
-                               ret = i40e_aq_add_macvlan(&pf->hw,
-                                                         vsi->seid,
-                                                         add_list,
-                                                         num_add,
-                                                         NULL);
+                               aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
+                                                            add_list, num_add,
+                                                            NULL);
                                num_add = 0;
 
-                               if (ret)
+                               if (aq_ret)
                                        break;
                                memset(add_list, 0, sizeof(*add_list));
                        }
                }
                if (num_add) {
-                       ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
-                                                 add_list, num_add, NULL);
+                       aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
+                                                    add_list, num_add, NULL);
                        num_add = 0;
                }
                kfree(add_list);
                add_list = NULL;
 
-               if (add_happened && (!ret)) {
+               if (add_happened && (!aq_ret)) {
                        /* do nothing */;
-               } else if (add_happened && (ret)) {
+               } else if (add_happened && (aq_ret)) {
                        dev_info(&pf->pdev->dev,
                                 "add filter failed, err %d, aq_err %d\n",
-                                ret, pf->hw.aq.asq_last_status);
+                                aq_ret, pf->hw.aq.asq_last_status);
                        if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOSPC) &&
                            !test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
                                      &vsi->state)) {
@@ -1556,28 +1554,27 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
        if (changed_flags & IFF_ALLMULTI) {
                bool cur_multipromisc;
                cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI);
-               ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw,
-                                                           vsi->seid,
-                                                           cur_multipromisc,
-                                                           NULL);
-               if (ret)
+               aq_ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw,
+                                                              vsi->seid,
+                                                              cur_multipromisc,
+                                                              NULL);
+               if (aq_ret)
                        dev_info(&pf->pdev->dev,
                                 "set multi promisc failed, err %d, aq_err %d\n",
-                                ret, pf->hw.aq.asq_last_status);
+                                aq_ret, pf->hw.aq.asq_last_status);
        }
        if ((changed_flags & IFF_PROMISC) || promisc_forced_on) {
                bool cur_promisc;
                cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
                               test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
                                        &vsi->state));
-               ret = i40e_aq_set_vsi_unicast_promiscuous(&vsi->back->hw,
-                                                         vsi->seid,
-                                                         cur_promisc,
-                                                         NULL);
-               if (ret)
+               aq_ret = i40e_aq_set_vsi_unicast_promiscuous(&vsi->back->hw,
+                                                            vsi->seid,
+                                                            cur_promisc, NULL);
+               if (aq_ret)
                        dev_info(&pf->pdev->dev,
                                 "set uni promisc failed, err %d, aq_err %d\n",
-                                ret, pf->hw.aq.asq_last_status);
+                                aq_ret, pf->hw.aq.asq_last_status);
        }
 
        clear_bit(__I40E_CONFIG_BUSY, &vsi->state);
@@ -1790,6 +1787,8 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
  * i40e_vsi_kill_vlan - Remove vsi membership for given vlan
  * @vsi: the vsi being configured
  * @vid: vlan id to be removed (0 = untagged only , -1 = any)
+ *
+ * Return: 0 on success or negative otherwise
  **/
 int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
 {
@@ -1863,37 +1862,39 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
  * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload
  * @netdev: network interface to be adjusted
  * @vid: vlan id to be added
+ *
+ * net_device_ops implementation for adding vlan ids
  **/
 static int i40e_vlan_rx_add_vid(struct net_device *netdev,
                                __always_unused __be16 proto, u16 vid)
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
-       int ret;
+       int ret = 0;
 
        if (vid > 4095)
-               return 0;
+               return -EINVAL;
+
+       netdev_info(netdev, "adding %pM vid=%d\n", netdev->dev_addr, vid);
 
-       netdev_info(vsi->netdev, "adding %pM vid=%d\n",
-                   netdev->dev_addr, vid);
        /* If the network stack called us with vid = 0, we should
         * indicate to i40e_vsi_add_vlan() that we want to receive
         * any traffic (i.e. with any vlan tag, or untagged)
         */
        ret = i40e_vsi_add_vlan(vsi, vid ? vid : I40E_VLAN_ANY);
 
-       if (!ret) {
-               if (vid < VLAN_N_VID)
-                       set_bit(vid, vsi->active_vlans);
-       }
+       if (!ret && (vid < VLAN_N_VID))
+               set_bit(vid, vsi->active_vlans);
 
-       return 0;
+       return ret;
 }
 
 /**
  * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
  * @netdev: network interface to be adjusted
  * @vid: vlan id to be removed
+ *
+ * net_device_ops implementation for adding vlan ids
  **/
 static int i40e_vlan_rx_kill_vid(struct net_device *netdev,
                                 __always_unused __be16 proto, u16 vid)
@@ -1901,15 +1902,16 @@ static int i40e_vlan_rx_kill_vid(struct net_device *netdev,
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
 
-       netdev_info(vsi->netdev, "removing %pM vid=%d\n",
-                   netdev->dev_addr, vid);
+       netdev_info(netdev, "removing %pM vid=%d\n", netdev->dev_addr, vid);
+
        /* return code is ignored as there is nothing a user
         * can do about failure to remove and a log message was
-        * already printed from another function
+        * already printed from the other function
         */
        i40e_vsi_kill_vlan(vsi, vid);
 
        clear_bit(vid, vsi->active_vlans);
+
        return 0;
 }
 
@@ -1936,10 +1938,10 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi)
  * @vsi: the vsi being adjusted
  * @vid: the vlan id to set as a PVID
  **/
-i40e_status i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid)
+int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid)
 {
        struct i40e_vsi_context ctxt;
-       i40e_status ret;
+       i40e_status aq_ret;
 
        vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
        vsi->info.pvid = cpu_to_le16(vid);
@@ -1948,14 +1950,15 @@ i40e_status i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid)
 
        ctxt.seid = vsi->seid;
        memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
-       ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
-       if (ret) {
+       aq_ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+       if (aq_ret) {
                dev_info(&vsi->back->pdev->dev,
                         "%s: update vsi failed, aq_err=%d\n",
                         __func__, vsi->back->hw.aq.asq_last_status);
+               return -ENOENT;
        }
 
-       return ret;
+       return 0;
 }
 
 /**
@@ -3326,7 +3329,8 @@ static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf)
  **/
 static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
 {
-       int num_tc = 0, i;
+       u8 num_tc = 0;
+       int i;
 
        /* Scan the ETS Config Priority Table to find
         * traffic class enabled for a given priority
@@ -3341,9 +3345,7 @@ static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
        /* Traffic class index starts from zero so
         * increment to return the actual count
         */
-       num_tc++;
-
-       return num_tc;
+       return num_tc + 1;
 }
 
 /**
@@ -3451,28 +3453,27 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
        struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
        struct i40e_pf *pf = vsi->back;
        struct i40e_hw *hw = &pf->hw;
+       i40e_status aq_ret;
        u32 tc_bw_max;
-       int ret;
        int i;
 
        /* Get the VSI level BW configuration */
-       ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL);
-       if (ret) {
+       aq_ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL);
+       if (aq_ret) {
                dev_info(&pf->pdev->dev,
                         "couldn't get pf vsi bw config, err %d, aq_err %d\n",
-                        ret, pf->hw.aq.asq_last_status);
-               return ret;
+                        aq_ret, pf->hw.aq.asq_last_status);
+               return -EINVAL;
        }
 
        /* Get the VSI level BW configuration per TC */
-       ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid,
-                                              &bw_ets_config,
-                                              NULL);
-       if (ret) {
+       aq_ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid, &bw_ets_config,
+                                                 NULL);
+       if (aq_ret) {
                dev_info(&pf->pdev->dev,
                         "couldn't get pf vsi ets bw config, err %d, aq_err %d\n",
-                        ret, pf->hw.aq.asq_last_status);
-               return ret;
+                        aq_ret, pf->hw.aq.asq_last_status);
+               return -EINVAL;
        }
 
        if (bw_config.tc_valid_bits != bw_ets_config.tc_valid_bits) {
@@ -3494,7 +3495,8 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
                /* 3 bits out of 4 for each TC */
                vsi->bw_ets_max_quanta[i] = (u8)((tc_bw_max >> (i*4)) & 0x7);
        }
-       return ret;
+
+       return 0;
 }
 
 /**
@@ -3505,30 +3507,30 @@ static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
  *
  * Returns 0 on success, negative value on failure
  **/
-static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi,
-                                      u8 enabled_tc,
+static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
                                       u8 *bw_share)
 {
        struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
-       int i, ret = 0;
+       i40e_status aq_ret;
+       int i;
 
        bw_data.tc_valid_bits = enabled_tc;
        for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
                bw_data.tc_bw_credits[i] = bw_share[i];
 
-       ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, vsi->seid,
-                                      &bw_data, NULL);
-       if (ret) {
+       aq_ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, vsi->seid, &bw_data,
+                                         NULL);
+       if (aq_ret) {
                dev_info(&vsi->back->pdev->dev,
                         "%s: AQ command Config VSI BW allocation per TC failed = %d\n",
                         __func__, vsi->back->hw.aq.asq_last_status);
-               return ret;
+               return -EINVAL;
        }
 
        for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
                vsi->info.qs_handle[i] = bw_data.qs_handles[i];
 
-       return ret;
+       return 0;
 }
 
 /**
index 48cbc83..86d5142 100644 (file)
@@ -1607,6 +1607,9 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
                        igb_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
                        igb_write_phy_reg(hw, PHY_CONTROL, 0x4140);
                }
+       } else if (hw->phy.type == e1000_phy_82580) {
+               /* enable MII loopback */
+               igb_write_phy_reg(hw, I82580_PHY_LBK_CTRL, 0x8041);
        }
 
        /* add small delay to avoid loopback test failure */
index 1a9c4f6..ecc7f7b 100644 (file)
@@ -3086,13 +3086,16 @@ static struct sk_buff *skge_rx_get(struct net_device *dev,
                                               PCI_DMA_FROMDEVICE);
                skge_rx_reuse(e, skge->rx_buf_size);
        } else {
+               struct skge_element ee;
                struct sk_buff *nskb;
 
                nskb = netdev_alloc_skb_ip_align(dev, skge->rx_buf_size);
                if (!nskb)
                        goto resubmit;
 
-               skb = e->skb;
+               ee = *e;
+
+               skb = ee.skb;
                prefetch(skb->data);
 
                if (skge_rx_setup(skge, e, nskb, skge->rx_buf_size) < 0) {
@@ -3101,8 +3104,8 @@ static struct sk_buff *skge_rx_get(struct net_device *dev,
                }
 
                pci_unmap_single(skge->hw->pdev,
-                                dma_unmap_addr(e, mapaddr),
-                                dma_unmap_len(e, maplen),
+                                dma_unmap_addr(&ee, mapaddr),
+                                dma_unmap_len(&ee, maplen),
                                 PCI_DMA_FROMDEVICE);
        }
 
index 5472cbd..6ca3073 100644 (file)
@@ -180,28 +180,32 @@ static int verify_block_sig(struct mlx5_cmd_prot_block *block)
        return 0;
 }
 
-static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token)
+static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token,
+                          int csum)
 {
        block->token = token;
-       block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 2);
-       block->sig = ~xor8_buf(block, sizeof(*block) - 1);
+       if (csum) {
+               block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) -
+                                           sizeof(block->data) - 2);
+               block->sig = ~xor8_buf(block, sizeof(*block) - 1);
+       }
 }
 
-static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token)
+static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum)
 {
        struct mlx5_cmd_mailbox *next = msg->next;
 
        while (next) {
-               calc_block_sig(next->buf, token);
+               calc_block_sig(next->buf, token, csum);
                next = next->next;
        }
 }
 
-static void set_signature(struct mlx5_cmd_work_ent *ent)
+static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
 {
        ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay));
-       calc_chain_sig(ent->in, ent->token);
-       calc_chain_sig(ent->out, ent->token);
+       calc_chain_sig(ent->in, ent->token, csum);
+       calc_chain_sig(ent->out, ent->token, csum);
 }
 
 static void poll_timeout(struct mlx5_cmd_work_ent *ent)
@@ -539,8 +543,7 @@ static void cmd_work_handler(struct work_struct *work)
        lay->type = MLX5_PCI_CMD_XPORT;
        lay->token = ent->token;
        lay->status_own = CMD_OWNER_HW;
-       if (!cmd->checksum_disabled)
-               set_signature(ent);
+       set_signature(ent, !cmd->checksum_disabled);
        dump_command(dev, ent, 1);
        ktime_get_ts(&ent->ts1);
 
@@ -773,8 +776,6 @@ static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size)
 
                copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
                block = next->buf;
-               if (xor8_buf(block, sizeof(*block)) != 0xff)
-                       return -EINVAL;
 
                memcpy(to, block->data, copy);
                to += copy;
@@ -1361,6 +1362,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
                goto err_map;
        }
 
+       cmd->checksum_disabled = 1;
        cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
        cmd->bitmask = (1 << cmd->max_reg_cmds) - 1;
 
@@ -1510,7 +1512,7 @@ int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr)
        case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:           return -EIO;
        case MLX5_CMD_STAT_BAD_RES_ERR:                 return -EINVAL;
        case MLX5_CMD_STAT_RES_BUSY:                    return -EBUSY;
-       case MLX5_CMD_STAT_LIM_ERR:                     return -EINVAL;
+       case MLX5_CMD_STAT_LIM_ERR:                     return -ENOMEM;
        case MLX5_CMD_STAT_BAD_RES_STATE_ERR:           return -EINVAL;
        case MLX5_CMD_STAT_IX_ERR:                      return -EINVAL;
        case MLX5_CMD_STAT_NO_RES_ERR:                  return -EAGAIN;
index 443cc4d..2231d93 100644 (file)
@@ -366,9 +366,11 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
                goto err_in;
        }
 
+       snprintf(eq->name, MLX5_MAX_EQ_NAME, "%s@pci:%s",
+                name, pci_name(dev->pdev));
        eq->eqn = out.eq_number;
        err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0,
-                         name, eq);
+                         eq->name, eq);
        if (err)
                goto err_eq;
 
index b47739b..bc0f5fb 100644 (file)
@@ -165,9 +165,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
        struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL;
        struct mlx5_cmd_query_hca_cap_mbox_in query_ctx;
        struct mlx5_cmd_set_hca_cap_mbox_out set_out;
-       struct mlx5_profile *prof = dev->profile;
        u64 flags;
-       int csum = 1;
        int err;
 
        memset(&query_ctx, 0, sizeof(query_ctx));
@@ -197,20 +195,14 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
        memcpy(&set_ctx->hca_cap, &query_out->hca_cap,
               sizeof(set_ctx->hca_cap));
 
-       if (prof->mask & MLX5_PROF_MASK_CMDIF_CSUM) {
-               csum = !!prof->cmdif_csum;
-               flags = be64_to_cpu(set_ctx->hca_cap.flags);
-               if (csum)
-                       flags |= MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
-               else
-                       flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
-
-               set_ctx->hca_cap.flags = cpu_to_be64(flags);
-       }
-
        if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
                set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
 
+       flags = be64_to_cpu(query_out->hca_cap.flags);
+       /* disable checksum */
+       flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
+
+       set_ctx->hca_cap.flags = cpu_to_be64(flags);
        memset(&set_out, 0, sizeof(set_out));
        set_ctx->hca_cap.log_uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12);
        set_ctx->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP);
@@ -225,9 +217,6 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
        if (err)
                goto query_ex;
 
-       if (!csum)
-               dev->cmd.checksum_disabled = 1;
-
 query_ex:
        kfree(query_out);
        kfree(set_ctx);
index 3a2408d..7b12acf 100644 (file)
@@ -90,6 +90,10 @@ struct mlx5_manage_pages_outbox {
        __be64                  pas[0];
 };
 
+enum {
+       MAX_RECLAIM_TIME_MSECS  = 5000,
+};
+
 static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
 {
        struct rb_root *root = &dev->priv.page_root;
@@ -279,6 +283,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
        int err;
        int i;
 
+       if (nclaimed)
+               *nclaimed = 0;
+
        memset(&in, 0, sizeof(in));
        outlen = sizeof(*out) + npages * sizeof(out->pas[0]);
        out = mlx5_vzalloc(outlen);
@@ -388,20 +395,25 @@ static int optimal_reclaimed_pages(void)
 
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 {
-       unsigned long end = jiffies + msecs_to_jiffies(5000);
+       unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
        struct fw_page *fwp;
        struct rb_node *p;
+       int nclaimed = 0;
        int err;
 
        do {
                p = rb_first(&dev->priv.page_root);
                if (p) {
                        fwp = rb_entry(p, struct fw_page, rb_node);
-                       err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), NULL);
+                       err = reclaim_pages(dev, fwp->func_id,
+                                           optimal_reclaimed_pages(),
+                                           &nclaimed);
                        if (err) {
                                mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err);
                                return err;
                        }
+                       if (nclaimed)
+                               end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
                }
                if (time_after(jiffies, end)) {
                        mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
index 83c2091..bd1a2d2 100644 (file)
@@ -543,7 +543,7 @@ static const struct of_device_id moxart_mac_match[] = {
        { }
 };
 
-struct __initdata platform_driver moxart_mac_driver = {
+static struct platform_driver moxart_mac_driver = {
        .probe  = moxart_mac_probe,
        .remove = moxart_remove,
        .driver = {
index 4d7ad00..ebe4c86 100644 (file)
@@ -1794,3 +1794,11 @@ const struct ethtool_ops qlcnic_sriov_vf_ethtool_ops = {
        .set_msglevel           = qlcnic_set_msglevel,
        .get_msglevel           = qlcnic_get_msglevel,
 };
+
+const struct ethtool_ops qlcnic_ethtool_failed_ops = {
+       .get_settings           = qlcnic_get_settings,
+       .get_drvinfo            = qlcnic_get_drvinfo,
+       .set_msglevel           = qlcnic_set_msglevel,
+       .get_msglevel           = qlcnic_get_msglevel,
+       .set_dump               = qlcnic_set_dump,
+};
index c4c5023..21d00a0 100644 (file)
@@ -431,6 +431,9 @@ static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter)
        while (test_and_set_bit(__QLCNIC_RESETTING, &adapter->state))
                usleep_range(10000, 11000);
 
+       if (!adapter->fw_work.work.func)
+               return;
+
        cancel_delayed_work_sync(&adapter->fw_work);
 }
 
@@ -2275,8 +2278,9 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                adapter->portnum = adapter->ahw->pci_func;
                err = qlcnic_start_firmware(adapter);
                if (err) {
-                       dev_err(&pdev->dev, "Loading fw failed.Please Reboot\n");
-                       goto err_out_free_hw;
+                       dev_err(&pdev->dev, "Loading fw failed.Please Reboot\n"
+                               "\t\tIf reboot doesn't help, try flashing the card\n");
+                       goto err_out_maintenance_mode;
                }
 
                qlcnic_get_multiq_capability(adapter);
@@ -2408,6 +2412,22 @@ err_out_disable_pdev:
        pci_set_drvdata(pdev, NULL);
        pci_disable_device(pdev);
        return err;
+
+err_out_maintenance_mode:
+       netdev->netdev_ops = &qlcnic_netdev_failed_ops;
+       SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_failed_ops);
+       err = register_netdev(netdev);
+
+       if (err) {
+               dev_err(&pdev->dev, "Failed to register net device\n");
+               qlcnic_clr_all_drv_state(adapter, 0);
+               goto err_out_free_hw;
+       }
+
+       pci_set_drvdata(pdev, adapter);
+       qlcnic_add_sysfs(adapter);
+
+       return 0;
 }
 
 static void qlcnic_remove(struct pci_dev *pdev)
@@ -2518,8 +2538,16 @@ static int qlcnic_resume(struct pci_dev *pdev)
 static int qlcnic_open(struct net_device *netdev)
 {
        struct qlcnic_adapter *adapter = netdev_priv(netdev);
+       u32 state;
        int err;
 
+       state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE);
+       if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD) {
+               netdev_err(netdev, "%s: Device is in FAILED state\n", __func__);
+
+               return -EIO;
+       }
+
        netif_carrier_off(netdev);
 
        err = qlcnic_attach(adapter);
@@ -3228,6 +3256,13 @@ void qlcnic_82xx_dev_request_reset(struct qlcnic_adapter *adapter, u32 key)
                return;
 
        state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE);
+       if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD) {
+               netdev_err(adapter->netdev, "%s: Device is in FAILED state\n",
+                          __func__);
+               qlcnic_api_unlock(adapter);
+
+               return;
+       }
 
        if (state == QLCNIC_DEV_READY) {
                QLC_SHARED_REG_WR32(adapter, QLCNIC_CRB_DEV_STATE,
index 330d9a8..686f460 100644 (file)
@@ -397,6 +397,7 @@ static int qlcnic_pci_sriov_disable(struct qlcnic_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
 
+       rtnl_lock();
        if (netif_running(netdev))
                __qlcnic_down(adapter, netdev);
 
@@ -407,12 +408,15 @@ static int qlcnic_pci_sriov_disable(struct qlcnic_adapter *adapter)
        /* After disabling SRIOV re-init the driver in default mode
           configure opmode based on op_mode of function
         */
-       if (qlcnic_83xx_configure_opmode(adapter))
+       if (qlcnic_83xx_configure_opmode(adapter)) {
+               rtnl_unlock();
                return -EIO;
+       }
 
        if (netif_running(netdev))
                __qlcnic_up(adapter, netdev);
 
+       rtnl_unlock();
        return 0;
 }
 
@@ -533,6 +537,7 @@ static int qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter, int num_vfs)
                return -EIO;
        }
 
+       rtnl_lock();
        if (netif_running(netdev))
                __qlcnic_down(adapter, netdev);
 
@@ -555,6 +560,7 @@ static int qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter, int num_vfs)
                __qlcnic_up(adapter, netdev);
 
 error:
+       rtnl_unlock();
        return err;
 }
 
index c6165d0..019f437 100644 (file)
@@ -1272,6 +1272,7 @@ void qlcnic_remove_sysfs_entries(struct qlcnic_adapter *adapter)
 void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter)
 {
        struct device *dev = &adapter->pdev->dev;
+       u32 state;
 
        if (device_create_bin_file(dev, &bin_attr_port_stats))
                dev_info(dev, "failed to create port stats sysfs entry");
@@ -1285,8 +1286,13 @@ void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter)
        if (device_create_bin_file(dev, &bin_attr_mem))
                dev_info(dev, "failed to create mem sysfs entry\n");
 
+       state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE);
+       if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD)
+               return;
+
        if (device_create_bin_file(dev, &bin_attr_pci_config))
                dev_info(dev, "failed to create pci config sysfs entry");
+
        if (device_create_file(dev, &dev_attr_beacon))
                dev_info(dev, "failed to create beacon sysfs entry");
 
@@ -1307,6 +1313,7 @@ void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter)
 void qlcnic_remove_diag_entries(struct qlcnic_adapter *adapter)
 {
        struct device *dev = &adapter->pdev->dev;
+       u32 state;
 
        device_remove_bin_file(dev, &bin_attr_port_stats);
 
@@ -1315,6 +1322,11 @@ void qlcnic_remove_diag_entries(struct qlcnic_adapter *adapter)
        device_remove_file(dev, &dev_attr_diag_mode);
        device_remove_bin_file(dev, &bin_attr_crb);
        device_remove_bin_file(dev, &bin_attr_mem);
+
+       state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE);
+       if (state == QLCNIC_DEV_FAILED || state == QLCNIC_DEV_BADBAD)
+               return;
+
        device_remove_bin_file(dev, &bin_attr_pci_config);
        device_remove_file(dev, &dev_attr_beacon);
        if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED))
index 10093f0..6bc5db7 100644 (file)
@@ -740,8 +740,8 @@ int ql_core_dump(struct ql_adapter *qdev, struct ql_mpi_coredump *mpi_coredump)
        int i;
 
        if (!mpi_coredump) {
-               netif_err(qdev, drv, qdev->ndev, "No memory available\n");
-               return -ENOMEM;
+               netif_err(qdev, drv, qdev->ndev, "No memory allocated\n");
+               return -EINVAL;
        }
 
        /* Try to get the spinlock, but dont worry if
index ff2bf8a..7ad1460 100644 (file)
@@ -1274,7 +1274,7 @@ void ql_mpi_reset_work(struct work_struct *work)
                return;
        }
 
-       if (!ql_core_dump(qdev, qdev->mpi_coredump)) {
+       if (qdev->mpi_coredump && !ql_core_dump(qdev, qdev->mpi_coredump)) {
                netif_err(qdev, drv, qdev->ndev, "Core is dumped!\n");
                qdev->core_is_dumped = 1;
                queue_delayed_work(qdev->workqueue,
index 128d7cd..c082562 100644 (file)
 
 /* A reboot/assertion causes the MCDI status word to be set after the
  * command word is set or a REBOOT event is sent. If we notice a reboot
- * via these mechanisms then wait 20ms for the status word to be set.
+ * via these mechanisms then wait 250ms for the status word to be set.
  */
 #define MCDI_STATUS_DELAY_US           100
-#define MCDI_STATUS_DELAY_COUNT                200
+#define MCDI_STATUS_DELAY_COUNT                2500
 #define MCDI_STATUS_SLEEP_MS                                           \
        (MCDI_STATUS_DELAY_US * MCDI_STATUS_DELAY_COUNT / 1000)
 
@@ -800,9 +800,6 @@ static void efx_mcdi_ev_death(struct efx_nic *efx, int rc)
        } else {
                int count;
 
-               /* Nobody was waiting for an MCDI request, so trigger a reset */
-               efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE);
-
                /* Consume the status word since efx_mcdi_rpc_finish() won't */
                for (count = 0; count < MCDI_STATUS_DELAY_COUNT; ++count) {
                        if (efx_mcdi_poll_reboot(efx))
@@ -810,6 +807,9 @@ static void efx_mcdi_ev_death(struct efx_nic *efx, int rc)
                        udelay(MCDI_STATUS_DELAY_US);
                }
                mcdi->new_epoch = true;
+
+               /* Nobody was waiting for an MCDI request, so trigger a reset */
+               efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE);
        }
 
        spin_unlock(&mcdi->iface_lock);
index c8f088a..bdf697b 100644 (file)
@@ -32,7 +32,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #define DRV_NAME       "via-rhine"
-#define DRV_VERSION    "1.5.0"
+#define DRV_VERSION    "1.5.1"
 #define DRV_RELDATE    "2010-10-09"
 
 #include <linux/types.h>
@@ -1704,7 +1704,12 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
                cpu_to_le32(TXDESC | (skb->len >= ETH_ZLEN ? skb->len : ETH_ZLEN));
 
        if (unlikely(vlan_tx_tag_present(skb))) {
-               rp->tx_ring[entry].tx_status = cpu_to_le32((vlan_tx_tag_get(skb)) << 16);
+               u16 vid_pcp = vlan_tx_tag_get(skb);
+
+               /* drop CFI/DEI bit, register needs VID and PCP */
+               vid_pcp = (vid_pcp & VLAN_VID_MASK) |
+                         ((vid_pcp & VLAN_PRIO_MASK) >> 1);
+               rp->tx_ring[entry].tx_status = cpu_to_le32((vid_pcp) << 16);
                /* request tagging */
                rp->tx_ring[entry].desc_length |= cpu_to_le32(0x020000);
        }
index b88121f..0029148 100644 (file)
@@ -297,6 +297,12 @@ static int temac_dma_bd_init(struct net_device *ndev)
                       lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
        lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p);
 
+       /* Init descriptor indexes */
+       lp->tx_bd_ci = 0;
+       lp->tx_bd_next = 0;
+       lp->tx_bd_tail = 0;
+       lp->rx_bd_ci = 0;
+
        return 0;
 
 out:
index a34d6bf..cc70ecf 100644 (file)
@@ -429,11 +429,13 @@ static void slip_write_wakeup(struct tty_struct *tty)
        if (!sl || sl->magic != SLIP_MAGIC || !netif_running(sl->dev))
                return;
 
+       spin_lock(&sl->lock);
        if (sl->xleft <= 0)  {
                /* Now serial buffer is almost free & we can start
                 * transmission of another packet */
                sl->dev->stats.tx_packets++;
                clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+               spin_unlock(&sl->lock);
                sl_unlock(sl);
                return;
        }
@@ -441,6 +443,7 @@ static void slip_write_wakeup(struct tty_struct *tty)
        actual = tty->ops->write(tty, sl->xhead, sl->xleft);
        sl->xleft -= actual;
        sl->xhead += actual;
+       spin_unlock(&sl->lock);
 }
 
 static void sl_tx_timeout(struct net_device *dev)
index 2dbb946..c6867f9 100644 (file)
@@ -303,7 +303,7 @@ static void dm9601_set_multicast(struct net_device *net)
                rx_ctl |= 0x02;
        } else if (net->flags & IFF_ALLMULTI ||
                   netdev_mc_count(net) > DM_MAX_MCAST) {
-               rx_ctl |= 0x04;
+               rx_ctl |= 0x08;
        } else if (!netdev_mc_empty(net)) {
                struct netdev_hw_addr *ha;
 
index 6312332..3d6aaf7 100644 (file)
@@ -714,7 +714,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x2357, 0x0201, 4)},    /* TP-LINK HSUPA Modem MA180 */
        {QMI_FIXED_INTF(0x2357, 0x9000, 4)},    /* TP-LINK MA260 */
        {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},    /* Telit LE920 */
-       {QMI_FIXED_INTF(0x1e2d, 0x12d1, 4)},    /* Cinterion PLxx */
+       {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)},    /* Cinterion PLxx */
 
        /* 4. Gobi 1000 devices */
        {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},    /* Acer Gobi Modem Device */
index 7b331e6..bf94e10 100644 (file)
@@ -1241,7 +1241,9 @@ static int build_dma_sg(const struct sk_buff *skb, struct urb *urb)
        if (num_sgs == 1)
                return 0;
 
-       urb->sg = kmalloc(num_sgs * sizeof(struct scatterlist), GFP_ATOMIC);
+       /* reserve one for zero packet */
+       urb->sg = kmalloc((num_sgs + 1) * sizeof(struct scatterlist),
+                         GFP_ATOMIC);
        if (!urb->sg)
                return -ENOMEM;
 
@@ -1305,7 +1307,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
                if (build_dma_sg(skb, urb) < 0)
                        goto drop;
        }
-       entry->length = length = urb->transfer_buffer_length;
+       length = urb->transfer_buffer_length;
 
        /* don't assume the hardware handles USB_ZERO_PACKET
         * NOTE:  strictly conforming cdc-ether devices should expect
@@ -1317,15 +1319,18 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
        if (length % dev->maxpacket == 0) {
                if (!(info->flags & FLAG_SEND_ZLP)) {
                        if (!(info->flags & FLAG_MULTI_PACKET)) {
-                               urb->transfer_buffer_length++;
-                               if (skb_tailroom(skb)) {
+                               length++;
+                               if (skb_tailroom(skb) && !urb->num_sgs) {
                                        skb->data[skb->len] = 0;
                                        __skb_put(skb, 1);
-                               }
+                               } else if (urb->num_sgs)
+                                       sg_set_buf(&urb->sg[urb->num_sgs++],
+                                                       dev->padding_pkt, 1);
                        }
                } else
                        urb->transfer_flags |= URB_ZERO_PACKET;
        }
+       entry->length = urb->transfer_buffer_length = length;
 
        spin_lock_irqsave(&dev->txq.lock, flags);
        retval = usb_autopm_get_interface_async(dev->intf);
@@ -1509,6 +1514,7 @@ void usbnet_disconnect (struct usb_interface *intf)
 
        usb_kill_urb(dev->interrupt);
        usb_free_urb(dev->interrupt);
+       kfree(dev->padding_pkt);
 
        free_netdev(net);
 }
@@ -1679,9 +1685,16 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
        /* initialize max rx_qlen and tx_qlen */
        usbnet_update_max_qlen(dev);
 
+       if (dev->can_dma_sg && !(info->flags & FLAG_SEND_ZLP) &&
+               !(info->flags & FLAG_MULTI_PACKET)) {
+               dev->padding_pkt = kzalloc(1, GFP_KERNEL);
+               if (!dev->padding_pkt)
+                       goto out4;
+       }
+
        status = register_netdev (net);
        if (status)
-               goto out4;
+               goto out5;
        netif_info(dev, probe, dev->net,
                   "register '%s' at usb-%s-%s, %s, %pM\n",
                   udev->dev.driver->name,
@@ -1699,6 +1712,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 
        return 0;
 
+out5:
+       kfree(dev->padding_pkt);
 out4:
        usb_free_urb(dev->interrupt);
 out3:
index d1292fe..2ef5b62 100644 (file)
@@ -952,8 +952,7 @@ void vxlan_sock_release(struct vxlan_sock *vs)
 
        spin_lock(&vn->sock_lock);
        hlist_del_rcu(&vs->hlist);
-       smp_wmb();
-       vs->sock->sk->sk_user_data = NULL;
+       rcu_assign_sk_user_data(vs->sock->sk, NULL);
        vxlan_notify_del_rx_port(sk);
        spin_unlock(&vn->sock_lock);
 
@@ -1048,8 +1047,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
        port = inet_sk(sk)->inet_sport;
 
-       smp_read_barrier_depends();
-       vs = (struct vxlan_sock *)sk->sk_user_data;
+       vs = rcu_dereference_sk_user_data(sk);
        if (!vs)
                goto drop;
 
@@ -2302,8 +2300,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
        atomic_set(&vs->refcnt, 1);
        vs->rcv = rcv;
        vs->data = data;
-       smp_wmb();
-       vs->sock->sk->sk_user_data = vs;
+       rcu_assign_sk_user_data(vs->sock->sk, vs);
 
        spin_lock(&vn->sock_lock);
        hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
index 4ee472a..ab9e3a8 100644 (file)
@@ -1270,13 +1270,6 @@ static void ath9k_antenna_check(struct ath_softc *sc,
                return;
 
        /*
-        * All MPDUs in an aggregate will use the same LNA
-        * as the first MPDU.
-        */
-       if (rs->rs_isaggr && !rs->rs_firstaggr)
-               return;
-
-       /*
         * Change the default rx antenna if rx diversity
         * chooses the other antenna 3 times in a row.
         */
index 35b515f..5ac713d 100644 (file)
@@ -399,6 +399,7 @@ static struct ath_buf* ath_clone_txbuf(struct ath_softc *sc, struct ath_buf *bf)
        tbf->bf_buf_addr = bf->bf_buf_addr;
        memcpy(tbf->bf_desc, bf->bf_desc, sc->sc_ah->caps.tx_desc_len);
        tbf->bf_state = bf->bf_state;
+       tbf->bf_state.stale = false;
 
        return tbf;
 }
@@ -1389,11 +1390,15 @@ int ath_tx_aggr_start(struct ath_softc *sc, struct ieee80211_sta *sta,
                      u16 tid, u16 *ssn)
 {
        struct ath_atx_tid *txtid;
+       struct ath_txq *txq;
        struct ath_node *an;
        u8 density;
 
        an = (struct ath_node *)sta->drv_priv;
        txtid = ATH_AN_2_TID(an, tid);
+       txq = txtid->ac->txq;
+
+       ath_txq_lock(sc, txq);
 
        /* update ampdu factor/density, they may have changed. This may happen
         * in HT IBSS when a beacon with HT-info is received after the station
@@ -1417,6 +1422,8 @@ int ath_tx_aggr_start(struct ath_softc *sc, struct ieee80211_sta *sta,
        memset(txtid->tx_buf, 0, sizeof(txtid->tx_buf));
        txtid->baw_head = txtid->baw_tail = 0;
 
+       ath_txq_unlock_complete(sc, txq);
+
        return 0;
 }
 
@@ -1555,8 +1562,10 @@ void ath9k_release_buffered_frames(struct ieee80211_hw *hw,
                        __skb_unlink(bf->bf_mpdu, tid_q);
                        list_add_tail(&bf->list, &bf_q);
                        ath_set_rates(tid->an->vif, tid->an->sta, bf);
-                       ath_tx_addto_baw(sc, tid, bf);
-                       bf->bf_state.bf_type &= ~BUF_AGGR;
+                       if (bf_isampdu(bf)) {
+                               ath_tx_addto_baw(sc, tid, bf);
+                               bf->bf_state.bf_type &= ~BUF_AGGR;
+                       }
                        if (bf_tail)
                                bf_tail->bf_next = bf;
 
@@ -1950,7 +1959,9 @@ static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq,
                        if (bf_is_ampdu_not_probing(bf))
                                txq->axq_ampdu_depth++;
 
-                       bf = bf->bf_lastbf->bf_next;
+                       bf_last = bf->bf_lastbf;
+                       bf = bf_last->bf_next;
+                       bf_last->bf_next = NULL;
                }
        }
 }
index 64f4a2b..c3462b7 100644 (file)
@@ -464,8 +464,6 @@ static struct sdio_driver brcmf_sdmmc_driver = {
 
 static int brcmf_sdio_pd_probe(struct platform_device *pdev)
 {
-       int ret;
-
        brcmf_dbg(SDIO, "Enter\n");
 
        brcmfmac_sdio_pdata = pdev->dev.platform_data;
@@ -473,11 +471,7 @@ static int brcmf_sdio_pd_probe(struct platform_device *pdev)
        if (brcmfmac_sdio_pdata->power_on)
                brcmfmac_sdio_pdata->power_on();
 
-       ret = sdio_register_driver(&brcmf_sdmmc_driver);
-       if (ret)
-               brcmf_err("sdio_register_driver failed: %d\n", ret);
-
-       return ret;
+       return 0;
 }
 
 static int brcmf_sdio_pd_remove(struct platform_device *pdev)
@@ -500,6 +494,15 @@ static struct platform_driver brcmf_sdio_pd = {
        }
 };
 
+void brcmf_sdio_register(void)
+{
+       int ret;
+
+       ret = sdio_register_driver(&brcmf_sdmmc_driver);
+       if (ret)
+               brcmf_err("sdio_register_driver failed: %d\n", ret);
+}
+
 void brcmf_sdio_exit(void)
 {
        brcmf_dbg(SDIO, "Enter\n");
@@ -510,18 +513,13 @@ void brcmf_sdio_exit(void)
                sdio_unregister_driver(&brcmf_sdmmc_driver);
 }
 
-void brcmf_sdio_init(void)
+void __init brcmf_sdio_init(void)
 {
        int ret;
 
        brcmf_dbg(SDIO, "Enter\n");
 
        ret = platform_driver_probe(&brcmf_sdio_pd, brcmf_sdio_pd_probe);
-       if (ret == -ENODEV) {
-               brcmf_dbg(SDIO, "No platform data available, registering without.\n");
-               ret = sdio_register_driver(&brcmf_sdmmc_driver);
-       }
-
-       if (ret)
-               brcmf_err("driver registration failed: %d\n", ret);
+       if (ret == -ENODEV)
+               brcmf_dbg(SDIO, "No platform data available.\n");
 }
index f7c1985..74156f8 100644 (file)
@@ -156,10 +156,11 @@ extern int brcmf_bus_start(struct device *dev);
 #ifdef CONFIG_BRCMFMAC_SDIO
 extern void brcmf_sdio_exit(void);
 extern void brcmf_sdio_init(void);
+extern void brcmf_sdio_register(void);
 #endif
 #ifdef CONFIG_BRCMFMAC_USB
 extern void brcmf_usb_exit(void);
-extern void brcmf_usb_init(void);
+extern void brcmf_usb_register(void);
 #endif
 
 #endif                         /* _BRCMF_BUS_H_ */
index e067aec..40e7f85 100644 (file)
@@ -1231,21 +1231,23 @@ u32 brcmf_get_chip_info(struct brcmf_if *ifp)
        return bus->chip << 4 | bus->chiprev;
 }
 
-static void brcmf_driver_init(struct work_struct *work)
+static void brcmf_driver_register(struct work_struct *work)
 {
-       brcmf_debugfs_init();
-
 #ifdef CONFIG_BRCMFMAC_SDIO
-       brcmf_sdio_init();
+       brcmf_sdio_register();
 #endif
 #ifdef CONFIG_BRCMFMAC_USB
-       brcmf_usb_init();
+       brcmf_usb_register();
 #endif
 }
-static DECLARE_WORK(brcmf_driver_work, brcmf_driver_init);
+static DECLARE_WORK(brcmf_driver_work, brcmf_driver_register);
 
 static int __init brcmfmac_module_init(void)
 {
+       brcmf_debugfs_init();
+#ifdef CONFIG_BRCMFMAC_SDIO
+       brcmf_sdio_init();
+#endif
        if (!schedule_work(&brcmf_driver_work))
                return -EBUSY;
 
index 39e01a7..f4aea47 100644 (file)
@@ -1539,7 +1539,7 @@ void brcmf_usb_exit(void)
        brcmf_release_fw(&fw_image_list);
 }
 
-void brcmf_usb_init(void)
+void brcmf_usb_register(void)
 {
        brcmf_dbg(USB, "Enter\n");
        INIT_LIST_HEAD(&fw_image_list);
index 3a65447..edc5d10 100644 (file)
@@ -457,6 +457,8 @@ static int brcms_ops_start(struct ieee80211_hw *hw)
        if (err != 0)
                brcms_err(wl->wlc->hw->d11core, "%s: brcms_up() returned %d\n",
                          __func__, err);
+
+       bcma_core_pci_power_save(wl->wlc->hw->d11core->bus, true);
        return err;
 }
 
@@ -479,6 +481,8 @@ static void brcms_ops_stop(struct ieee80211_hw *hw)
                return;
        }
 
+       bcma_core_pci_power_save(wl->wlc->hw->d11core->bus, false);
+
        /* put driver in down state */
        spin_lock_bh(&wl->lock);
        brcms_down(wl);
index f5e6b48..899cad3 100644 (file)
@@ -42,7 +42,6 @@ struct hwbus_priv {
        spinlock_t              lock; /* Serialize all bus operations */
        wait_queue_head_t       wq;
        int claimed;
-       int irq_disabled;
 };
 
 #define SDIO_TO_SPI_ADDR(addr) ((addr & 0x1f)>>2)
@@ -238,8 +237,6 @@ static irqreturn_t cw1200_spi_irq_handler(int irq, void *dev_id)
        struct hwbus_priv *self = dev_id;
 
        if (self->core) {
-               disable_irq_nosync(self->func->irq);
-               self->irq_disabled = 1;
                cw1200_irq_handler(self->core);
                return IRQ_HANDLED;
        } else {
@@ -253,9 +250,10 @@ static int cw1200_spi_irq_subscribe(struct hwbus_priv *self)
 
        pr_debug("SW IRQ subscribe\n");
 
-       ret = request_any_context_irq(self->func->irq, cw1200_spi_irq_handler,
-                                     IRQF_TRIGGER_HIGH,
-                                     "cw1200_wlan_irq", self);
+       ret = request_threaded_irq(self->func->irq, NULL,
+                                  cw1200_spi_irq_handler,
+                                  IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+                                  "cw1200_wlan_irq", self);
        if (WARN_ON(ret < 0))
                goto exit;
 
@@ -273,22 +271,13 @@ exit:
 
 static int cw1200_spi_irq_unsubscribe(struct hwbus_priv *self)
 {
+       int ret = 0;
+
        pr_debug("SW IRQ unsubscribe\n");
        disable_irq_wake(self->func->irq);
        free_irq(self->func->irq, self);
 
-       return 0;
-}
-
-static int cw1200_spi_irq_enable(struct hwbus_priv *self, int enable)
-{
-       /* Disables are handled by the interrupt handler */
-       if (enable && self->irq_disabled) {
-               enable_irq(self->func->irq);
-               self->irq_disabled = 0;
-       }
-
-       return 0;
+       return ret;
 }
 
 static int cw1200_spi_off(const struct cw1200_platform_data_spi *pdata)
@@ -368,7 +357,6 @@ static struct hwbus_ops cw1200_spi_hwbus_ops = {
        .unlock                 = cw1200_spi_unlock,
        .align_size             = cw1200_spi_align_size,
        .power_mgmt             = cw1200_spi_pm,
-       .irq_enable             = cw1200_spi_irq_enable,
 };
 
 /* Probe Function to be called by SPI stack when device is discovered */
index 0b2061b..acdff0f 100644 (file)
@@ -485,7 +485,7 @@ int cw1200_load_firmware(struct cw1200_common *priv)
 
        /* Enable interrupt signalling */
        priv->hwbus_ops->lock(priv->hwbus_priv);
-       ret = __cw1200_irq_enable(priv, 2);
+       ret = __cw1200_irq_enable(priv, 1);
        priv->hwbus_ops->unlock(priv->hwbus_priv);
        if (ret < 0)
                goto unsubscribe;
index 51dfb3a..8b2fc83 100644 (file)
@@ -28,7 +28,6 @@ struct hwbus_ops {
        void (*unlock)(struct hwbus_priv *self);
        size_t (*align_size)(struct hwbus_priv *self, size_t size);
        int (*power_mgmt)(struct hwbus_priv *self, bool suspend);
-       int (*irq_enable)(struct hwbus_priv *self, int enable);
 };
 
 #endif /* CW1200_HWBUS_H */
index 41bd761..ff230b7 100644 (file)
@@ -273,21 +273,6 @@ int __cw1200_irq_enable(struct cw1200_common *priv, int enable)
        u16 val16;
        int ret;
 
-       /* We need to do this hack because the SPI layer can sleep on I/O
-          and the general path involves I/O to the device in interrupt
-          context.
-
-          However, the initial enable call needs to go to the hardware.
-
-          We don't worry about shutdown because we do a full reset which
-          clears the interrupt enabled bits.
-       */
-       if (priv->hwbus_ops->irq_enable) {
-               ret = priv->hwbus_ops->irq_enable(priv->hwbus_priv, enable);
-               if (ret || enable < 2)
-                       return ret;
-       }
-
        if (HIF_8601_SILICON == priv->hw_type) {
                ret = __cw1200_reg_read_32(priv, ST90TDS_CONFIG_REG_ID, &val32);
                if (ret < 0) {
index 21c6882..1214c58 100644 (file)
@@ -150,7 +150,7 @@ mwifiex_11n_form_amsdu_txpd(struct mwifiex_private *priv,
  */
 int
 mwifiex_11n_aggregate_pkt(struct mwifiex_private *priv,
-                         struct mwifiex_ra_list_tbl *pra_list, int headroom,
+                         struct mwifiex_ra_list_tbl *pra_list,
                          int ptrindex, unsigned long ra_list_flags)
                          __releases(&priv->wmm.ra_list_spinlock)
 {
@@ -160,6 +160,7 @@ mwifiex_11n_aggregate_pkt(struct mwifiex_private *priv,
        int pad = 0, ret;
        struct mwifiex_tx_param tx_param;
        struct txpd *ptx_pd = NULL;
+       int headroom = adapter->iface_type == MWIFIEX_USB ? 0 : INTF_HEADER_LEN;
 
        skb_src = skb_peek(&pra_list->skb_head);
        if (!skb_src) {
index 900e1c6..892098d 100644 (file)
@@ -26,7 +26,7 @@
 int mwifiex_11n_deaggregate_pkt(struct mwifiex_private *priv,
                                struct sk_buff *skb);
 int mwifiex_11n_aggregate_pkt(struct mwifiex_private *priv,
-                             struct mwifiex_ra_list_tbl *ptr, int headroom,
+                             struct mwifiex_ra_list_tbl *ptr,
                              int ptr_index, unsigned long flags)
                              __releases(&priv->wmm.ra_list_spinlock);
 
index 2d76147..a6c46f3 100644 (file)
@@ -1155,7 +1155,7 @@ int mwifiex_ret_802_11_hs_cfg(struct mwifiex_private *priv,
        uint32_t conditions = le32_to_cpu(phs_cfg->params.hs_config.conditions);
 
        if (phs_cfg->action == cpu_to_le16(HS_ACTIVATE) &&
-           adapter->iface_type == MWIFIEX_SDIO) {
+           adapter->iface_type != MWIFIEX_USB) {
                mwifiex_hs_activated_event(priv, true);
                return 0;
        } else {
@@ -1167,8 +1167,7 @@ int mwifiex_ret_802_11_hs_cfg(struct mwifiex_private *priv,
        }
        if (conditions != HS_CFG_CANCEL) {
                adapter->is_hs_configured = true;
-               if (adapter->iface_type == MWIFIEX_USB ||
-                   adapter->iface_type == MWIFIEX_PCIE)
+               if (adapter->iface_type == MWIFIEX_USB)
                        mwifiex_hs_activated_event(priv, true);
        } else {
                adapter->is_hs_configured = false;
index 2472d4b..1c70b8d 100644 (file)
@@ -447,9 +447,6 @@ static int mwifiex_usb_suspend(struct usb_interface *intf, pm_message_t message)
         */
        adapter->is_suspended = true;
 
-       for (i = 0; i < adapter->priv_num; i++)
-               netif_carrier_off(adapter->priv[i]->netdev);
-
        if (atomic_read(&card->rx_cmd_urb_pending) && card->rx_cmd.urb)
                usb_kill_urb(card->rx_cmd.urb);
 
@@ -509,10 +506,6 @@ static int mwifiex_usb_resume(struct usb_interface *intf)
                                                  MWIFIEX_RX_CMD_BUF_SIZE);
        }
 
-       for (i = 0; i < adapter->priv_num; i++)
-               if (adapter->priv[i]->media_connected)
-                       netif_carrier_on(adapter->priv[i]->netdev);
-
        /* Disable Host Sleep */
        if (adapter->hs_activated)
                mwifiex_cancel_hs(mwifiex_get_priv(adapter,
index 2e8f9cd..95fa359 100644 (file)
@@ -1239,8 +1239,7 @@ mwifiex_dequeue_tx_packet(struct mwifiex_adapter *adapter)
                if (enable_tx_amsdu && mwifiex_is_amsdu_allowed(priv, tid) &&
                    mwifiex_is_11n_aggragation_possible(priv, ptr,
                                                        adapter->tx_buf_size))
-                       mwifiex_11n_aggregate_pkt(priv, ptr, INTF_HEADER_LEN,
-                                                 ptr_index, flags);
+                       mwifiex_11n_aggregate_pkt(priv, ptr, ptr_index, flags);
                        /* ra_list_spinlock has been freed in
                           mwifiex_11n_aggregate_pkt() */
                else
index b9deef6..e328d30 100644 (file)
@@ -83,6 +83,7 @@ static struct usb_device_id p54u_table[] = {
        {USB_DEVICE(0x06a9, 0x000e)},   /* Westell 802.11g USB (A90-211WG-01) */
        {USB_DEVICE(0x06b9, 0x0121)},   /* Thomson SpeedTouch 121g */
        {USB_DEVICE(0x0707, 0xee13)},   /* SMC 2862W-G version 2 */
+       {USB_DEVICE(0x07aa, 0x0020)},   /* Corega WLUSB2GTST USB */
        {USB_DEVICE(0x0803, 0x4310)},   /* Zoom 4410a */
        {USB_DEVICE(0x083a, 0x4521)},   /* Siemens Gigaset USB Adapter 54 version 2 */
        {USB_DEVICE(0x083a, 0x4531)},   /* T-Com Sinus 154 data II */
@@ -979,6 +980,7 @@ static int p54u_load_firmware(struct ieee80211_hw *dev,
        if (err) {
                dev_err(&priv->udev->dev, "(p54usb) cannot load firmware %s "
                                          "(%d)!\n", p54u_fwlist[i].fw, err);
+               usb_put_dev(udev);
        }
 
        return err;
index cc03e7c..7032587 100644 (file)
@@ -2057,7 +2057,7 @@ struct rtl_priv {
           that it points to the data allocated
           beyond  this structure like:
           rtl_pci_priv or rtl_usb_priv */
-       u8 priv[0];
+       u8 priv[0] __aligned(sizeof(void *));
 };
 
 #define rtl_priv(hw)           (((struct rtl_priv *)(hw)->priv))
index a53782e..b45bce2 100644 (file)
 struct backend_info {
        struct xenbus_device *dev;
        struct xenvif *vif;
+
+       /* This is the state that will be reflected in xenstore when any
+        * active hotplug script completes.
+        */
+       enum xenbus_state state;
+
        enum xenbus_state frontend_state;
        struct xenbus_watch hotplug_status_watch;
        u8 have_hotplug_status_watch:1;
@@ -136,6 +142,8 @@ static int netback_probe(struct xenbus_device *dev,
        if (err)
                goto fail;
 
+       be->state = XenbusStateInitWait;
+
        /* This kicks hotplug scripts, so do it immediately. */
        backend_create_xenvif(be);
 
@@ -208,24 +216,113 @@ static void backend_create_xenvif(struct backend_info *be)
        kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
 }
 
-
-static void disconnect_backend(struct xenbus_device *dev)
+static void backend_disconnect(struct backend_info *be)
 {
-       struct backend_info *be = dev_get_drvdata(&dev->dev);
-
        if (be->vif)
                xenvif_disconnect(be->vif);
 }
 
-static void destroy_backend(struct xenbus_device *dev)
+static void backend_connect(struct backend_info *be)
 {
-       struct backend_info *be = dev_get_drvdata(&dev->dev);
+       if (be->vif)
+               connect(be);
+}
 
-       if (be->vif) {
-               kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
-               xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
-               xenvif_free(be->vif);
-               be->vif = NULL;
+static inline void backend_switch_state(struct backend_info *be,
+                                       enum xenbus_state state)
+{
+       struct xenbus_device *dev = be->dev;
+
+       pr_debug("%s -> %s\n", dev->nodename, xenbus_strstate(state));
+       be->state = state;
+
+       /* If we are waiting for a hotplug script then defer the
+        * actual xenbus state change.
+        */
+       if (!be->have_hotplug_status_watch)
+               xenbus_switch_state(dev, state);
+}
+
+/* Handle backend state transitions:
+ *
+ * The backend state starts in InitWait and the following transitions are
+ * allowed.
+ *
+ * InitWait -> Connected
+ *
+ *    ^    \         |
+ *    |     \        |
+ *    |      \       |
+ *    |       \      |
+ *    |        \     |
+ *    |         \    |
+ *    |          V   V
+ *
+ *  Closed  <-> Closing
+ *
+ * The state argument specifies the eventual state of the backend and the
+ * function transitions to that state via the shortest path.
+ */
+static void set_backend_state(struct backend_info *be,
+                             enum xenbus_state state)
+{
+       while (be->state != state) {
+               switch (be->state) {
+               case XenbusStateClosed:
+                       switch (state) {
+                       case XenbusStateInitWait:
+                       case XenbusStateConnected:
+                               pr_info("%s: prepare for reconnect\n",
+                                       be->dev->nodename);
+                               backend_switch_state(be, XenbusStateInitWait);
+                               break;
+                       case XenbusStateClosing:
+                               backend_switch_state(be, XenbusStateClosing);
+                               break;
+                       default:
+                               BUG();
+                       }
+                       break;
+               case XenbusStateInitWait:
+                       switch (state) {
+                       case XenbusStateConnected:
+                               backend_connect(be);
+                               backend_switch_state(be, XenbusStateConnected);
+                               break;
+                       case XenbusStateClosing:
+                       case XenbusStateClosed:
+                               backend_switch_state(be, XenbusStateClosing);
+                               break;
+                       default:
+                               BUG();
+                       }
+                       break;
+               case XenbusStateConnected:
+                       switch (state) {
+                       case XenbusStateInitWait:
+                       case XenbusStateClosing:
+                       case XenbusStateClosed:
+                               backend_disconnect(be);
+                               backend_switch_state(be, XenbusStateClosing);
+                               break;
+                       default:
+                               BUG();
+                       }
+                       break;
+               case XenbusStateClosing:
+                       switch (state) {
+                       case XenbusStateInitWait:
+                       case XenbusStateConnected:
+                       case XenbusStateClosed:
+                               backend_switch_state(be, XenbusStateClosed);
+                               break;
+                       default:
+                               BUG();
+                       }
+                       break;
+               default:
+                       BUG();
+               }
        }
 }
 
@@ -237,40 +334,33 @@ static void frontend_changed(struct xenbus_device *dev,
 {
        struct backend_info *be = dev_get_drvdata(&dev->dev);
 
-       pr_debug("frontend state %s\n", xenbus_strstate(frontend_state));
+       pr_debug("%s -> %s\n", dev->otherend, xenbus_strstate(frontend_state));
 
        be->frontend_state = frontend_state;
 
        switch (frontend_state) {
        case XenbusStateInitialising:
-               if (dev->state == XenbusStateClosed) {
-                       pr_info("%s: prepare for reconnect\n", dev->nodename);
-                       xenbus_switch_state(dev, XenbusStateInitWait);
-               }
+               set_backend_state(be, XenbusStateInitWait);
                break;
 
        case XenbusStateInitialised:
                break;
 
        case XenbusStateConnected:
-               if (dev->state == XenbusStateConnected)
-                       break;
-               if (be->vif)
-                       connect(be);
+               set_backend_state(be, XenbusStateConnected);
                break;
 
        case XenbusStateClosing:
-               disconnect_backend(dev);
-               xenbus_switch_state(dev, XenbusStateClosing);
+               set_backend_state(be, XenbusStateClosing);
                break;
 
        case XenbusStateClosed:
-               xenbus_switch_state(dev, XenbusStateClosed);
+               set_backend_state(be, XenbusStateClosed);
                if (xenbus_dev_is_online(dev))
                        break;
-               destroy_backend(dev);
                /* fall through if not online */
        case XenbusStateUnknown:
+               set_backend_state(be, XenbusStateClosed);
                device_unregister(&dev->dev);
                break;
 
@@ -363,7 +453,9 @@ static void hotplug_status_changed(struct xenbus_watch *watch,
        if (IS_ERR(str))
                return;
        if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
-               xenbus_switch_state(be->dev, XenbusStateConnected);
+               /* Complete any pending state change */
+               xenbus_switch_state(be->dev, be->state);
+
                /* Not interested in this watch anymore. */
                unregister_hotplug_status_watch(be);
        }
@@ -393,12 +485,8 @@ static void connect(struct backend_info *be)
        err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
                                   hotplug_status_changed,
                                   "%s/%s", dev->nodename, "hotplug-status");
-       if (err) {
-               /* Switch now, since we can't do a watch. */
-               xenbus_switch_state(dev, XenbusStateConnected);
-       } else {
+       if (!err)
                be->have_hotplug_status_watch = 1;
-       }
 
        netif_wake_queue(be->vif->dev);
 }
index 9d2009a..78cc760 100644 (file)
@@ -74,10 +74,4 @@ config OF_MTD
        depends on MTD
        def_bool y
 
-config OF_RESERVED_MEM
-       depends on OF_FLATTREE && (DMA_CMA || (HAVE_GENERIC_DMA_COHERENT && HAVE_MEMBLOCK))
-       def_bool y
-       help
-         Initialization code for DMA reserved memory
-
 endmenu # OF
index ed9660a..efd0510 100644 (file)
@@ -9,4 +9,3 @@ obj-$(CONFIG_OF_MDIO)   += of_mdio.o
 obj-$(CONFIG_OF_PCI)   += of_pci.o
 obj-$(CONFIG_OF_PCI_IRQ)  += of_pci_irq.o
 obj-$(CONFIG_OF_MTD)   += of_mtd.o
-obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
index 865d3f6..7d4c70f 100644 (file)
@@ -303,10 +303,8 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
        struct device_node *cpun, *cpus;
 
        cpus = of_find_node_by_path("/cpus");
-       if (!cpus) {
-               pr_warn("Missing cpus node, bailing out\n");
+       if (!cpus)
                return NULL;
-       }
 
        for_each_child_of_node(cpus, cpun) {
                if (of_node_cmp(cpun->type, "cpu"))
index 229dd9d..a4fa9ad 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
-#include <linux/random.h>
 
 #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */
 #ifdef CONFIG_PPC
@@ -803,14 +802,3 @@ void __init unflatten_device_tree(void)
 }
 
 #endif /* CONFIG_OF_EARLY_FLATTREE */
-
-/* Feed entire flattened device tree into the random pool */
-static int __init add_fdt_randomness(void)
-{
-       if (initial_boot_params)
-               add_device_randomness(initial_boot_params,
-                               be32_to_cpu(initial_boot_params->totalsize));
-
-       return 0;
-}
-core_initcall(add_fdt_randomness);
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
deleted file mode 100644 (file)
index 0fe40c7..0000000
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Device tree based initialization code for reserved memory.
- *
- * Copyright (c) 2013 Samsung Electronics Co., Ltd.
- *             http://www.samsung.com
- * Author: Marek Szyprowski <m.szyprowski@samsung.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- */
-
-#include <linux/memblock.h>
-#include <linux/err.h>
-#include <linux/of.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
-#include <linux/mm.h>
-#include <linux/sizes.h>
-#include <linux/mm_types.h>
-#include <linux/dma-contiguous.h>
-#include <linux/dma-mapping.h>
-#include <linux/of_reserved_mem.h>
-
-#define MAX_RESERVED_REGIONS   16
-struct reserved_mem {
-       phys_addr_t             base;
-       unsigned long           size;
-       struct cma              *cma;
-       char                    name[32];
-};
-static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS];
-static int reserved_mem_count;
-
-static int __init fdt_scan_reserved_mem(unsigned long node, const char *uname,
-                                       int depth, void *data)
-{
-       struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
-       phys_addr_t base, size;
-       int is_cma, is_reserved;
-       unsigned long len;
-       const char *status;
-       __be32 *prop;
-
-       is_cma = IS_ENABLED(CONFIG_DMA_CMA) &&
-              of_flat_dt_is_compatible(node, "linux,contiguous-memory-region");
-       is_reserved = of_flat_dt_is_compatible(node, "reserved-memory-region");
-
-       if (!is_reserved && !is_cma) {
-               /* ignore node and scan next one */
-               return 0;
-       }
-
-       status = of_get_flat_dt_prop(node, "status", &len);
-       if (status && strcmp(status, "okay") != 0) {
-               /* ignore disabled node nad scan next one */
-               return 0;
-       }
-
-       prop = of_get_flat_dt_prop(node, "reg", &len);
-       if (!prop || (len < (dt_root_size_cells + dt_root_addr_cells) *
-                            sizeof(__be32))) {
-               pr_err("Reserved mem: node %s, incorrect \"reg\" property\n",
-                      uname);
-               /* ignore node and scan next one */
-               return 0;
-       }
-       base = dt_mem_next_cell(dt_root_addr_cells, &prop);
-       size = dt_mem_next_cell(dt_root_size_cells, &prop);
-
-       if (!size) {
-               /* ignore node and scan next one */
-               return 0;
-       }
-
-       pr_info("Reserved mem: found %s, memory base %lx, size %ld MiB\n",
-               uname, (unsigned long)base, (unsigned long)size / SZ_1M);
-
-       if (reserved_mem_count == ARRAY_SIZE(reserved_mem))
-               return -ENOSPC;
-
-       rmem->base = base;
-       rmem->size = size;
-       strlcpy(rmem->name, uname, sizeof(rmem->name));
-
-       if (is_cma) {
-               struct cma *cma;
-               if (dma_contiguous_reserve_area(size, base, 0, &cma) == 0) {
-                       rmem->cma = cma;
-                       reserved_mem_count++;
-                       if (of_get_flat_dt_prop(node,
-                                               "linux,default-contiguous-region",
-                                               NULL))
-                               dma_contiguous_set_default(cma);
-               }
-       } else if (is_reserved) {
-               if (memblock_remove(base, size) == 0)
-                       reserved_mem_count++;
-               else
-                       pr_err("Failed to reserve memory for %s\n", uname);
-       }
-
-       return 0;
-}
-
-static struct reserved_mem *get_dma_memory_region(struct device *dev)
-{
-       struct device_node *node;
-       const char *name;
-       int i;
-
-       node = of_parse_phandle(dev->of_node, "memory-region", 0);
-       if (!node)
-               return NULL;
-
-       name = kbasename(node->full_name);
-       for (i = 0; i < reserved_mem_count; i++)
-               if (strcmp(name, reserved_mem[i].name) == 0)
-                       return &reserved_mem[i];
-       return NULL;
-}
-
-/**
- * of_reserved_mem_device_init() - assign reserved memory region to given device
- *
- * This function assign memory region pointed by "memory-region" device tree
- * property to the given device.
- */
-void of_reserved_mem_device_init(struct device *dev)
-{
-       struct reserved_mem *region = get_dma_memory_region(dev);
-       if (!region)
-               return;
-
-       if (region->cma) {
-               dev_set_cma_area(dev, region->cma);
-               pr_info("Assigned CMA %s to %s device\n", region->name,
-                       dev_name(dev));
-       } else {
-               if (dma_declare_coherent_memory(dev, region->base, region->base,
-                   region->size, DMA_MEMORY_MAP | DMA_MEMORY_EXCLUSIVE) != 0)
-                       pr_info("Declared reserved memory %s to %s device\n",
-                               region->name, dev_name(dev));
-       }
-}
-
-/**
- * of_reserved_mem_device_release() - release reserved memory device structures
- *
- * This function releases structures allocated for memory region handling for
- * the given device.
- */
-void of_reserved_mem_device_release(struct device *dev)
-{
-       struct reserved_mem *region = get_dma_memory_region(dev);
-       if (!region && !region->cma)
-               dma_release_declared_memory(dev);
-}
-
-/**
- * early_init_dt_scan_reserved_mem() - create reserved memory regions
- *
- * This function grabs memory from early allocator for device exclusive use
- * defined in device tree structures. It should be called by arch specific code
- * once the early allocator (memblock) has been activated and all other
- * subsystems have already allocated/reserved memory.
- */
-void __init early_init_dt_scan_reserved_mem(void)
-{
-       of_scan_flat_dt_by_path("/memory/reserved-memory",
-                               fdt_scan_reserved_mem, NULL);
-}
index 9b439ac..f6dcde2 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
-#include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
 
 const struct of_device_id of_default_bus_match_table[] = {
@@ -219,8 +218,6 @@ static struct platform_device *of_platform_device_create_pdata(
        dev->dev.bus = &platform_bus_type;
        dev->dev.platform_data = platform_data;
 
-       of_reserved_mem_device_init(&dev->dev);
-
        /* We do not fill the DMA ops for platform devices by default.
         * This is currently the responsibility of the platform code
         * to do such, possibly using a device notifier
@@ -228,7 +225,6 @@ static struct platform_device *of_platform_device_create_pdata(
 
        if (of_device_add(dev) != 0) {
                platform_device_put(dev);
-               of_reserved_mem_device_release(&dev->dev);
                return NULL;
        }
 
index 0b7d23b..be12fbf 100644 (file)
@@ -994,14 +994,16 @@ void acpiphp_enumerate_slots(struct pci_bus *bus)
 
                /*
                 * This bridge should have been registered as a hotplug function
-                * under its parent, so the context has to be there.  If not, we
-                * are in deep goo.
+                * under its parent, so the context should be there, unless the
+                * parent is going to be handled by pciehp, in which case this
+                * bridge is not interesting to us either.
                 */
                mutex_lock(&acpiphp_context_lock);
                context = acpiphp_get_context(handle);
-               if (WARN_ON(!context)) {
+               if (!context) {
                        mutex_unlock(&acpiphp_context_lock);
                        put_device(&bus->dev);
+                       pci_dev_put(bridge->pci_dev);
                        kfree(bridge);
                        return;
                }
index e8ccf6c..bdd64b1 100644 (file)
@@ -1155,8 +1155,14 @@ static void pci_enable_bridge(struct pci_dev *dev)
 
        pci_enable_bridge(dev->bus->self);
 
-       if (pci_is_enabled(dev))
+       if (pci_is_enabled(dev)) {
+               if (!dev->is_busmaster) {
+                       dev_warn(&dev->dev, "driver skip pci_set_master, fix it!\n");
+                       pci_set_master(dev);
+               }
                return;
+       }
+
        retval = pci_enable_device(dev);
        if (retval)
                dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n",
index a138965..b8fcc38 100644 (file)
@@ -490,7 +490,7 @@ exit:
  * <devicename> <state> <pinname> are values that should match the pinctrl-maps
  * <newvalue> reflects the new config and is driver dependant
  */
-static int pinconf_dbg_config_write(struct file *file,
+static ssize_t pinconf_dbg_config_write(struct file *file,
        const char __user *user_buf, size_t count, loff_t *ppos)
 {
        struct pinctrl_maps *maps_node;
@@ -508,7 +508,7 @@ static int pinconf_dbg_config_write(struct file *file,
        int i;
 
        /* Get userspace string and assure termination */
-       buf_size = min(count, (size_t)(sizeof(buf)-1));
+       buf_size = min(count, sizeof(buf) - 1);
        if (copy_from_user(buf, user_buf, buf_size))
                return -EFAULT;
        buf[buf_size] = 0;
index 2689f8d..155b1b3 100644 (file)
@@ -663,18 +663,18 @@ static void exynos_pinctrl_resume(struct samsung_pinctrl_drv_data *drvdata)
 /* pin banks of s5pv210 pin-controller */
 static struct samsung_pin_bank s5pv210_pin_bank[] = {
        EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpa0", 0x00),
-       EXYNOS_PIN_BANK_EINTG(6, 0x020, "gpa1", 0x04),
+       EXYNOS_PIN_BANK_EINTG(4, 0x020, "gpa1", 0x04),
        EXYNOS_PIN_BANK_EINTG(8, 0x040, "gpb", 0x08),
        EXYNOS_PIN_BANK_EINTG(5, 0x060, "gpc0", 0x0c),
        EXYNOS_PIN_BANK_EINTG(5, 0x080, "gpc1", 0x10),
        EXYNOS_PIN_BANK_EINTG(4, 0x0a0, "gpd0", 0x14),
-       EXYNOS_PIN_BANK_EINTG(4, 0x0c0, "gpd1", 0x18),
-       EXYNOS_PIN_BANK_EINTG(5, 0x0e0, "gpe0", 0x1c),
-       EXYNOS_PIN_BANK_EINTG(8, 0x100, "gpe1", 0x20),
-       EXYNOS_PIN_BANK_EINTG(6, 0x120, "gpf0", 0x24),
+       EXYNOS_PIN_BANK_EINTG(6, 0x0c0, "gpd1", 0x18),
+       EXYNOS_PIN_BANK_EINTG(8, 0x0e0, "gpe0", 0x1c),
+       EXYNOS_PIN_BANK_EINTG(5, 0x100, "gpe1", 0x20),
+       EXYNOS_PIN_BANK_EINTG(8, 0x120, "gpf0", 0x24),
        EXYNOS_PIN_BANK_EINTG(8, 0x140, "gpf1", 0x28),
        EXYNOS_PIN_BANK_EINTG(8, 0x160, "gpf2", 0x2c),
-       EXYNOS_PIN_BANK_EINTG(8, 0x180, "gpf3", 0x30),
+       EXYNOS_PIN_BANK_EINTG(6, 0x180, "gpf3", 0x30),
        EXYNOS_PIN_BANK_EINTG(7, 0x1a0, "gpg0", 0x34),
        EXYNOS_PIN_BANK_EINTG(7, 0x1c0, "gpg1", 0x38),
        EXYNOS_PIN_BANK_EINTG(7, 0x1e0, "gpg2", 0x3c),
index 82638fa..30c4d35 100644 (file)
@@ -891,9 +891,10 @@ static int palmas_pinconf_set(struct pinctrl_dev *pctldev,
                param = pinconf_to_config_param(configs[i]);
                param_val = pinconf_to_config_argument(configs[i]);
 
+               if (param == PIN_CONFIG_BIAS_PULL_PIN_DEFAULT)
+                       continue;
+
                switch (param) {
-               case PIN_CONFIG_BIAS_PULL_PIN_DEFAULT:
-                       return 0;
                case PIN_CONFIG_BIAS_DISABLE:
                case PIN_CONFIG_BIAS_PULL_UP:
                case PIN_CONFIG_BIAS_PULL_DOWN:
index 622c485..93c9e38 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2012-2013, NVIDIA CORPORATION.  All rights reserved.
  *
- * Arthur:  Pritesh Raithatha <praithatha@nvidia.com>
+ * Author:  Pritesh Raithatha <praithatha@nvidia.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -2763,7 +2763,6 @@ static struct platform_driver tegra114_pinctrl_driver = {
 };
 module_platform_driver(tegra114_pinctrl_driver);
 
-MODULE_ALIAS("platform:tegra114-pinctrl");
 MODULE_AUTHOR("Pritesh Raithatha <praithatha@nvidia.com>");
-MODULE_DESCRIPTION("NVIDIA Tegra114 pincontrol driver");
+MODULE_DESCRIPTION("NVIDIA Tegra114 pinctrl driver");
 MODULE_LICENSE("GPL v2");
index 1a78163..b9f2653 100644 (file)
@@ -709,7 +709,7 @@ static struct da9063_regulators_pdata *da9063_parse_regulators_dt(
                struct of_regulator_match **da9063_reg_matches)
 {
        da9063_reg_matches = NULL;
-       return PTR_ERR(-ENODEV);
+       return ERR_PTR(-ENODEV);
 }
 #endif
 
index 488dfe7..7e2b165 100644 (file)
@@ -201,13 +201,7 @@ static unsigned int palmas_smps_ramp_delay[4] = {0, 10000, 5000, 2500};
 #define SMPS_CTRL_MODE_ECO             0x02
 #define SMPS_CTRL_MODE_PWM             0x03
 
-/* These values are derived from the data sheet. And are the number of steps
- * where there is a voltage change, the ranges at beginning and end of register
- * max/min values where there are no change are ommitted.
- *
- * So they are basically (maxV-minV)/stepV
- */
-#define PALMAS_SMPS_NUM_VOLTAGES       117
+#define PALMAS_SMPS_NUM_VOLTAGES       122
 #define PALMAS_SMPS10_NUM_VOLTAGES     2
 #define PALMAS_LDO_NUM_VOLTAGES                50
 
@@ -979,6 +973,7 @@ static int palmas_regulators_probe(struct platform_device *pdev)
                        pmic->desc[id].min_uV = 900000;
                        pmic->desc[id].uV_step = 50000;
                        pmic->desc[id].linear_min_sel = 1;
+                       pmic->desc[id].enable_time = 500;
                        pmic->desc[id].vsel_reg =
                                        PALMAS_BASE_TO_REG(PALMAS_LDO_BASE,
                                                palmas_regs_info[id].vsel_addr);
@@ -997,6 +992,11 @@ static int palmas_regulators_probe(struct platform_device *pdev)
                                pmic->desc[id].min_uV = 450000;
                                pmic->desc[id].uV_step = 25000;
                        }
+
+                       /* LOD6 in vibrator mode will have enable time 2000us */
+                       if (pdata && pdata->ldo6_vibrator &&
+                               (id == PALMAS_REG_LDO6))
+                               pmic->desc[id].enable_time = 2000;
                } else {
                        pmic->desc[id].n_voltages = 1;
                        pmic->desc[id].ops = &palmas_ops_extreg;
index d8e3e12..20c271d 100644 (file)
@@ -279,8 +279,12 @@ static int ti_abb_set_opp(struct regulator_dev *rdev, struct ti_abb *abb,
        ti_abb_rmw(regs->opp_sel_mask, info->opp_sel, regs->control_reg,
                   abb->base);
 
-       /* program LDO VBB vset override if needed */
-       if (abb->ldo_base)
+       /*
+        * program LDO VBB vset override if needed for !bypass mode
+        * XXX: Do not switch sequence - for !bypass, LDO override reset *must*
+        * be performed *before* switch to bias mode else VBB glitches.
+        */
+       if (abb->ldo_base && info->opp_sel != TI_ABB_NOMINAL_OPP)
                ti_abb_program_ldovbb(dev, abb, info);
 
        /* Initiate ABB ldo change */
@@ -295,6 +299,14 @@ static int ti_abb_set_opp(struct regulator_dev *rdev, struct ti_abb *abb,
        if (ret)
                goto out;
 
+       /*
+        * Reset LDO VBB vset override bypass mode
+        * XXX: Do not switch sequence - for bypass, LDO override reset *must*
+        * be performed *after* switch to bypass else VBB glitches.
+        */
+       if (abb->ldo_base && info->opp_sel == TI_ABB_NOMINAL_OPP)
+               ti_abb_program_ldovbb(dev, abb, info);
+
 out:
        return ret;
 }
index 1432b26..2205fbc 100644 (file)
@@ -63,7 +63,7 @@ static irqreturn_t wm831x_ldo_uv_irq(int irq, void *data)
  */
 
 static const struct regulator_linear_range wm831x_gp_ldo_ranges[] = {
-       { .min_uV =  900000, .max_uV = 1650000, .min_sel =  0, .max_sel = 14,
+       { .min_uV =  900000, .max_uV = 1600000, .min_sel =  0, .max_sel = 14,
          .uV_step =  50000 },
        { .min_uV = 1700000, .max_uV = 3300000, .min_sel = 15, .max_sel = 31,
          .uV_step = 100000 },
@@ -332,7 +332,7 @@ static struct platform_driver wm831x_gp_ldo_driver = {
  */
 
 static const struct regulator_linear_range wm831x_aldo_ranges[] = {
-       { .min_uV = 1000000, .max_uV = 1650000, .min_sel =  0, .max_sel = 12,
+       { .min_uV = 1000000, .max_uV = 1600000, .min_sel =  0, .max_sel = 12,
          .uV_step =  50000 },
        { .min_uV = 1700000, .max_uV = 3500000, .min_sel = 13, .max_sel = 31,
          .uV_step = 100000 },
index 835b5f0..61ca929 100644 (file)
@@ -543,7 +543,7 @@ static int wm8350_dcdc_set_suspend_mode(struct regulator_dev *rdev,
 }
 
 static const struct regulator_linear_range wm8350_ldo_ranges[] = {
-       { .min_uV =  900000, .max_uV = 1750000, .min_sel =  0, .max_sel = 15,
+       { .min_uV =  900000, .max_uV = 1650000, .min_sel =  0, .max_sel = 15,
          .uV_step =  50000 },
        { .min_uV = 1800000, .max_uV = 3300000, .min_sel = 16, .max_sel = 31,
          .uV_step = 100000 },
index 8cd34bf..77df9cb 100644 (file)
@@ -145,9 +145,11 @@ bool __init sclp_has_linemode(void)
 
        if (sccb->header.response_code != 0x20)
                return 0;
-       if (sccb->sclp_send_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK))
-               return 1;
-       return 0;
+       if (!(sccb->sclp_send_mask & (EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK)))
+               return 0;
+       if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK)))
+               return 0;
+       return 1;
 }
 
 bool __init sclp_has_vt220(void)
index a0f47c8..3f4ca4e 100644 (file)
@@ -810,7 +810,7 @@ static void tty3270_resize_work(struct work_struct *work)
        struct winsize ws;
 
        screen = tty3270_alloc_screen(tp->n_rows, tp->n_cols);
-       if (!screen)
+       if (IS_ERR(screen))
                return;
        /* Switch to new output size */
        spin_lock_bh(&tp->view.lock);
index fd7cc56..d4ac60b 100644 (file)
@@ -1583,7 +1583,7 @@ static int atmel_spi_probe(struct platform_device *pdev)
        /* Initialize the hardware */
        ret = clk_prepare_enable(clk);
        if (ret)
-               goto out_unmap_regs;
+               goto out_free_irq;
        spi_writel(as, CR, SPI_BIT(SWRST));
        spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
        if (as->caps.has_wdrbt) {
@@ -1614,6 +1614,7 @@ out_free_dma:
        spi_writel(as, CR, SPI_BIT(SWRST));
        spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
        clk_disable_unprepare(clk);
+out_free_irq:
        free_irq(irq, master);
 out_unmap_regs:
        iounmap(as->regs);
index 5655acf..6416798 100644 (file)
@@ -226,7 +226,6 @@ static int spi_clps711x_probe(struct platform_device *pdev)
                               dev_name(&pdev->dev), hw);
        if (ret) {
                dev_err(&pdev->dev, "Can't request IRQ\n");
-               clk_put(hw->spi_clk);
                goto clk_out;
        }
 
@@ -247,7 +246,6 @@ err_out:
                        gpio_free(hw->chipselect[i]);
 
        spi_master_put(master);
-       kfree(master);
 
        return ret;
 }
@@ -263,7 +261,6 @@ static int spi_clps711x_remove(struct platform_device *pdev)
                        gpio_free(hw->chipselect[i]);
 
        spi_unregister_master(master);
-       kfree(master);
 
        return 0;
 }
index 6cd07d1..4e44575 100644 (file)
@@ -476,15 +476,9 @@ static int dspi_probe(struct platform_device *pdev)
        master->bus_num = bus_num;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "can't get platform resource\n");
-               ret = -EINVAL;
-               goto out_master_put;
-       }
-
        dspi->base = devm_ioremap_resource(&pdev->dev, res);
-       if (!dspi->base) {
-               ret = -EINVAL;
+       if (IS_ERR(dspi->base)) {
+               ret = PTR_ERR(dspi->base);
                goto out_master_put;
        }
 
index dbc5e99..6adf4e3 100644 (file)
@@ -522,8 +522,10 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
        psc_num = master->bus_num;
        snprintf(clk_name, sizeof(clk_name), "psc%d_mclk", psc_num);
        clk = devm_clk_get(dev, clk_name);
-       if (IS_ERR(clk))
+       if (IS_ERR(clk)) {
+               ret = PTR_ERR(clk);
                goto free_irq;
+       }
        ret = clk_prepare_enable(clk);
        if (ret)
                goto free_irq;
index 2eb06ee..c1a5067 100644 (file)
@@ -546,8 +546,17 @@ static irqreturn_t ssp_int(int irq, void *dev_id)
        if (pm_runtime_suspended(&drv_data->pdev->dev))
                return IRQ_NONE;
 
-       sccr1_reg = read_SSCR1(reg);
+       /*
+        * If the device is not yet in RPM suspended state and we get an
+        * interrupt that is meant for another device, check if status bits
+        * are all set to one. That means that the device is already
+        * powered off.
+        */
        status = read_SSSR(reg);
+       if (status == ~0)
+               return IRQ_NONE;
+
+       sccr1_reg = read_SSCR1(reg);
 
        /* Ignore possible writes if we don't need to write */
        if (!(sccr1_reg & SSCR1_TIE))
index 512b889..a80376d 100644 (file)
@@ -1428,6 +1428,8 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
               S3C64XX_SPI_INT_TX_OVERRUN_EN | S3C64XX_SPI_INT_TX_UNDERRUN_EN,
               sdd->regs + S3C64XX_SPI_INT_EN);
 
+       pm_runtime_enable(&pdev->dev);
+
        if (spi_register_master(master)) {
                dev_err(&pdev->dev, "cannot register SPI master\n");
                ret = -EBUSY;
@@ -1440,8 +1442,6 @@ static int s3c64xx_spi_probe(struct platform_device *pdev)
                                        mem_res,
                                        sdd->rx_dma.dmach, sdd->tx_dma.dmach);
 
-       pm_runtime_enable(&pdev->dev);
-
        return 0;
 
 err3:
index 0b68cb5..e488a90 100644 (file)
@@ -296,6 +296,8 @@ static int hspi_probe(struct platform_device *pdev)
                goto error1;
        }
 
+       pm_runtime_enable(&pdev->dev);
+
        master->num_chipselect  = 1;
        master->bus_num         = pdev->id;
        master->setup           = hspi_setup;
@@ -309,8 +311,6 @@ static int hspi_probe(struct platform_device *pdev)
                goto error1;
        }
 
-       pm_runtime_enable(&pdev->dev);
-
        return 0;
 
  error1:
index 3ba4c57..853f62b 100644 (file)
@@ -369,28 +369,23 @@ static int ni_65xx_dio_insn_bits(struct comedi_device *dev,
 {
        const struct ni_65xx_board *board = comedi_board(dev);
        struct ni_65xx_private *devpriv = dev->private;
-       unsigned base_bitfield_channel;
-       const unsigned max_ports_per_bitfield = 5;
+       int base_bitfield_channel;
        unsigned read_bits = 0;
-       unsigned j;
+       int last_port_offset = ni_65xx_port_by_channel(s->n_chan - 1);
+       int port_offset;
 
        base_bitfield_channel = CR_CHAN(insn->chanspec);
-       for (j = 0; j < max_ports_per_bitfield; ++j) {
-               const unsigned port_offset =
-                       ni_65xx_port_by_channel(base_bitfield_channel) + j;
-               const unsigned port =
-                       sprivate(s)->base_port + port_offset;
-               unsigned base_port_channel;
+       for (port_offset = ni_65xx_port_by_channel(base_bitfield_channel);
+            port_offset <= last_port_offset; port_offset++) {
+               unsigned port = sprivate(s)->base_port + port_offset;
+               int base_port_channel = port_offset * ni_65xx_channels_per_port;
                unsigned port_mask, port_data, port_read_bits;
-               int bitshift;
-               if (port >= ni_65xx_total_num_ports(board))
+               int bitshift = base_port_channel - base_bitfield_channel;
+
+               if (bitshift >= 32)
                        break;
-               base_port_channel = port_offset * ni_65xx_channels_per_port;
                port_mask = data[0];
                port_data = data[1];
-               bitshift = base_port_channel - base_bitfield_channel;
-               if (bitshift >= 32 || bitshift <= -32)
-                       break;
                if (bitshift > 0) {
                        port_mask >>= bitshift;
                        port_data >>= bitshift;
index 47c5888..a2e52a0 100644 (file)
@@ -41,7 +41,6 @@ struct imx_drm_device {
        struct list_head                        encoder_list;
        struct list_head                        connector_list;
        struct mutex                            mutex;
-       int                                     references;
        int                                     pipes;
        struct drm_fbdev_cma                    *fbhelper;
 };
@@ -241,8 +240,6 @@ struct drm_device *imx_drm_device_get(void)
                }
        }
 
-       imxdrm->references++;
-
        return imxdrm->drm;
 
 unwind_crtc:
@@ -280,8 +277,6 @@ void imx_drm_device_put(void)
        list_for_each_entry(enc, &imxdrm->encoder_list, list)
                module_put(enc->owner);
 
-       imxdrm->references--;
-
        mutex_unlock(&imxdrm->mutex);
 }
 EXPORT_SYMBOL_GPL(imx_drm_device_put);
@@ -485,7 +480,7 @@ int imx_drm_add_crtc(struct drm_crtc *crtc,
 
        mutex_lock(&imxdrm->mutex);
 
-       if (imxdrm->references) {
+       if (imxdrm->drm->open_count) {
                ret = -EBUSY;
                goto err_busy;
        }
@@ -564,7 +559,7 @@ int imx_drm_add_encoder(struct drm_encoder *encoder,
 
        mutex_lock(&imxdrm->mutex);
 
-       if (imxdrm->references) {
+       if (imxdrm->drm->open_count) {
                ret = -EBUSY;
                goto err_busy;
        }
@@ -709,7 +704,7 @@ int imx_drm_add_connector(struct drm_connector *connector,
 
        mutex_lock(&imxdrm->mutex);
 
-       if (imxdrm->references) {
+       if (imxdrm->drm->open_count) {
                ret = -EBUSY;
                goto err_busy;
        }
index 2644edf..c8b4344 100644 (file)
@@ -1387,7 +1387,7 @@ echo_copyout_lsm (struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob)
        if (nob > ulsm_nob)
                return (-EINVAL);
 
-       if (copy_to_user (ulsm, lsm, sizeof(ulsm)))
+       if (copy_to_user (ulsm, lsm, sizeof(*ulsm)))
                return (-EFAULT);
 
        for (i = 0; i < lsm->lsm_stripe_count; i++) {
index d7b3c82..45dfe94 100644 (file)
@@ -604,7 +604,7 @@ int cvmx_usb_initialize(struct cvmx_usb_state *state, int usb_port_number,
                        }
        }
 
-       memset(usb, 0, sizeof(usb));
+       memset(usb, 0, sizeof(*usb));
        usb->init_flags = flags;
 
        /* Initialize the USB state structure */
index c7ff2e4..9832dcb 100644 (file)
@@ -907,7 +907,7 @@ u32 mp_query_psd(struct adapter *pAdapter, u8 *data)
                sscanf(data, "pts =%d, start =%d, stop =%d", &psd_pts, &psd_start, &psd_stop);
        }
 
-       _rtw_memset(data, '\0', sizeof(data));
+       _rtw_memset(data, '\0', sizeof(*data));
 
        i = psd_start;
        while (i < psd_stop) {
index 9c2e7a2..ec0028d 100644 (file)
@@ -57,7 +57,7 @@ static void Init_ODM_ComInfo_88E(struct adapter *Adapter)
        u8 cut_ver, fab_ver;
 
        /*  Init Value */
-       _rtw_memset(dm_odm, 0, sizeof(dm_odm));
+       _rtw_memset(dm_odm, 0, sizeof(*dm_odm));
 
        dm_odm->Adapter = Adapter;
 
index cd4100f..95953eb 100644 (file)
@@ -6973,7 +6973,7 @@ static int rtw_mp_ctx(struct net_device *dev,
        stop = strncmp(extra, "stop", 4);
        sscanf(extra, "count =%d, pkt", &count);
 
-       _rtw_memset(extra, '\0', sizeof(extra));
+       _rtw_memset(extra, '\0', sizeof(*extra));
 
        if (stop == 0) {
                bStartTest = 0; /*  To set Stop */
index d3078d2..9ca3180 100644 (file)
@@ -54,6 +54,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = {
        /*=== Customer ID ===*/
        /****** 8188EUS ********/
        {USB_DEVICE(0x8179, 0x07B8)}, /* Abocom - Abocom */
+       {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */
        {}      /* Terminating entry */
 };
 
index 5bc361b..5614401 100644 (file)
@@ -37,6 +37,8 @@ rt_status SendTxCommandPacket(struct net_device *dev, void *pData, u32 DataLen)
        /* Get TCB and local buffer from common pool.
           (It is shared by CmdQ, MgntQ, and USB coalesce DataQ) */
        skb  = dev_alloc_skb(USB_HWDESC_HEADER_LEN + DataLen + 4);
+       if (!skb)
+               return RT_STATUS_FAILURE;
        memcpy((unsigned char *)(skb->cb), &dev, sizeof(dev));
        tcb_desc = (cb_desc *)(skb->cb + MAX_DEV_ADDR_SIZE);
        tcb_desc->queue_index = TXCMD_QUEUE;
index d0cf7d8..8872e0f 100644 (file)
@@ -1634,6 +1634,9 @@ int iwctl_siwencodeext(struct net_device *dev, struct iw_request_info *info,
        if (pMgmt == NULL)
                return -EFAULT;
 
+       if (!(pDevice->flags & DEVICE_FLAGS_OPENED))
+               return -ENODEV;
+
        buf = kzalloc(sizeof(struct viawget_wpa_param), GFP_KERNEL);
        if (buf == NULL)
                return -ENOMEM;
index 5369717..6f9d281 100644 (file)
@@ -1098,6 +1098,8 @@ static int device_close(struct net_device *dev)
     memset(pMgmt->abyCurrBSSID, 0, 6);
     pMgmt->eCurrState = WMAC_STATE_IDLE;
 
+       pDevice->flags &= ~DEVICE_FLAGS_OPENED;
+
     device_free_tx_bufs(pDevice);
     device_free_rx_bufs(pDevice);
     device_free_int_bufs(pDevice);
@@ -1109,7 +1111,6 @@ static int device_close(struct net_device *dev)
     usb_free_urb(pDevice->pInterruptURB);
 
     BSSvClearNodeDBTable(pDevice, 0);
-    pDevice->flags &=(~DEVICE_FLAGS_OPENED);
 
     DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO "device_close2 \n");
 
index fb743a8..14f3e85 100644 (file)
@@ -148,6 +148,8 @@ static void *s_vGetFreeContext(struct vnt_private *pDevice)
     DBG_PRT(MSG_LEVEL_DEBUG, KERN_INFO"GetFreeContext()\n");
 
     for (ii = 0; ii < pDevice->cbTD; ii++) {
+       if (!pDevice->apTD[ii])
+               return NULL;
         pContext = pDevice->apTD[ii];
         if (pContext->bBoolInUse == false) {
             pContext->bBoolInUse = true;
index 35b61f7..38e44b9 100644 (file)
@@ -753,7 +753,8 @@ static void iscsit_unmap_iovec(struct iscsi_cmd *cmd)
 
 static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn)
 {
-       struct iscsi_cmd *cmd;
+       LIST_HEAD(ack_list);
+       struct iscsi_cmd *cmd, *cmd_p;
 
        conn->exp_statsn = exp_statsn;
 
@@ -761,19 +762,23 @@ static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn)
                return;
 
        spin_lock_bh(&conn->cmd_lock);
-       list_for_each_entry(cmd, &conn->conn_cmd_list, i_conn_node) {
+       list_for_each_entry_safe(cmd, cmd_p, &conn->conn_cmd_list, i_conn_node) {
                spin_lock(&cmd->istate_lock);
                if ((cmd->i_state == ISTATE_SENT_STATUS) &&
                    iscsi_sna_lt(cmd->stat_sn, exp_statsn)) {
                        cmd->i_state = ISTATE_REMOVE;
                        spin_unlock(&cmd->istate_lock);
-                       iscsit_add_cmd_to_immediate_queue(cmd, conn,
-                                               cmd->i_state);
+                       list_move_tail(&cmd->i_conn_node, &ack_list);
                        continue;
                }
                spin_unlock(&cmd->istate_lock);
        }
        spin_unlock_bh(&conn->cmd_lock);
+
+       list_for_each_entry_safe(cmd, cmd_p, &ack_list, i_conn_node) {
+               list_del(&cmd->i_conn_node);
+               iscsit_free_cmd(cmd, false);
+       }
 }
 
 static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd)
index 14d1aed..ef6d836 100644 (file)
@@ -1192,7 +1192,7 @@ get_target:
         */
 alloc_tags:
        tag_num = max_t(u32, ISCSIT_MIN_TAGS, queue_depth);
-       tag_num += ISCSIT_EXTRA_TAGS;
+       tag_num += (tag_num / 2) + ISCSIT_EXTRA_TAGS;
        tag_size = sizeof(struct iscsi_cmd) + conn->conn_transport->priv_size;
 
        ret = transport_alloc_session_tags(sess->se_sess, tag_num, tag_size);
index f2de28e..b0cac0c 100644 (file)
@@ -736,7 +736,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
                 * Fallthrough
                 */
        case ISCSI_OP_SCSI_TMFUNC:
-               rc = transport_generic_free_cmd(&cmd->se_cmd, 1);
+               rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown);
                if (!rc && shutdown && se_cmd && se_cmd->se_sess) {
                        __iscsit_free_cmd(cmd, true, shutdown);
                        target_put_sess_cmd(se_cmd->se_sess, se_cmd);
@@ -752,7 +752,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
                        se_cmd = &cmd->se_cmd;
                        __iscsit_free_cmd(cmd, true, shutdown);
 
-                       rc = transport_generic_free_cmd(&cmd->se_cmd, 1);
+                       rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown);
                        if (!rc && shutdown && se_cmd->se_sess) {
                                __iscsit_free_cmd(cmd, true, shutdown);
                                target_put_sess_cmd(se_cmd->se_sess, se_cmd);
index 6c17295..4714c6f 100644 (file)
@@ -349,7 +349,16 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
 
-       cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST;
+       /*
+        * Only set SCF_COMPARE_AND_WRITE_POST to force a response fall-through
+        * within target_complete_ok_work() if the command was successfully
+        * sent to the backend driver.
+        */
+       spin_lock_irq(&cmd->t_state_lock);
+       if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status)
+               cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST;
+       spin_unlock_irq(&cmd->t_state_lock);
+
        /*
         * Unlock ->caw_sem originally obtained during sbc_compare_and_write()
         * before the original READ I/O submission.
@@ -363,7 +372,7 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
        struct scatterlist *write_sg = NULL, *sg;
-       unsigned char *buf, *addr;
+       unsigned char *buf = NULL, *addr;
        struct sg_mapping_iter m;
        unsigned int offset = 0, len;
        unsigned int nlbas = cmd->t_task_nolb;
@@ -378,6 +387,15 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd)
         */
        if (!cmd->t_data_sg || !cmd->t_bidi_data_sg)
                return TCM_NO_SENSE;
+       /*
+        * Immediately exit + release dev->caw_sem if command has already
+        * been failed with a non-zero SCSI status.
+        */
+       if (cmd->scsi_status) {
+               pr_err("compare_and_write_callback: non zero scsi_status:"
+                       " 0x%02x\n", cmd->scsi_status);
+               goto out;
+       }
 
        buf = kzalloc(cmd->data_length, GFP_KERNEL);
        if (!buf) {
@@ -508,6 +526,12 @@ sbc_compare_and_write(struct se_cmd *cmd)
                cmd->transport_complete_callback = NULL;
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
        }
+       /*
+        * Reset cmd->data_length to individual block_size in order to not
+        * confuse backend drivers that depend on this value matching the
+        * size of the I/O being submitted.
+        */
+       cmd->data_length = cmd->t_task_nolb * dev->dev_attrib.block_size;
 
        ret = cmd->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents,
                              DMA_FROM_DEVICE);
index 84747cc..81e945e 100644 (file)
@@ -236,17 +236,24 @@ int transport_alloc_session_tags(struct se_session *se_sess,
 {
        int rc;
 
-       se_sess->sess_cmd_map = kzalloc(tag_num * tag_size, GFP_KERNEL);
+       se_sess->sess_cmd_map = kzalloc(tag_num * tag_size,
+                                       GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
        if (!se_sess->sess_cmd_map) {
-               pr_err("Unable to allocate se_sess->sess_cmd_map\n");
-               return -ENOMEM;
+               se_sess->sess_cmd_map = vzalloc(tag_num * tag_size);
+               if (!se_sess->sess_cmd_map) {
+                       pr_err("Unable to allocate se_sess->sess_cmd_map\n");
+                       return -ENOMEM;
+               }
        }
 
        rc = percpu_ida_init(&se_sess->sess_tag_pool, tag_num);
        if (rc < 0) {
                pr_err("Unable to init se_sess->sess_tag_pool,"
                        " tag_num: %u\n", tag_num);
-               kfree(se_sess->sess_cmd_map);
+               if (is_vmalloc_addr(se_sess->sess_cmd_map))
+                       vfree(se_sess->sess_cmd_map);
+               else
+                       kfree(se_sess->sess_cmd_map);
                se_sess->sess_cmd_map = NULL;
                return -ENOMEM;
        }
@@ -412,7 +419,10 @@ void transport_free_session(struct se_session *se_sess)
 {
        if (se_sess->sess_cmd_map) {
                percpu_ida_destroy(&se_sess->sess_tag_pool);
-               kfree(se_sess->sess_cmd_map);
+               if (is_vmalloc_addr(se_sess->sess_cmd_map))
+                       vfree(se_sess->sess_cmd_map);
+               else
+                       kfree(se_sess->sess_cmd_map);
        }
        kmem_cache_free(se_sess_cache, se_sess);
 }
index 4d22e7d..3da4fd1 100644 (file)
@@ -298,8 +298,8 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op
                (unsigned long long)xop->dst_lba);
 
        if (dc != 0) {
-               xop->dbl = (desc[29] << 16) & 0xff;
-               xop->dbl |= (desc[30] << 8) & 0xff;
+               xop->dbl = (desc[29] & 0xff) << 16;
+               xop->dbl |= (desc[30] & 0xff) << 8;
                xop->dbl |= desc[31] & 0xff;
 
                pr_debug("XCOPY seg desc 0x02: DC=1 w/ dbl: %u\n", xop->dbl);
index e61c36c..c193af6 100644 (file)
@@ -636,6 +636,7 @@ struct console xenboot_console = {
        .name           = "xenboot",
        .write          = xenboot_write_console,
        .flags          = CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME,
+       .index          = -1,
 };
 #endif /* CONFIG_EARLY_PRINTK */
 
index c9a9ddd..7a744b6 100644 (file)
@@ -1758,8 +1758,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
                canon_change = (old->c_lflag ^ tty->termios.c_lflag) & ICANON;
        if (canon_change) {
                bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
-               ldata->line_start = 0;
-               ldata->canon_head = ldata->read_tail;
+               ldata->line_start = ldata->canon_head = ldata->read_tail;
                ldata->erasing = 0;
                ldata->lnext = 0;
        }
@@ -2184,28 +2183,34 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
 
                if (!input_available_p(tty, 0)) {
                        if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) {
-                               retval = -EIO;
-                               break;
-                       }
-                       if (tty_hung_up_p(file))
-                               break;
-                       if (!timeout)
-                               break;
-                       if (file->f_flags & O_NONBLOCK) {
-                               retval = -EAGAIN;
-                               break;
-                       }
-                       if (signal_pending(current)) {
-                               retval = -ERESTARTSYS;
-                               break;
-                       }
-                       n_tty_set_room(tty);
-                       up_read(&tty->termios_rwsem);
+                               up_read(&tty->termios_rwsem);
+                               tty_flush_to_ldisc(tty);
+                               down_read(&tty->termios_rwsem);
+                               if (!input_available_p(tty, 0)) {
+                                       retval = -EIO;
+                                       break;
+                               }
+                       } else {
+                               if (tty_hung_up_p(file))
+                                       break;
+                               if (!timeout)
+                                       break;
+                               if (file->f_flags & O_NONBLOCK) {
+                                       retval = -EAGAIN;
+                                       break;
+                               }
+                               if (signal_pending(current)) {
+                                       retval = -ERESTARTSYS;
+                                       break;
+                               }
+                               n_tty_set_room(tty);
+                               up_read(&tty->termios_rwsem);
 
-                       timeout = schedule_timeout(timeout);
+                               timeout = schedule_timeout(timeout);
 
-                       down_read(&tty->termios_rwsem);
-                       continue;
+                               down_read(&tty->termios_rwsem);
+                               continue;
+                       }
                }
                __set_current_state(TASK_RUNNING);
 
index 52379e5..44077c0 100644 (file)
@@ -667,30 +667,21 @@ static int pop_tx_x(struct eg20t_port *priv, unsigned char *buf)
 
 static int dma_push_rx(struct eg20t_port *priv, int size)
 {
-       struct tty_struct *tty;
        int room;
        struct uart_port *port = &priv->port;
        struct tty_port *tport = &port->state->port;
 
-       port = &priv->port;
-       tty = tty_port_tty_get(tport);
-       if (!tty) {
-               dev_dbg(priv->port.dev, "%s:tty is busy now", __func__);
-               return 0;
-       }
-
        room = tty_buffer_request_room(tport, size);
 
        if (room < size)
                dev_warn(port->dev, "Rx overrun: dropping %u bytes\n",
                         size - room);
        if (!room)
-               return room;
+               return 0;
 
        tty_insert_flip_string(tport, sg_virt(&priv->sg_rx), size);
 
        port->icount.rx += room;
-       tty_kref_put(tty);
 
        return room;
 }
@@ -1098,6 +1089,8 @@ static void pch_uart_err_ir(struct eg20t_port *priv, unsigned int lsr)
        if (tty == NULL) {
                for (i = 0; error_msg[i] != NULL; i++)
                        dev_err(&priv->pdev->dev, error_msg[i]);
+       } else {
+               tty_kref_put(tty);
        }
 }
 
index d0d972f..0489a2b 100644 (file)
@@ -732,7 +732,7 @@ static irqreturn_t tegra_uart_isr(int irq, void *data)
 static void tegra_uart_stop_rx(struct uart_port *u)
 {
        struct tegra_uart_port *tup = to_tegra_uport(u);
-       struct tty_struct *tty = tty_port_tty_get(&tup->uport.state->port);
+       struct tty_struct *tty;
        struct tty_port *port = &u->state->port;
        struct dma_tx_state state;
        unsigned long ier;
@@ -744,6 +744,8 @@ static void tegra_uart_stop_rx(struct uart_port *u)
        if (!tup->rx_in_progress)
                return;
 
+       tty = tty_port_tty_get(&tup->uport.state->port);
+
        tegra_uart_wait_sym_time(tup, 1); /* wait a character interval */
 
        ier = tup->ier_shadow;
index 03ba081..6fd60fe 100644 (file)
@@ -1201,6 +1201,9 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
                }
                return 0;
        case TCFLSH:
+               retval = tty_check_change(tty);
+               if (retval)
+                       return retval;
                return __tty_perform_flush(tty, arg);
        default:
                /* Try the mode commands */
index 4a851e1..77b47d8 100644 (file)
@@ -1,6 +1,6 @@
 config USB_CHIPIDEA
        tristate "ChipIdea Highspeed Dual Role Controller"
-       depends on (USB_EHCI_HCD && USB_GADGET) || (USB_EHCI_HCD && !USB_GADGET) || (!USB_EHCI_HCD && USB_GADGET)
+       depends on ((USB_EHCI_HCD && USB_GADGET) || (USB_EHCI_HCD && !USB_GADGET) || (!USB_EHCI_HCD && USB_GADGET)) && HAS_DMA
        help
          Say Y here if your system has a dual role high speed USB
          controller based on ChipIdea silicon IP. Currently, only the
index 74d998d..be822a2 100644 (file)
@@ -131,7 +131,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
                if (ret) {
                        dev_err(&pdev->dev, "usbmisc init failed, ret=%d\n",
                                        ret);
-                       goto err_clk;
+                       goto err_phy;
                }
        }
 
@@ -143,7 +143,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
                dev_err(&pdev->dev,
                        "Can't register ci_hdrc platform device, err=%d\n",
                        ret);
-               goto err_clk;
+               goto err_phy;
        }
 
        if (data->usbmisc_data) {
@@ -164,6 +164,9 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
 
 disable_device:
        ci_hdrc_remove_device(data->ci_pdev);
+err_phy:
+       if (data->phy)
+               usb_phy_shutdown(data->phy);
 err_clk:
        clk_disable_unprepare(data->clk);
        return ret;
index 042320a..d514332 100644 (file)
@@ -129,7 +129,12 @@ static DEFINE_PCI_DEVICE_TABLE(ci_hdrc_pci_id_table) = {
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0829),
                .driver_data = (kernel_ulong_t)&penwell_pci_platdata,
        },
-       { 0, 0, 0, 0, 0, 0, 0 /* end: all zeroes */ }
+       {
+               /* Intel Clovertrail */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe006),
+               .driver_data = (kernel_ulong_t)&penwell_pci_platdata,
+       },
+       { 0 } /* end: all zeroes */
 };
 MODULE_DEVICE_TABLE(pci, ci_hdrc_pci_id_table);
 
index 9462640..23763dc 100644 (file)
@@ -605,6 +605,7 @@ static int ci_hdrc_remove(struct platform_device *pdev)
        dbg_remove_files(ci);
        free_irq(ci->irq, ci);
        ci_role_destroy(ci);
+       kfree(ci->hw_bank.regmap);
 
        return 0;
 }
index 6b4c2f2..9333083 100644 (file)
@@ -1600,6 +1600,8 @@ static void destroy_eps(struct ci_hdrc *ci)
        for (i = 0; i < ci->hw_ep_max; i++) {
                struct ci_hw_ep *hwep = &ci->ci_hw_ep[i];
 
+               if (hwep->pending_td)
+                       free_pending_td(hwep);
                dma_pool_free(ci->qh_pool, hwep->qh.ptr, hwep->qh.dma);
        }
 }
@@ -1667,13 +1669,13 @@ static int ci_udc_stop(struct usb_gadget *gadget,
                if (ci->platdata->notify_event)
                        ci->platdata->notify_event(ci,
                        CI_HDRC_CONTROLLER_STOPPED_EVENT);
-               ci->driver = NULL;
                spin_unlock_irqrestore(&ci->lock, flags);
                _gadget_stop_activity(&ci->gadget);
                spin_lock_irqsave(&ci->lock, flags);
                pm_runtime_put(&ci->gadget.dev);
        }
 
+       ci->driver = NULL;
        spin_unlock_irqrestore(&ci->lock, flags);
 
        return 0;
index 737e3c1..71dc5d7 100644 (file)
@@ -742,6 +742,22 @@ static int check_ctrlrecip(struct dev_state *ps, unsigned int requesttype,
                if ((index & ~USB_DIR_IN) == 0)
                        return 0;
                ret = findintfep(ps->dev, index);
+               if (ret < 0) {
+                       /*
+                        * Some not fully compliant Win apps seem to get
+                        * index wrong and have the endpoint number here
+                        * rather than the endpoint address (with the
+                        * correct direction). Win does let this through,
+                        * so we'll not reject it here but leave it to
+                        * the device to not break KVM. But we warn.
+                        */
+                       ret = findintfep(ps->dev, index ^ 0x80);
+                       if (ret >= 0)
+                               dev_info(&ps->dev->dev,
+                                       "%s: process %i (%s) requesting ep %02x but needs %02x\n",
+                                       __func__, task_pid_nr(current),
+                                       current->comm, index, index ^ 0x80);
+               }
                if (ret >= 0)
                        ret = checkintf(ps, ret);
                break;
index dde4c83..e6b682c 100644 (file)
@@ -3426,6 +3426,9 @@ static int usb_req_set_sel(struct usb_device *udev, enum usb3_link_state state)
        unsigned long long u2_pel;
        int ret;
 
+       if (udev->state != USB_STATE_CONFIGURED)
+               return 0;
+
        /* Convert SEL and PEL stored in ns to us */
        u1_sel = DIV_ROUND_UP(udev->u1_params.sel, 1000);
        u1_pel = DIV_ROUND_UP(udev->u1_params.pel, 1000);
index 997ebe4..2e252aa 100644 (file)
@@ -29,6 +29,7 @@
 #define PCI_VENDOR_ID_SYNOPSYS         0x16c3
 #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3        0xabcd
 #define PCI_DEVICE_ID_INTEL_BYT                0x0f37
+#define PCI_DEVICE_ID_INTEL_MRFLD      0x119e
 
 struct dwc3_pci {
        struct device           *dev;
@@ -189,6 +190,7 @@ static DEFINE_PCI_DEVICE_TABLE(dwc3_pci_id_table) = {
                                PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3),
        },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT), },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), },
        {  }    /* Terminating Entry */
 };
 MODULE_DEVICE_TABLE(pci, dwc3_pci_id_table);
index 1a66c5b..44cf775 100644 (file)
@@ -1034,37 +1034,19 @@ struct ffs_sb_fill_data {
        struct ffs_file_perms perms;
        umode_t root_mode;
        const char *dev_name;
-       union {
-               /* set by ffs_fs_mount(), read by ffs_sb_fill() */
-               void *private_data;
-               /* set by ffs_sb_fill(), read by ffs_fs_mount */
-               struct ffs_data *ffs_data;
-       };
+       struct ffs_data *ffs_data;
 };
 
 static int ffs_sb_fill(struct super_block *sb, void *_data, int silent)
 {
        struct ffs_sb_fill_data *data = _data;
        struct inode    *inode;
-       struct ffs_data *ffs;
+       struct ffs_data *ffs = data->ffs_data;
 
        ENTER();
 
-       /* Initialise data */
-       ffs = ffs_data_new();
-       if (unlikely(!ffs))
-               goto Enomem;
-
        ffs->sb              = sb;
-       ffs->dev_name        = kstrdup(data->dev_name, GFP_KERNEL);
-       if (unlikely(!ffs->dev_name))
-               goto Enomem;
-       ffs->file_perms      = data->perms;
-       ffs->private_data    = data->private_data;
-
-       /* used by the caller of this function */
-       data->ffs_data       = ffs;
-
+       data->ffs_data       = NULL;
        sb->s_fs_info        = ffs;
        sb->s_blocksize      = PAGE_CACHE_SIZE;
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -1080,17 +1062,14 @@ static int ffs_sb_fill(struct super_block *sb, void *_data, int silent)
                                  &data->perms);
        sb->s_root = d_make_root(inode);
        if (unlikely(!sb->s_root))
-               goto Enomem;
+               return -ENOMEM;
 
        /* EP0 file */
        if (unlikely(!ffs_sb_create_file(sb, "ep0", ffs,
                                         &ffs_ep0_operations, NULL)))
-               goto Enomem;
+               return -ENOMEM;
 
        return 0;
-
-Enomem:
-       return -ENOMEM;
 }
 
 static int ffs_fs_parse_opts(struct ffs_sb_fill_data *data, char *opts)
@@ -1193,6 +1172,7 @@ ffs_fs_mount(struct file_system_type *t, int flags,
        struct dentry *rv;
        int ret;
        void *ffs_dev;
+       struct ffs_data *ffs;
 
        ENTER();
 
@@ -1200,18 +1180,30 @@ ffs_fs_mount(struct file_system_type *t, int flags,
        if (unlikely(ret < 0))
                return ERR_PTR(ret);
 
+       ffs = ffs_data_new();
+       if (unlikely(!ffs))
+               return ERR_PTR(-ENOMEM);
+       ffs->file_perms = data.perms;
+
+       ffs->dev_name = kstrdup(dev_name, GFP_KERNEL);
+       if (unlikely(!ffs->dev_name)) {
+               ffs_data_put(ffs);
+               return ERR_PTR(-ENOMEM);
+       }
+
        ffs_dev = functionfs_acquire_dev_callback(dev_name);
-       if (IS_ERR(ffs_dev))
-               return ffs_dev;
+       if (IS_ERR(ffs_dev)) {
+               ffs_data_put(ffs);
+               return ERR_CAST(ffs_dev);
+       }
+       ffs->private_data = ffs_dev;
+       data.ffs_data = ffs;
 
-       data.dev_name = dev_name;
-       data.private_data = ffs_dev;
        rv = mount_nodev(t, flags, &data, ffs_sb_fill);
-
-       /* data.ffs_data is set by ffs_sb_fill */
-       if (IS_ERR(rv))
+       if (IS_ERR(rv) && data.ffs_data) {
                functionfs_release_dev_callback(data.ffs_data);
-
+               ffs_data_put(data.ffs_data);
+       }
        return rv;
 }
 
@@ -2264,6 +2256,8 @@ static int ffs_func_bind(struct usb_configuration *c,
                                   data->raw_descs + ret,
                                   (sizeof data->raw_descs) - ret,
                                   __ffs_func_bind_do_descs, func);
+               if (unlikely(ret < 0))
+                       goto error;
        }
 
        /*
index cc92074..0ac6064 100644 (file)
@@ -2054,7 +2054,7 @@ static struct pxa25x_udc memory = {
 /*
  *     probe - binds to the platform device
  */
-static int __init pxa25x_udc_probe(struct platform_device *pdev)
+static int pxa25x_udc_probe(struct platform_device *pdev)
 {
        struct pxa25x_udc *dev = &memory;
        int retval, irq;
@@ -2203,7 +2203,7 @@ static void pxa25x_udc_shutdown(struct platform_device *_dev)
        pullup_off();
 }
 
-static int __exit pxa25x_udc_remove(struct platform_device *pdev)
+static int pxa25x_udc_remove(struct platform_device *pdev)
 {
        struct pxa25x_udc *dev = platform_get_drvdata(pdev);
 
@@ -2294,7 +2294,8 @@ static int pxa25x_udc_resume(struct platform_device *dev)
 
 static struct platform_driver udc_driver = {
        .shutdown       = pxa25x_udc_shutdown,
-       .remove         = __exit_p(pxa25x_udc_remove),
+       .probe          = pxa25x_udc_probe,
+       .remove         = pxa25x_udc_remove,
        .suspend        = pxa25x_udc_suspend,
        .resume         = pxa25x_udc_resume,
        .driver         = {
@@ -2303,7 +2304,7 @@ static struct platform_driver udc_driver = {
        },
 };
 
-module_platform_driver_probe(udc_driver, pxa25x_udc_probe);
+module_platform_driver(udc_driver);
 
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_AUTHOR("Frank Becker, Robert Schwebel, David Brownell");
index 6bddf1a..a8a99e4 100644 (file)
@@ -543,7 +543,7 @@ static int s3c_hsotg_write_fifo(struct s3c_hsotg *hsotg,
         * FIFO, requests of >512 cause the endpoint to get stuck with a
         * fragment of the end of the transfer in it.
         */
-       if (can_write > 512)
+       if (can_write > 512 && !periodic)
                can_write = 512;
 
        /*
index 4449f56..f2407b2 100644 (file)
@@ -130,7 +130,7 @@ static int usb_hcd_fsl_probe(const struct hc_driver *driver,
        }
 
        /* Enable USB controller, 83xx or 8536 */
-       if (pdata->have_sysif_regs)
+       if (pdata->have_sysif_regs && pdata->controller_ver < FSL_USB_VER_1_6)
                setbits32(hcd->regs + FSL_SOC_USB_CTRL, 0x4);
 
        /* Don't need to set host mode here. It will be done by tdi_reset() */
@@ -232,15 +232,9 @@ static int ehci_fsl_setup_phy(struct usb_hcd *hcd,
        case FSL_USB2_PHY_ULPI:
                if (pdata->have_sysif_regs && pdata->controller_ver) {
                        /* controller version 1.6 or above */
+                       clrbits32(non_ehci + FSL_SOC_USB_CTRL, UTMI_PHY_EN);
                        setbits32(non_ehci + FSL_SOC_USB_CTRL,
-                                       ULPI_PHY_CLK_SEL);
-                       /*
-                        * Due to controller issue of PHY_CLK_VALID in ULPI
-                        * mode, we set USB_CTRL_USB_EN before checking
-                        * PHY_CLK_VALID, otherwise PHY_CLK_VALID doesn't work.
-                        */
-                       clrsetbits_be32(non_ehci + FSL_SOC_USB_CTRL,
-                                       UTMI_PHY_EN, USB_CTRL_USB_EN);
+                               ULPI_PHY_CLK_SEL | USB_CTRL_USB_EN);
                }
                portsc |= PORT_PTS_ULPI;
                break;
@@ -270,8 +264,9 @@ static int ehci_fsl_setup_phy(struct usb_hcd *hcd,
        if (pdata->have_sysif_regs && pdata->controller_ver &&
            (phy_mode == FSL_USB2_PHY_ULPI)) {
                /* check PHY_CLK_VALID to get phy clk valid */
-               if (!spin_event_timeout(in_be32(non_ehci + FSL_SOC_USB_CTRL) &
-                               PHY_CLK_VALID, FSL_USB_PHY_CLK_TIMEOUT, 0)) {
+               if (!(spin_event_timeout(in_be32(non_ehci + FSL_SOC_USB_CTRL) &
+                               PHY_CLK_VALID, FSL_USB_PHY_CLK_TIMEOUT, 0) ||
+                               in_be32(non_ehci + FSL_SOC_USB_PRICTRL))) {
                        printk(KERN_WARNING "fsl-ehci: USB PHY clock invalid\n");
                        return -EINVAL;
                }
index 6bd299e..854c2ec 100644 (file)
@@ -361,7 +361,7 @@ static struct pci_driver ehci_pci_driver = {
        .remove =       usb_hcd_pci_remove,
        .shutdown =     usb_hcd_pci_shutdown,
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_PM
        .driver =       {
                .pm =   &usb_hcd_pci_pm_ops
        },
index 60a5de5..adb01d9 100644 (file)
@@ -824,13 +824,13 @@ static int imx21_hc_urb_enqueue_isoc(struct usb_hcd *hcd,
                        i = DIV_ROUND_UP(wrap_frame(
                                        cur_frame - urb->start_frame),
                                        urb->interval);
-                       if (urb->transfer_flags & URB_ISO_ASAP) {
+
+                       /* Treat underruns as if URB_ISO_ASAP was set */
+                       if ((urb->transfer_flags & URB_ISO_ASAP) ||
+                                       i >= urb->number_of_packets) {
                                urb->start_frame = wrap_frame(urb->start_frame
                                                + i * urb->interval);
                                i = 0;
-                       } else if (i >= urb->number_of_packets) {
-                               ret = -EXDEV;
-                               goto alloc_dmem_failed;
                        }
                }
        }
index 8f6b695..604cad1 100644 (file)
@@ -216,31 +216,26 @@ static int ohci_urb_enqueue (
                        frame &= ~(ed->interval - 1);
                        frame |= ed->branch;
                        urb->start_frame = frame;
+                       ed->last_iso = frame + ed->interval * (size - 1);
                }
        } else if (ed->type == PIPE_ISOCHRONOUS) {
                u16     next = ohci_frame_no(ohci) + 1;
                u16     frame = ed->last_iso + ed->interval;
+               u16     length = ed->interval * (size - 1);
 
                /* Behind the scheduling threshold? */
                if (unlikely(tick_before(frame, next))) {
 
-                       /* USB_ISO_ASAP: Round up to the first available slot */
+                       /* URB_ISO_ASAP: Round up to the first available slot */
                        if (urb->transfer_flags & URB_ISO_ASAP) {
                                frame += (next - frame + ed->interval - 1) &
                                                -ed->interval;
 
                        /*
-                        * Not ASAP: Use the next slot in the stream.  If
-                        * the entire URB falls before the threshold, fail.
+                        * Not ASAP: Use the next slot in the stream,
+                        * no matter what.
                         */
                        } else {
-                               if (tick_before(frame + ed->interval *
-                                       (urb->number_of_packets - 1), next)) {
-                                       retval = -EXDEV;
-                                       usb_hcd_unlink_urb_from_ep(hcd, urb);
-                                       goto fail;
-                               }
-
                                /*
                                 * Some OHCI hardware doesn't handle late TDs
                                 * correctly.  After retiring them it proceeds
@@ -251,9 +246,16 @@ static int ohci_urb_enqueue (
                                urb_priv->td_cnt = DIV_ROUND_UP(
                                                (u16) (next - frame),
                                                ed->interval);
+                               if (urb_priv->td_cnt >= urb_priv->length) {
+                                       ++urb_priv->td_cnt;     /* Mark it */
+                                       ohci_dbg(ohci, "iso underrun %p (%u+%u < %u)\n",
+                                                       urb, frame, length,
+                                                       next);
+                               }
                        }
                }
                urb->start_frame = frame;
+               ed->last_iso = frame + length;
        }
 
        /* fill the TDs and link them to the ed; and
index df4a670..e7f577e 100644 (file)
@@ -41,9 +41,13 @@ finish_urb(struct ohci_hcd *ohci, struct urb *urb, int status)
 __releases(ohci->lock)
 __acquires(ohci->lock)
 {
-        struct device *dev = ohci_to_hcd(ohci)->self.controller;
+       struct device *dev = ohci_to_hcd(ohci)->self.controller;
+       struct usb_host_endpoint *ep = urb->ep;
+       struct urb_priv *urb_priv;
+
        // ASSERT (urb->hcpriv != 0);
 
+ restart:
        urb_free_priv (ohci, urb->hcpriv);
        urb->hcpriv = NULL;
        if (likely(status == -EINPROGRESS))
@@ -80,6 +84,21 @@ __acquires(ohci->lock)
                ohci->hc_control &= ~(OHCI_CTRL_PLE|OHCI_CTRL_IE);
                ohci_writel (ohci, ohci->hc_control, &ohci->regs->control);
        }
+
+       /*
+        * An isochronous URB that is sumitted too late won't have any TDs
+        * (marked by the fact that the td_cnt value is larger than the
+        * actual number of TDs).  If the next URB on this endpoint is like
+        * that, give it back now.
+        */
+       if (!list_empty(&ep->urb_list)) {
+               urb = list_first_entry(&ep->urb_list, struct urb, urb_list);
+               urb_priv = urb->hcpriv;
+               if (urb_priv->td_cnt > urb_priv->length) {
+                       status = 0;
+                       goto restart;
+               }
+       }
 }
 
 
@@ -546,7 +565,6 @@ td_fill (struct ohci_hcd *ohci, u32 info,
                td->hwCBP = cpu_to_hc32 (ohci, data & 0xFFFFF000);
                *ohci_hwPSWp(ohci, td, 0) = cpu_to_hc16 (ohci,
                                                (data & 0x0FFF) | 0xE000);
-               td->ed->last_iso = info & 0xffff;
        } else {
                td->hwCBP = cpu_to_hc32 (ohci, data);
        }
@@ -996,7 +1014,7 @@ rescan_this:
                        urb_priv->td_cnt++;
 
                        /* if URB is done, clean up */
-                       if (urb_priv->td_cnt == urb_priv->length) {
+                       if (urb_priv->td_cnt >= urb_priv->length) {
                                modified = completed = 1;
                                finish_urb(ohci, urb, 0);
                        }
@@ -1086,7 +1104,7 @@ static void takeback_td(struct ohci_hcd *ohci, struct td *td)
        urb_priv->td_cnt++;
 
        /* If all this urb's TDs are done, call complete() */
-       if (urb_priv->td_cnt == urb_priv->length)
+       if (urb_priv->td_cnt >= urb_priv->length)
                finish_urb(ohci, urb, status);
 
        /* clean schedule:  unlink EDs that are no longer busy */
index c300bd2..0f228c4 100644 (file)
@@ -293,7 +293,7 @@ static struct pci_driver uhci_pci_driver = {
        .remove =       usb_hcd_pci_remove,
        .shutdown =     uhci_shutdown,
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_PM
        .driver =       {
                .pm =   &usb_hcd_pci_pm_ops
        },
index 041c6dd..da6f56d 100644 (file)
@@ -1303,7 +1303,7 @@ static int uhci_submit_isochronous(struct uhci_hcd *uhci, struct urb *urb,
                }
 
                /* Fell behind? */
-               if (uhci_frame_before_eq(frame, next)) {
+               if (!uhci_frame_before_eq(next, frame)) {
 
                        /* USB_ISO_ASAP: Round up to the first available slot */
                        if (urb->transfer_flags & URB_ISO_ASAP)
@@ -1311,13 +1311,17 @@ static int uhci_submit_isochronous(struct uhci_hcd *uhci, struct urb *urb,
                                                -qh->period;
 
                        /*
-                        * Not ASAP: Use the next slot in the stream.  If
-                        * the entire URB falls before the threshold, fail.
+                        * Not ASAP: Use the next slot in the stream,
+                        * no matter what.
                         */
                        else if (!uhci_frame_before_eq(next,
                                        frame + (urb->number_of_packets - 1) *
                                                qh->period))
-                               return -EXDEV;
+                               dev_dbg(uhci_dev(uhci), "iso underrun %p (%u+%u < %u)\n",
+                                               urb, frame,
+                                               (urb->number_of_packets - 1) *
+                                                       qh->period,
+                                               next);
                }
        }
 
index fae697e..773a6b2 100644 (file)
@@ -287,7 +287,7 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend)
                if (virt_dev->eps[i].ring && virt_dev->eps[i].ring->dequeue)
                        xhci_queue_stop_endpoint(xhci, slot_id, i, suspend);
        }
-       cmd->command_trb = xhci->cmd_ring->enqueue;
+       cmd->command_trb = xhci_find_next_enqueue(xhci->cmd_ring);
        list_add_tail(&cmd->cmd_list, &virt_dev->cmd_list);
        xhci_queue_stop_endpoint(xhci, slot_id, 0, suspend);
        xhci_ring_cmd_db(xhci);
@@ -552,11 +552,15 @@ void xhci_del_comp_mod_timer(struct xhci_hcd *xhci, u32 status, u16 wIndex)
  *  - Mark a port as being done with device resume,
  *    and ring the endpoint doorbells.
  *  - Stop the Synopsys redriver Compliance Mode polling.
+ *  - Drop and reacquire the xHCI lock, in order to wait for port resume.
  */
 static u32 xhci_get_port_status(struct usb_hcd *hcd,
                struct xhci_bus_state *bus_state,
                __le32 __iomem **port_array,
-               u16 wIndex, u32 raw_port_status)
+               u16 wIndex, u32 raw_port_status,
+               unsigned long flags)
+       __releases(&xhci->lock)
+       __acquires(&xhci->lock)
 {
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
        u32 status = 0;
@@ -591,21 +595,42 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
                        return 0xffffffff;
                if (time_after_eq(jiffies,
                                        bus_state->resume_done[wIndex])) {
+                       int time_left;
+
                        xhci_dbg(xhci, "Resume USB2 port %d\n",
                                        wIndex + 1);
                        bus_state->resume_done[wIndex] = 0;
                        clear_bit(wIndex, &bus_state->resuming_ports);
+
+                       set_bit(wIndex, &bus_state->rexit_ports);
                        xhci_set_link_state(xhci, port_array, wIndex,
                                        XDEV_U0);
-                       xhci_dbg(xhci, "set port %d resume\n",
-                                       wIndex + 1);
-                       slot_id = xhci_find_slot_id_by_port(hcd, xhci,
-                                       wIndex + 1);
-                       if (!slot_id) {
-                               xhci_dbg(xhci, "slot_id is zero\n");
-                               return 0xffffffff;
+
+                       spin_unlock_irqrestore(&xhci->lock, flags);
+                       time_left = wait_for_completion_timeout(
+                                       &bus_state->rexit_done[wIndex],
+                                       msecs_to_jiffies(
+                                               XHCI_MAX_REXIT_TIMEOUT));
+                       spin_lock_irqsave(&xhci->lock, flags);
+
+                       if (time_left) {
+                               slot_id = xhci_find_slot_id_by_port(hcd,
+                                               xhci, wIndex + 1);
+                               if (!slot_id) {
+                                       xhci_dbg(xhci, "slot_id is zero\n");
+                                       return 0xffffffff;
+                               }
+                               xhci_ring_device(xhci, slot_id);
+                       } else {
+                               int port_status = xhci_readl(xhci,
+                                               port_array[wIndex]);
+                               xhci_warn(xhci, "Port resume took longer than %i msec, port status = 0x%x\n",
+                                               XHCI_MAX_REXIT_TIMEOUT,
+                                               port_status);
+                               status |= USB_PORT_STAT_SUSPEND;
+                               clear_bit(wIndex, &bus_state->rexit_ports);
                        }
-                       xhci_ring_device(xhci, slot_id);
+
                        bus_state->port_c_suspend |= 1 << wIndex;
                        bus_state->suspended_ports &= ~(1 << wIndex);
                } else {
@@ -728,7 +753,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                        break;
                }
                status = xhci_get_port_status(hcd, bus_state, port_array,
-                               wIndex, temp);
+                               wIndex, temp, flags);
                if (status == 0xffffffff)
                        goto error;
 
index 53b972c..83bcd13 100644 (file)
@@ -2428,6 +2428,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
        for (i = 0; i < USB_MAXCHILDREN; ++i) {
                xhci->bus_state[0].resume_done[i] = 0;
                xhci->bus_state[1].resume_done[i] = 0;
+               /* Only the USB 2.0 completions will ever be used. */
+               init_completion(&xhci->bus_state[1].rexit_done[i]);
        }
 
        if (scratchpad_alloc(xhci, flags))
index c2d4950..236c3aa 100644 (file)
@@ -351,7 +351,7 @@ static struct pci_driver xhci_pci_driver = {
        /* suspend and resume implemented later */
 
        .shutdown =     usb_hcd_pci_shutdown,
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_PM
        .driver = {
                .pm = &usb_hcd_pci_pm_ops
        },
index 411da1f..6bfbd80 100644 (file)
@@ -123,6 +123,16 @@ static int enqueue_is_link_trb(struct xhci_ring *ring)
        return TRB_TYPE_LINK_LE32(link->control);
 }
 
+union xhci_trb *xhci_find_next_enqueue(struct xhci_ring *ring)
+{
+       /* Enqueue pointer can be left pointing to the link TRB,
+        * we must handle that
+        */
+       if (TRB_TYPE_LINK_LE32(ring->enqueue->link.control))
+               return ring->enq_seg->next->trbs;
+       return ring->enqueue;
+}
+
 /* Updates trb to point to the next TRB in the ring, and updates seg if the next
  * TRB is in a new segment.  This does not skip over link TRBs, and it does not
  * effect the ring dequeue or enqueue pointers.
@@ -859,8 +869,12 @@ remove_finished_td:
                /* Otherwise ring the doorbell(s) to restart queued transfers */
                ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
        }
-       ep->stopped_td = NULL;
-       ep->stopped_trb = NULL;
+
+       /* Clear stopped_td and stopped_trb if endpoint is not halted */
+       if (!(ep->ep_state & EP_HALTED)) {
+               ep->stopped_td = NULL;
+               ep->stopped_trb = NULL;
+       }
 
        /*
         * Drop the lock and complete the URBs in the cancelled TD list.
@@ -1414,6 +1428,12 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
                        inc_deq(xhci, xhci->cmd_ring);
                        return;
                }
+               /* There is no command to handle if we get a stop event when the
+                * command ring is empty, event->cmd_trb points to the next
+                * unset command
+                */
+               if (xhci->cmd_ring->dequeue == xhci->cmd_ring->enqueue)
+                       return;
        }
 
        switch (le32_to_cpu(xhci->cmd_ring->dequeue->generic.field[3])
@@ -1743,6 +1763,19 @@ static void handle_port_status(struct xhci_hcd *xhci,
                }
        }
 
+       /*
+        * Check to see if xhci-hub.c is waiting on RExit to U0 transition (or
+        * RExit to a disconnect state).  If so, let the the driver know it's
+        * out of the RExit state.
+        */
+       if (!DEV_SUPERSPEED(temp) &&
+                       test_and_clear_bit(faked_port_index,
+                               &bus_state->rexit_ports)) {
+               complete(&bus_state->rexit_done[faked_port_index]);
+               bogus_port_status = true;
+               goto cleanup;
+       }
+
        if (hcd->speed != HCD_USB3)
                xhci_test_and_clear_bit(xhci, port_array, faked_port_index,
                                        PORT_PLC);
index 49b6edb..1e36dbb 100644 (file)
@@ -2598,15 +2598,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci,
        if (command) {
                cmd_completion = command->completion;
                cmd_status = &command->status;
-               command->command_trb = xhci->cmd_ring->enqueue;
-
-               /* Enqueue pointer can be left pointing to the link TRB,
-                * we must handle that
-                */
-               if (TRB_TYPE_LINK_LE32(command->command_trb->link.control))
-                       command->command_trb =
-                               xhci->cmd_ring->enq_seg->next->trbs;
-
+               command->command_trb = xhci_find_next_enqueue(xhci->cmd_ring);
                list_add_tail(&command->cmd_list, &virt_dev->cmd_list);
        } else {
                cmd_completion = &virt_dev->cmd_completion;
@@ -2614,7 +2606,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci,
        }
        init_completion(cmd_completion);
 
-       cmd_trb = xhci->cmd_ring->dequeue;
+       cmd_trb = xhci_find_next_enqueue(xhci->cmd_ring);
        if (!ctx_change)
                ret = xhci_queue_configure_endpoint(xhci, in_ctx->dma,
                                udev->slot_id, must_succeed);
@@ -3439,14 +3431,7 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev)
 
        /* Attempt to submit the Reset Device command to the command ring */
        spin_lock_irqsave(&xhci->lock, flags);
-       reset_device_cmd->command_trb = xhci->cmd_ring->enqueue;
-
-       /* Enqueue pointer can be left pointing to the link TRB,
-        * we must handle that
-        */
-       if (TRB_TYPE_LINK_LE32(reset_device_cmd->command_trb->link.control))
-               reset_device_cmd->command_trb =
-                       xhci->cmd_ring->enq_seg->next->trbs;
+       reset_device_cmd->command_trb = xhci_find_next_enqueue(xhci->cmd_ring);
 
        list_add_tail(&reset_device_cmd->cmd_list, &virt_dev->cmd_list);
        ret = xhci_queue_reset_device(xhci, slot_id);
@@ -3650,7 +3635,7 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev)
        union xhci_trb *cmd_trb;
 
        spin_lock_irqsave(&xhci->lock, flags);
-       cmd_trb = xhci->cmd_ring->dequeue;
+       cmd_trb = xhci_find_next_enqueue(xhci->cmd_ring);
        ret = xhci_queue_slot_control(xhci, TRB_ENABLE_SLOT, 0);
        if (ret) {
                spin_unlock_irqrestore(&xhci->lock, flags);
@@ -3785,7 +3770,7 @@ int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev)
                                slot_ctx->dev_info >> 27);
 
        spin_lock_irqsave(&xhci->lock, flags);
-       cmd_trb = xhci->cmd_ring->dequeue;
+       cmd_trb = xhci_find_next_enqueue(xhci->cmd_ring);
        ret = xhci_queue_address_device(xhci, virt_dev->in_ctx->dma,
                                        udev->slot_id);
        if (ret) {
index 46aa148..289fbfb 100644 (file)
@@ -1412,8 +1412,18 @@ struct xhci_bus_state {
        unsigned long           resume_done[USB_MAXCHILDREN];
        /* which ports have started to resume */
        unsigned long           resuming_ports;
+       /* Which ports are waiting on RExit to U0 transition. */
+       unsigned long           rexit_ports;
+       struct completion       rexit_done[USB_MAXCHILDREN];
 };
 
+
+/*
+ * It can take up to 20 ms to transition from RExit to U0 on the
+ * Intel Lynx Point LP xHCI host.
+ */
+#define        XHCI_MAX_REXIT_TIMEOUT  (20 * 1000)
+
 static inline unsigned int hcd_index(struct usb_hcd *hcd)
 {
        if (hcd->speed == HCD_USB3)
@@ -1840,6 +1850,7 @@ int xhci_cancel_cmd(struct xhci_hcd *xhci, struct xhci_command *command,
                union xhci_trb *cmd_trb);
 void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, unsigned int slot_id,
                unsigned int ep_index, unsigned int stream_id);
+union xhci_trb *xhci_find_next_enqueue(struct xhci_ring *ring);
 
 /* xHCI roothub code */
 void xhci_set_link_state(struct xhci_hcd *xhci, __le32 __iomem **port_array,
index 4047cbb..bd4138d 100644 (file)
@@ -535,6 +535,9 @@ static int dsps_probe(struct platform_device *pdev)
        struct dsps_glue *glue;
        int ret;
 
+       if (!strcmp(pdev->name, "musb-hdrc"))
+               return -ENODEV;
+
        match = of_match_node(musb_dsps_of_match, pdev->dev.of_node);
        if (!match) {
                dev_err(&pdev->dev, "fail to get matching of_match struct\n");
index 9a08679..b19ed21 100644 (file)
@@ -1790,6 +1790,10 @@ int musb_gadget_setup(struct musb *musb)
        musb->g.max_speed = USB_SPEED_HIGH;
        musb->g.speed = USB_SPEED_UNKNOWN;
 
+       MUSB_DEV_MODE(musb);
+       musb->xceiv->otg->default_a = 0;
+       musb->xceiv->state = OTG_STATE_B_IDLE;
+
        /* this "gadget" abstracts/virtualizes the controller */
        musb->g.name = musb_driver_name;
        musb->g.is_otg = 1;
@@ -1849,7 +1853,6 @@ static int musb_gadget_start(struct usb_gadget *g,
        musb->gadget_driver = driver;
 
        spin_lock_irqsave(&musb->lock, flags);
-       musb->is_active = 1;
 
        otg_set_peripheral(otg, &musb->g);
        musb->xceiv->state = OTG_STATE_B_IDLE;
index b2f29c9..02799a5 100644 (file)
@@ -241,7 +241,7 @@ static int gpio_vbus_set_suspend(struct usb_phy *phy, int suspend)
 
 /* platform driver interface */
 
-static int __init gpio_vbus_probe(struct platform_device *pdev)
+static int gpio_vbus_probe(struct platform_device *pdev)
 {
        struct gpio_vbus_mach_info *pdata = dev_get_platdata(&pdev->dev);
        struct gpio_vbus_data *gpio_vbus;
@@ -349,7 +349,7 @@ err_gpio:
        return err;
 }
 
-static int __exit gpio_vbus_remove(struct platform_device *pdev)
+static int gpio_vbus_remove(struct platform_device *pdev)
 {
        struct gpio_vbus_data *gpio_vbus = platform_get_drvdata(pdev);
        struct gpio_vbus_mach_info *pdata = dev_get_platdata(&pdev->dev);
@@ -398,8 +398,6 @@ static const struct dev_pm_ops gpio_vbus_dev_pm_ops = {
 };
 #endif
 
-/* NOTE:  the gpio-vbus device may *NOT* be hotplugged */
-
 MODULE_ALIAS("platform:gpio-vbus");
 
 static struct platform_driver gpio_vbus_driver = {
@@ -410,10 +408,11 @@ static struct platform_driver gpio_vbus_driver = {
                .pm = &gpio_vbus_dev_pm_ops,
 #endif
        },
-       .remove  = __exit_p(gpio_vbus_remove),
+       .probe          = gpio_vbus_probe,
+       .remove         = gpio_vbus_remove,
 };
 
-module_platform_driver_probe(gpio_vbus_driver, gpio_vbus_probe);
+module_platform_driver(gpio_vbus_driver);
 
 MODULE_DESCRIPTION("simple GPIO controlled OTG transceiver driver");
 MODULE_AUTHOR("Philipp Zabel");
index 1cf6f12..80a7104 100644 (file)
@@ -81,6 +81,7 @@ static void option_instat_callback(struct urb *urb);
 
 #define HUAWEI_VENDOR_ID                       0x12D1
 #define HUAWEI_PRODUCT_E173                    0x140C
+#define HUAWEI_PRODUCT_E1750                   0x1406
 #define HUAWEI_PRODUCT_K4505                   0x1464
 #define HUAWEI_PRODUCT_K3765                   0x1465
 #define HUAWEI_PRODUCT_K4605                   0x14C6
@@ -567,6 +568,8 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1c23, USB_CLASS_COMM, 0x02, 0xff) },
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E173, 0xff, 0xff, 0xff),
                .driver_info = (kernel_ulong_t) &net_intf1_blacklist },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E1750, 0xff, 0xff, 0xff),
+               .driver_info = (kernel_ulong_t) &net_intf2_blacklist },
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1441, USB_CLASS_COMM, 0x02, 0xff) },
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x1442, USB_CLASS_COMM, 0x02, 0xff) },
        { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4505, 0xff, 0xff, 0xff),
index a9807de..4fb7a8f 100644 (file)
@@ -545,6 +545,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
        long npage;
        int ret = 0, prot = 0;
        uint64_t mask;
+       struct vfio_dma *dma = NULL;
+       unsigned long pfn;
 
        end = map->iova + map->size;
 
@@ -587,8 +589,6 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
        }
 
        for (iova = map->iova; iova < end; iova += size, vaddr += size) {
-               struct vfio_dma *dma = NULL;
-               unsigned long pfn;
                long i;
 
                /* Pin a contiguous chunk of memory */
@@ -597,16 +597,15 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
                if (npage <= 0) {
                        WARN_ON(!npage);
                        ret = (int)npage;
-                       break;
+                       goto out;
                }
 
                /* Verify pages are not already mapped */
                for (i = 0; i < npage; i++) {
                        if (iommu_iova_to_phys(iommu->domain,
                                               iova + (i << PAGE_SHIFT))) {
-                               vfio_unpin_pages(pfn, npage, prot, true);
                                ret = -EBUSY;
-                               break;
+                               goto out_unpin;
                        }
                }
 
@@ -616,8 +615,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
                if (ret) {
                        if (ret != -EBUSY ||
                            map_try_harder(iommu, iova, pfn, npage, prot)) {
-                               vfio_unpin_pages(pfn, npage, prot, true);
-                               break;
+                               goto out_unpin;
                        }
                }
 
@@ -672,9 +670,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
                        dma = kzalloc(sizeof(*dma), GFP_KERNEL);
                        if (!dma) {
                                iommu_unmap(iommu->domain, iova, size);
-                               vfio_unpin_pages(pfn, npage, prot, true);
                                ret = -ENOMEM;
-                               break;
+                               goto out_unpin;
                        }
 
                        dma->size = size;
@@ -685,16 +682,21 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
                }
        }
 
-       if (ret) {
-               struct vfio_dma *tmp;
-               iova = map->iova;
-               size = map->size;
-               while ((tmp = vfio_find_dma(iommu, iova, size))) {
-                       int r = vfio_remove_dma_overlap(iommu, iova,
-                                                       &size, tmp);
-                       if (WARN_ON(r || !size))
-                               break;
-               }
+       WARN_ON(ret);
+       mutex_unlock(&iommu->lock);
+       return ret;
+
+out_unpin:
+       vfio_unpin_pages(pfn, npage, prot, true);
+
+out:
+       iova = map->iova;
+       size = map->size;
+       while ((dma = vfio_find_dma(iommu, iova, size))) {
+               int r = vfio_remove_dma_overlap(iommu, iova,
+                                               &size, dma);
+               if (WARN_ON(r || !size))
+                       break;
        }
 
        mutex_unlock(&iommu->lock);
index 592b316..ce5221f 100644 (file)
@@ -728,7 +728,12 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq,
        }
        se_sess = tv_nexus->tvn_se_sess;
 
-       tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_KERNEL);
+       tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC);
+       if (tag < 0) {
+               pr_err("Unable to obtain tag for tcm_vhost_cmd\n");
+               return ERR_PTR(-ENOMEM);
+       }
+
        cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag];
        sg = cmd->tvc_sgl;
        pages = cmd->tvc_upages;
index 75dca19..6ac7552 100644 (file)
@@ -514,7 +514,7 @@ static int mmphw_probe(struct platform_device *pdev)
        if (IS_ERR(ctrl->clk)) {
                dev_err(ctrl->dev, "unable to get clk %s\n", mi->clk_name);
                ret = -ENOENT;
-               goto failed_get_clk;
+               goto failed;
        }
        clk_prepare_enable(ctrl->clk);
 
@@ -551,21 +551,8 @@ failed_path_init:
                path_deinit(path_plat);
        }
 
-       if (ctrl->clk) {
-               devm_clk_put(ctrl->dev, ctrl->clk);
-               clk_disable_unprepare(ctrl->clk);
-       }
-failed_get_clk:
-       devm_free_irq(ctrl->dev, ctrl->irq, ctrl);
+       clk_disable_unprepare(ctrl->clk);
 failed:
-       if (ctrl) {
-               if (ctrl->reg_base)
-                       devm_iounmap(ctrl->dev, ctrl->reg_base);
-               devm_release_mem_region(ctrl->dev, res->start,
-                               resource_size(res));
-               devm_kfree(ctrl->dev, ctrl);
-       }
-
        dev_err(&pdev->dev, "device init failed\n");
 
        return ret;
index d250ed0..27197a8 100644 (file)
@@ -620,6 +620,7 @@ static int mxsfb_restore_mode(struct mxsfb_info *host)
                break;
        case 3:
                bits_per_pixel = 32;
+               break;
        case 1:
        default:
                return -EINVAL;
index 7ef079c..c172a52 100644 (file)
@@ -2075,6 +2075,7 @@ static int neofb_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (!fb_find_mode(&info->var, info, mode_option, NULL, 0,
                        info->monspecs.modedb, 16)) {
                printk(KERN_ERR "neofb: Unable to find usable video mode.\n");
+               err = -EINVAL;
                goto err_map_video;
        }
 
@@ -2097,7 +2098,8 @@ static int neofb_probe(struct pci_dev *dev, const struct pci_device_id *id)
               info->fix.smem_len >> 10, info->var.xres,
               info->var.yres, h_sync / 1000, h_sync % 1000, v_sync);
 
-       if (fb_alloc_cmap(&info->cmap, 256, 0) < 0)
+       err = fb_alloc_cmap(&info->cmap, 256, 0);
+       if (err < 0)
                goto err_map_video;
 
        err = register_framebuffer(info);
index 171821d..ba5b40f 100644 (file)
@@ -120,7 +120,7 @@ int of_get_display_timing(struct device_node *np, const char *name,
                return -EINVAL;
        }
 
-       timing_np = of_find_node_by_name(np, name);
+       timing_np = of_get_child_by_name(np, name);
        if (!timing_np) {
                pr_err("%s: could not find node '%s'\n",
                        of_node_full_name(np), name);
@@ -143,11 +143,11 @@ struct display_timings *of_get_display_timings(struct device_node *np)
        struct display_timings *disp;
 
        if (!np) {
-               pr_err("%s: no devicenode given\n", of_node_full_name(np));
+               pr_err("%s: no device node given\n", of_node_full_name(np));
                return NULL;
        }
 
-       timings_np = of_find_node_by_name(np, "display-timings");
+       timings_np = of_get_child_by_name(np, "display-timings");
        if (!timings_np) {
                pr_err("%s: could not find display-timings node\n",
                        of_node_full_name(np));
index 6c90885..10b25e7 100644 (file)
@@ -35,6 +35,7 @@ config DISPLAY_PANEL_DPI
 
 config DISPLAY_PANEL_DSI_CM
        tristate "Generic DSI Command Mode Panel"
+       depends on BACKLIGHT_CLASS_DEVICE
        help
          Driver for generic DSI command mode panels.
 
index 1b60698..ccd9073 100644 (file)
@@ -191,7 +191,7 @@ static int tvc_probe_pdata(struct platform_device *pdev)
        in = omap_dss_find_output(pdata->source);
        if (in == NULL) {
                dev_err(&pdev->dev, "Failed to find video source\n");
-               return -ENODEV;
+               return -EPROBE_DEFER;
        }
 
        ddata->in = in;
index bc5f8ce..63d88ee 100644 (file)
@@ -263,7 +263,7 @@ static int dvic_probe_pdata(struct platform_device *pdev)
        in = omap_dss_find_output(pdata->source);
        if (in == NULL) {
                dev_err(&pdev->dev, "Failed to find video source\n");
-               return -ENODEV;
+               return -EPROBE_DEFER;
        }
 
        ddata->in = in;
index c582671..9abe2c0 100644 (file)
@@ -290,7 +290,7 @@ static int hdmic_probe_pdata(struct platform_device *pdev)
        in = omap_dss_find_output(pdata->source);
        if (in == NULL) {
                dev_err(&pdev->dev, "Failed to find video source\n");
-               return -ENODEV;
+               return -EPROBE_DEFER;
        }
 
        ddata->in = in;
index 02a7340..4779750 100644 (file)
@@ -3691,6 +3691,7 @@ static int __init omap_dispchw_probe(struct platform_device *pdev)
        }
 
        pm_runtime_enable(&pdev->dev);
+       pm_runtime_irq_safe(&pdev->dev);
 
        r = dispc_runtime_get();
        if (r)
index 47ca86c..d838ba8 100644 (file)
@@ -1336,14 +1336,7 @@ static int s3_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
                        (info->var.bits_per_pixel * info->var.xres_virtual);
        if (info->var.yres_virtual < info->var.yres) {
                dev_err(info->device, "virtual vertical size smaller than real\n");
-               goto err_find_mode;
-       }
-
-       /* maximize virtual vertical size for fast scrolling */
-       info->var.yres_virtual = info->fix.smem_len * 8 /
-                       (info->var.bits_per_pixel * info->var.xres_virtual);
-       if (info->var.yres_virtual < info->var.yres) {
-               dev_err(info->device, "virtual vertical size smaller than real\n");
+               rc = -EINVAL;
                goto err_find_mode;
        }
 
index 5be5e3d..19f3c3f 100644 (file)
@@ -802,6 +802,12 @@ static int hpwdt_init_one(struct pci_dev *dev,
                return -ENODEV;
        }
 
+       /*
+        * Ignore all auxilary iLO devices with the following PCI ID
+        */
+       if (dev->subsystem_device == 0x1979)
+               return -ENODEV;
+
        if (pci_enable_device(dev)) {
                dev_warn(&dev->dev,
                        "Not possible to enable PCI Device: 0x%x:0x%x.\n",
index 491419e..5c3d4df 100644 (file)
@@ -35,7 +35,7 @@
 #define KEMPLD_WDT_STAGE_TIMEOUT(x)    (0x1b + (x) * 4)
 #define KEMPLD_WDT_STAGE_CFG(x)                (0x18 + (x))
 #define STAGE_CFG_GET_PRESCALER(x)     (((x) & 0x30) >> 4)
-#define STAGE_CFG_SET_PRESCALER(x)     (((x) & 0x30) << 4)
+#define STAGE_CFG_SET_PRESCALER(x)     (((x) & 0x3) << 4)
 #define STAGE_CFG_PRESCALER_MASK       0x30
 #define STAGE_CFG_ACTION_MASK          0x7
 #define STAGE_CFG_ASSERT               (1 << 3)
index 1f94b42..f6caa77 100644 (file)
@@ -146,7 +146,7 @@ static const struct watchdog_ops sunxi_wdt_ops = {
        .set_timeout    = sunxi_wdt_set_timeout,
 };
 
-static int __init sunxi_wdt_probe(struct platform_device *pdev)
+static int sunxi_wdt_probe(struct platform_device *pdev)
 {
        struct sunxi_wdt_dev *sunxi_wdt;
        struct resource *res;
@@ -187,7 +187,7 @@ static int __init sunxi_wdt_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int __exit sunxi_wdt_remove(struct platform_device *pdev)
+static int sunxi_wdt_remove(struct platform_device *pdev)
 {
        struct sunxi_wdt_dev *sunxi_wdt = platform_get_drvdata(pdev);
 
index 42913f1..c9b0c62 100644 (file)
@@ -310,7 +310,8 @@ static long ts72xx_wdt_ioctl(struct file *file, unsigned int cmd,
 
        case WDIOC_GETSTATUS:
        case WDIOC_GETBOOTSTATUS:
-               return put_user(0, p);
+               error = put_user(0, p);
+               break;
 
        case WDIOC_KEEPALIVE:
                ts72xx_wdt_kick(wdt);
index a50c6e3..b232908 100644 (file)
@@ -398,8 +398,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
        if (nr_pages > ARRAY_SIZE(frame_list))
                nr_pages = ARRAY_SIZE(frame_list);
 
-       scratch_page = get_balloon_scratch_page();
-
        for (i = 0; i < nr_pages; i++) {
                page = alloc_page(gfp);
                if (page == NULL) {
@@ -413,6 +411,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 
                scrub_page(page);
 
+               /*
+                * Ballooned out frames are effectively replaced with
+                * a scratch frame.  Ensure direct mappings and the
+                * p2m are consistent.
+                */
+               scratch_page = get_balloon_scratch_page();
 #ifdef CONFIG_XEN_HAVE_PVMMU
                if (xen_pv_domain() && !PageHighMem(page)) {
                        ret = HYPERVISOR_update_va_mapping(
@@ -422,24 +426,19 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
                        BUG_ON(ret);
                }
 #endif
-       }
-
-       /* Ensure that ballooned highmem pages don't have kmaps. */
-       kmap_flush_unused();
-       flush_tlb_all();
-
-       /* No more mappings: invalidate P2M and add to balloon. */
-       for (i = 0; i < nr_pages; i++) {
-               pfn = mfn_to_pfn(frame_list[i]);
                if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                        unsigned long p;
                        p = page_to_pfn(scratch_page);
                        __set_phys_to_machine(pfn, pfn_to_mfn(p));
                }
+               put_balloon_scratch_page();
+
                balloon_append(pfn_to_page(pfn));
        }
 
-       put_balloon_scratch_page();
+       /* Ensure that ballooned highmem pages don't have kmaps. */
+       kmap_flush_unused();
+       flush_tlb_all();
 
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
index 646337d..5293003 100644 (file)
@@ -600,9 +600,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 
        /* lock down the parent dentry so we can peer at it */
        parent = dget_parent(dentry);
-       if (!parent->d_inode)
-               goto out_bad;
-
        dir = AFS_FS_I(parent->d_inode);
 
        /* validate the parent directory */
index 6b868f0..067e3d3 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -167,10 +167,25 @@ static int __init aio_setup(void)
 }
 __initcall(aio_setup);
 
+static void put_aio_ring_file(struct kioctx *ctx)
+{
+       struct file *aio_ring_file = ctx->aio_ring_file;
+       if (aio_ring_file) {
+               truncate_setsize(aio_ring_file->f_inode, 0);
+
+               /* Prevent further access to the kioctx from migratepages */
+               spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock);
+               aio_ring_file->f_inode->i_mapping->private_data = NULL;
+               ctx->aio_ring_file = NULL;
+               spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock);
+
+               fput(aio_ring_file);
+       }
+}
+
 static void aio_free_ring(struct kioctx *ctx)
 {
        int i;
-       struct file *aio_ring_file = ctx->aio_ring_file;
 
        for (i = 0; i < ctx->nr_pages; i++) {
                pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
@@ -178,14 +193,10 @@ static void aio_free_ring(struct kioctx *ctx)
                put_page(ctx->ring_pages[i]);
        }
 
+       put_aio_ring_file(ctx);
+
        if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
                kfree(ctx->ring_pages);
-
-       if (aio_ring_file) {
-               truncate_setsize(aio_ring_file->f_inode, 0);
-               fput(aio_ring_file);
-               ctx->aio_ring_file = NULL;
-       }
 }
 
 static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
@@ -207,9 +218,8 @@ static int aio_set_page_dirty(struct page *page)
 static int aio_migratepage(struct address_space *mapping, struct page *new,
                        struct page *old, enum migrate_mode mode)
 {
-       struct kioctx *ctx = mapping->private_data;
+       struct kioctx *ctx;
        unsigned long flags;
-       unsigned idx = old->index;
        int rc;
 
        /* Writeback must be complete */
@@ -224,10 +234,23 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 
        get_page(new);
 
-       spin_lock_irqsave(&ctx->completion_lock, flags);
-       migrate_page_copy(new, old);
-       ctx->ring_pages[idx] = new;
-       spin_unlock_irqrestore(&ctx->completion_lock, flags);
+       /* We can potentially race against kioctx teardown here.  Use the
+        * address_space's private data lock to protect the mapping's
+        * private_data.
+        */
+       spin_lock(&mapping->private_lock);
+       ctx = mapping->private_data;
+       if (ctx) {
+               pgoff_t idx;
+               spin_lock_irqsave(&ctx->completion_lock, flags);
+               migrate_page_copy(new, old);
+               idx = old->index;
+               if (idx < (pgoff_t)ctx->nr_pages)
+                       ctx->ring_pages[idx] = new;
+               spin_unlock_irqrestore(&ctx->completion_lock, flags);
+       } else
+               rc = -EBUSY;
+       spin_unlock(&mapping->private_lock);
 
        return rc;
 }
@@ -617,8 +640,7 @@ out_freepcpu:
 out_freeref:
        free_percpu(ctx->users.pcpu_count);
 out_freectx:
-       if (ctx->aio_ring_file)
-               fput(ctx->aio_ring_file);
+       put_aio_ring_file(ctx);
        kmem_cache_free(kioctx_cachep, ctx);
        pr_debug("error allocating ioctx %d\n", err);
        return ERR_PTR(err);
index 100edcc..4c94a79 100644 (file)
@@ -1413,7 +1413,7 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
  *   long file_ofs
  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
  */
-static void fill_files_note(struct memelfnote *note)
+static int fill_files_note(struct memelfnote *note)
 {
        struct vm_area_struct *vma;
        unsigned count, size, names_ofs, remaining, n;
@@ -1428,11 +1428,11 @@ static void fill_files_note(struct memelfnote *note)
        names_ofs = (2 + 3 * count) * sizeof(data[0]);
  alloc:
        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
-               goto err;
+               return -EINVAL;
        size = round_up(size, PAGE_SIZE);
        data = vmalloc(size);
        if (!data)
-               goto err;
+               return -ENOMEM;
 
        start_end_ofs = data + 2;
        name_base = name_curpos = ((char *)data) + names_ofs;
@@ -1485,7 +1485,7 @@ static void fill_files_note(struct memelfnote *note)
 
        size = name_curpos - (char *)data;
        fill_note(note, "CORE", NT_FILE, size, data);
err: ;
      return 0;
 }
 
 #ifdef CORE_DUMP_USE_REGSET
@@ -1686,8 +1686,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
        fill_auxv_note(&info->auxv, current->mm);
        info->size += notesize(&info->auxv);
 
-       fill_files_note(&info->files);
-       info->size += notesize(&info->files);
+       if (fill_files_note(&info->files) == 0)
+               info->size += notesize(&info->files);
 
        return 1;
 }
@@ -1719,7 +1719,8 @@ static int write_note_info(struct elf_note_info *info,
                        return 0;
                if (first && !writenote(&info->auxv, file, foffset))
                        return 0;
-               if (first && !writenote(&info->files, file, foffset))
+               if (first && info->files.data &&
+                               !writenote(&info->files, file, foffset))
                        return 0;
 
                for (i = 1; i < info->thread_notes; ++i)
@@ -1806,6 +1807,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
 
 struct elf_note_info {
        struct memelfnote *notes;
+       struct memelfnote *notes_files;
        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
        struct list_head thread_list;
@@ -1896,9 +1898,12 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 
        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
        fill_auxv_note(info->notes + 3, current->mm);
-       fill_files_note(info->notes + 4);
+       info->numnote = 4;
 
-       info->numnote = 5;
+       if (fill_files_note(info->notes + info->numnote) == 0) {
+               info->notes_files = info->notes + info->numnote;
+               info->numnote++;
+       }
 
        /* Try to dump the FPU. */
        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
@@ -1960,8 +1965,9 @@ static void free_note_info(struct elf_note_info *info)
                kfree(list_entry(tmp, struct elf_thread_status, list));
        }
 
-       /* Free data allocated by fill_files_note(): */
-       vfree(info->notes[4].data);
+       /* Free data possibly allocated by fill_files_note(): */
+       if (info->notes_files)
+               vfree(info->notes_files->data);
 
        kfree(info->prstatus);
        kfree(info->psinfo);
@@ -2044,7 +2050,7 @@ static int elf_core_dump(struct coredump_params *cprm)
        struct vm_area_struct *vma, *gate_vma;
        struct elfhdr *elf = NULL;
        loff_t offset = 0, dataoff, foffset;
-       struct elf_note_info info;
+       struct elf_note_info info = { };
        struct elf_phdr *phdr4note = NULL;
        struct elf_shdr *shdr4extnum = NULL;
        Elf_Half e_phnum;
index b3b20ed..ea5035d 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src)
                src_p = kmap_atomic(src_bv->bv_page);
                dst_p = kmap_atomic(dst_bv->bv_page);
 
-               memcpy(dst_p + dst_bv->bv_offset,
-                      src_p + src_bv->bv_offset,
+               memcpy(dst_p + dst_offset,
+                      src_p + src_offset,
                       bytes);
 
                kunmap_atomic(dst_p);
index 58b7d14..08cc08f 100644 (file)
@@ -107,7 +107,8 @@ static void check_idle_worker(struct btrfs_worker_thread *worker)
                worker->idle = 1;
 
                /* the list may be empty if the worker is just starting */
-               if (!list_empty(&worker->worker_list)) {
+               if (!list_empty(&worker->worker_list) &&
+                   !worker->workers->stopping) {
                        list_move(&worker->worker_list,
                                 &worker->workers->idle_list);
                }
@@ -127,7 +128,8 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
                spin_lock_irqsave(&worker->workers->lock, flags);
                worker->idle = 0;
 
-               if (!list_empty(&worker->worker_list)) {
+               if (!list_empty(&worker->worker_list) &&
+                   !worker->workers->stopping) {
                        list_move_tail(&worker->worker_list,
                                      &worker->workers->worker_list);
                }
@@ -412,6 +414,7 @@ void btrfs_stop_workers(struct btrfs_workers *workers)
        int can_stop;
 
        spin_lock_irq(&workers->lock);
+       workers->stopping = 1;
        list_splice_init(&workers->idle_list, &workers->worker_list);
        while (!list_empty(&workers->worker_list)) {
                cur = workers->worker_list.next;
@@ -455,6 +458,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
        workers->ordered = 0;
        workers->atomic_start_pending = 0;
        workers->atomic_worker_start = async_helper;
+       workers->stopping = 0;
 }
 
 /*
@@ -480,15 +484,19 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
        atomic_set(&worker->num_pending, 0);
        atomic_set(&worker->refs, 1);
        worker->workers = workers;
-       worker->task = kthread_run(worker_loop, worker,
-                                  "btrfs-%s-%d", workers->name,
-                                  workers->num_workers + 1);
+       worker->task = kthread_create(worker_loop, worker,
+                                     "btrfs-%s-%d", workers->name,
+                                     workers->num_workers + 1);
        if (IS_ERR(worker->task)) {
                ret = PTR_ERR(worker->task);
-               kfree(worker);
                goto fail;
        }
+
        spin_lock_irq(&workers->lock);
+       if (workers->stopping) {
+               spin_unlock_irq(&workers->lock);
+               goto fail_kthread;
+       }
        list_add_tail(&worker->worker_list, &workers->idle_list);
        worker->idle = 1;
        workers->num_workers++;
@@ -496,8 +504,13 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
        WARN_ON(workers->num_workers_starting < 0);
        spin_unlock_irq(&workers->lock);
 
+       wake_up_process(worker->task);
        return 0;
+
+fail_kthread:
+       kthread_stop(worker->task);
 fail:
+       kfree(worker);
        spin_lock_irq(&workers->lock);
        workers->num_workers_starting--;
        spin_unlock_irq(&workers->lock);
index 063698b..1f26792 100644 (file)
@@ -107,6 +107,8 @@ struct btrfs_workers {
 
        /* extra name for this worker, used for current->name */
        char *name;
+
+       int stopping;
 };
 
 void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
index 7068168..9efb94e 100644 (file)
@@ -535,10 +535,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
        list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
 
        btrfs_rm_dev_replace_srcdev(fs_info, src_device);
-       if (src_device->bdev) {
-               /* zero out the old super */
-               btrfs_scratch_superblock(src_device);
-       }
+
        /*
         * this is again a consistent state where no dev_replace procedure
         * is running, the target device is part of the filesystem, the
index 4ae17ed..62176ad 100644 (file)
@@ -1561,8 +1561,9 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
        return ret;
 }
 
-struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
-                                             struct btrfs_key *location)
+struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_key *location,
+                                    bool check_ref)
 {
        struct btrfs_root *root;
        int ret;
@@ -1586,7 +1587,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
 again:
        root = btrfs_lookup_fs_root(fs_info, location->objectid);
        if (root) {
-               if (btrfs_root_refs(&root->root_item) == 0)
+               if (check_ref && btrfs_root_refs(&root->root_item) == 0)
                        return ERR_PTR(-ENOENT);
                return root;
        }
@@ -1595,7 +1596,7 @@ again:
        if (IS_ERR(root))
                return root;
 
-       if (btrfs_root_refs(&root->root_item) == 0) {
+       if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
                ret = -ENOENT;
                goto fail;
        }
index b71acd6..5ce2a7d 100644 (file)
@@ -68,8 +68,17 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
 int btrfs_init_fs_root(struct btrfs_root *root);
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
                         struct btrfs_root *root);
-struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
-                                             struct btrfs_key *location);
+
+struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+                                    struct btrfs_key *key,
+                                    bool check_ref);
+static inline struct btrfs_root *
+btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
+                          struct btrfs_key *location)
+{
+       return btrfs_get_fs_root(fs_info, location, true);
+}
+
 int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
 void btrfs_btree_balance_dirty(struct btrfs_root *root);
 void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root);
index c09a40d..51731b7 100644 (file)
@@ -145,8 +145,16 @@ int __init extent_io_init(void)
                                     offsetof(struct btrfs_io_bio, bio));
        if (!btrfs_bioset)
                goto free_buffer_cache;
+
+       if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
+               goto free_bioset;
+
        return 0;
 
+free_bioset:
+       bioset_free(btrfs_bioset);
+       btrfs_bioset = NULL;
+
 free_buffer_cache:
        kmem_cache_destroy(extent_buffer_cache);
        extent_buffer_cache = NULL;
@@ -1482,10 +1490,8 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
                cur_start = state->end + 1;
                node = rb_next(node);
                total_bytes += state->end - state->start + 1;
-               if (total_bytes >= max_bytes) {
-                       *end = *start + max_bytes - 1;
+               if (total_bytes >= max_bytes)
                        break;
-               }
                if (!node)
                        break;
        }
@@ -1614,7 +1620,7 @@ again:
                *start = delalloc_start;
                *end = delalloc_end;
                free_extent_state(cached_state);
-               return found;
+               return 0;
        }
 
        /*
@@ -1627,10 +1633,9 @@ again:
 
        /*
         * make sure to limit the number of pages we try to lock down
-        * if we're looping.
         */
-       if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
-               delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+       if (delalloc_end + 1 - delalloc_start > max_bytes)
+               delalloc_end = delalloc_start + max_bytes - 1;
 
        /* step two, lock all the pages after the page that has start */
        ret = lock_delalloc_pages(inode, locked_page,
@@ -1641,8 +1646,7 @@ again:
                 */
                free_extent_state(cached_state);
                if (!loops) {
-                       unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
-                       max_bytes = PAGE_CACHE_SIZE - offset;
+                       max_bytes = PAGE_CACHE_SIZE;
                        loops = 1;
                        goto again;
                } else {
index 22ebc13..b0ef7b0 100644 (file)
@@ -7986,7 +7986,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 
        /* check for collisions, even if the  name isn't there */
-       ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+       ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
                             new_dentry->d_name.name,
                             new_dentry->d_name.len);
 
index a5a2632..4a35572 100644 (file)
@@ -588,7 +588,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
        else
                key.offset = (u64)-1;
 
-       return btrfs_read_fs_root_no_name(fs_info, &key);
+       return btrfs_get_fs_root(fs_info, &key, false);
 }
 
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
index 0b1f4ef..ec71ea4 100644 (file)
@@ -299,11 +299,6 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
                        continue;
                }
 
-               if (btrfs_root_refs(&root->root_item) == 0) {
-                       btrfs_add_dead_root(root);
-                       continue;
-               }
-
                err = btrfs_init_fs_root(root);
                if (err) {
                        btrfs_free_fs_root(root);
@@ -318,6 +313,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
                        btrfs_free_fs_root(root);
                        break;
                }
+
+               if (btrfs_root_refs(&root->root_item) == 0)
+                       btrfs_add_dead_root(root);
        }
 
        btrfs_free_path(path);
index e7a9535..8c81bdc 100644 (file)
@@ -1838,11 +1838,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        assert_qgroups_uptodate(trans);
        update_super_roots(root);
 
-       if (!root->fs_info->log_root_recovering) {
-               btrfs_set_super_log_root(root->fs_info->super_copy, 0);
-               btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
-       }
-
+       btrfs_set_super_log_root(root->fs_info->super_copy, 0);
+       btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
        memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
               sizeof(*root->fs_info->super_copy));
 
index a106458..043b215 100644 (file)
@@ -1716,6 +1716,7 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
                                 struct btrfs_device *srcdev)
 {
        WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
+
        list_del_rcu(&srcdev->dev_list);
        list_del_rcu(&srcdev->dev_alloc_list);
        fs_info->fs_devices->num_devices--;
@@ -1725,9 +1726,13 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
        }
        if (srcdev->can_discard)
                fs_info->fs_devices->num_can_discard--;
-       if (srcdev->bdev)
+       if (srcdev->bdev) {
                fs_info->fs_devices->open_devices--;
 
+               /* zero out the old super */
+               btrfs_scratch_superblock(srcdev);
+       }
+
        call_rcu(&srcdev->rcu, free_device);
 }
 
index ea723a5..6d0b072 100644 (file)
@@ -132,5 +132,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.01"
+#define CIFS_VERSION   "2.02"
 #endif                         /* _CIFSFS_H */
index cfa14c8..52b6f6c 100644 (file)
@@ -547,9 +547,6 @@ struct TCP_Server_Info {
        unsigned int max_rw;    /* maxRw specifies the maximum */
        /* message size the server can send or receive for */
        /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
-       unsigned int max_vcs;   /* maximum number of smb sessions, at least
-                                  those that can be specified uniquely with
-                                  vcnumbers */
        unsigned int capabilities; /* selective disabling of caps by smb sess */
        int timeAdj;  /* Adjust for difference in server time zone in sec */
        __u64 CurrentMid;         /* multiplex id - rotating counter */
@@ -715,7 +712,6 @@ struct cifs_ses {
        enum statusEnum status;
        unsigned overrideSecFlg;  /* if non-zero override global sec flags */
        __u16 ipc_tid;          /* special tid for connection to IPC share */
-       __u16 vcnum;
        char *serverOS;         /* name of operating system underlying server */
        char *serverNOS;        /* name of network operating system of server */
        char *serverDomain;     /* security realm of server */
@@ -1272,6 +1268,7 @@ struct dfs_info3_param {
 #define CIFS_FATTR_DELETE_PENDING      0x2
 #define CIFS_FATTR_NEED_REVAL          0x4
 #define CIFS_FATTR_INO_COLLISION       0x8
+#define CIFS_FATTR_UNKNOWN_NLINK       0x10
 
 struct cifs_fattr {
        u32             cf_flags;
index 948676d..a630475 100644 (file)
@@ -2652,26 +2652,7 @@ typedef struct file_xattr_info {
 } __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute info
                                              level 0x205 */
 
-
-/* flags for chattr command */
-#define EXT_SECURE_DELETE              0x00000001 /* EXT3_SECRM_FL */
-#define EXT_ENABLE_UNDELETE            0x00000002 /* EXT3_UNRM_FL */
-/* Reserved for compress file 0x4 */
-#define EXT_SYNCHRONOUS                        0x00000008 /* EXT3_SYNC_FL */
-#define EXT_IMMUTABLE_FL               0x00000010 /* EXT3_IMMUTABLE_FL */
-#define EXT_OPEN_APPEND_ONLY           0x00000020 /* EXT3_APPEND_FL */
-#define EXT_DO_NOT_BACKUP              0x00000040 /* EXT3_NODUMP_FL */
-#define EXT_NO_UPDATE_ATIME            0x00000080 /* EXT3_NOATIME_FL */
-/* 0x100 through 0x800 reserved for compression flags and are GET-ONLY */
-#define EXT_HASH_TREE_INDEXED_DIR      0x00001000 /* GET-ONLY EXT3_INDEX_FL */
-/* 0x2000 reserved for IMAGIC_FL */
-#define EXT_JOURNAL_THIS_FILE  0x00004000 /* GET-ONLY EXT3_JOURNAL_DATA_FL */
-/* 0x8000 reserved for EXT3_NOTAIL_FL */
-#define EXT_SYNCHRONOUS_DIR            0x00010000 /* EXT3_DIRSYNC_FL */
-#define EXT_TOPDIR                     0x00020000 /* EXT3_TOPDIR_FL */
-
-#define EXT_SET_MASK                   0x000300FF
-#define EXT_GET_MASK                   0x0003DFFF
+/* flags for lsattr and chflags commands removed arein uapi/linux/fs.h */
 
 typedef struct file_chattr_info {
        __le64  mask; /* list of all possible attribute bits */
index a3d74fe..4baf359 100644 (file)
@@ -463,7 +463,6 @@ decode_lanman_negprot_rsp(struct TCP_Server_Info *server, NEGOTIATE_RSP *pSMBr)
                               cifs_max_pending);
        set_credits(server, server->maxReq);
        server->maxBuf = le16_to_cpu(rsp->MaxBufSize);
-       server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs);
        /* even though we do not use raw we might as well set this
        accurately, in case we ever find a need for it */
        if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) {
index eb955b5..7ddddf2 100644 (file)
@@ -3254,6 +3254,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
        /*
         * Reads as many pages as possible from fscache. Returns -ENOBUFS
         * immediately if the cookie is negative
+        *
+        * After this point, every page in the list might have PG_fscache set,
+        * so we will need to clean that up off of every page we don't use.
         */
        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
                                         &num_pages);
@@ -3376,6 +3379,11 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                kref_put(&rdata->refcount, cifs_readdata_release);
        }
 
+       /* Any pages that have been shown to fscache but didn't get added to
+        * the pagecache must be uncached before they get returned to the
+        * allocator.
+        */
+       cifs_fscache_readpages_cancel(mapping->host, page_list);
        return rc;
 }
 
index 2f4bc5a..b3258f3 100644 (file)
@@ -223,6 +223,13 @@ void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
                fscache_uncache_page(CIFS_I(inode)->fscache, page);
 }
 
+void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages)
+{
+       cifs_dbg(FYI, "%s: (fsc: %p, i: %p)\n",
+                __func__, CIFS_I(inode)->fscache, inode);
+       fscache_readpages_cancel(CIFS_I(inode)->fscache, pages);
+}
+
 void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
 {
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
index 6353932..24794b6 100644 (file)
@@ -54,6 +54,7 @@ extern int __cifs_readpages_from_fscache(struct inode *,
                                         struct address_space *,
                                         struct list_head *,
                                         unsigned *);
+extern void __cifs_fscache_readpages_cancel(struct inode *, struct list_head *);
 
 extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
 
@@ -91,6 +92,13 @@ static inline void cifs_readpage_to_fscache(struct inode *inode,
                __cifs_readpage_to_fscache(inode, page);
 }
 
+static inline void cifs_fscache_readpages_cancel(struct inode *inode,
+                                                struct list_head *pages)
+{
+       if (CIFS_I(inode)->fscache)
+               return __cifs_fscache_readpages_cancel(inode, pages);
+}
+
 #else /* CONFIG_CIFS_FSCACHE */
 static inline int cifs_fscache_register(void) { return 0; }
 static inline void cifs_fscache_unregister(void) {}
@@ -131,6 +139,11 @@ static inline int cifs_readpages_from_fscache(struct inode *inode,
 static inline void cifs_readpage_to_fscache(struct inode *inode,
                        struct page *page) {}
 
+static inline void cifs_fscache_readpages_cancel(struct inode *inode,
+                                                struct list_head *pages)
+{
+}
+
 #endif /* CONFIG_CIFS_FSCACHE */
 
 #endif /* _CIFS_FSCACHE_H */
index f9ff9c1..867b7cd 100644 (file)
@@ -120,6 +120,33 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
        cifs_i->invalid_mapping = true;
 }
 
+/*
+ * copy nlink to the inode, unless it wasn't provided.  Provide
+ * sane values if we don't have an existing one and none was provided
+ */
+static void
+cifs_nlink_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
+{
+       /*
+        * if we're in a situation where we can't trust what we
+        * got from the server (readdir, some non-unix cases)
+        * fake reasonable values
+        */
+       if (fattr->cf_flags & CIFS_FATTR_UNKNOWN_NLINK) {
+               /* only provide fake values on a new inode */
+               if (inode->i_state & I_NEW) {
+                       if (fattr->cf_cifsattrs & ATTR_DIRECTORY)
+                               set_nlink(inode, 2);
+                       else
+                               set_nlink(inode, 1);
+               }
+               return;
+       }
+
+       /* we trust the server, so update it */
+       set_nlink(inode, fattr->cf_nlink);
+}
+
 /* populate an inode with info from a cifs_fattr struct */
 void
 cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
@@ -134,7 +161,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
        inode->i_mtime = fattr->cf_mtime;
        inode->i_ctime = fattr->cf_ctime;
        inode->i_rdev = fattr->cf_rdev;
-       set_nlink(inode, fattr->cf_nlink);
+       cifs_nlink_fattr_to_inode(inode, fattr);
        inode->i_uid = fattr->cf_uid;
        inode->i_gid = fattr->cf_gid;
 
@@ -541,6 +568,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
        fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
        fattr->cf_createtime = le64_to_cpu(info->CreationTime);
 
+       fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
        if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
                fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
                fattr->cf_dtype = DT_DIR;
@@ -548,7 +576,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
                 * Server can return wrong NumberOfLinks value for directories
                 * when Unix extensions are disabled - fake it.
                 */
-               fattr->cf_nlink = 2;
+               if (!tcon->unix_ext)
+                       fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
        } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
                fattr->cf_mode = S_IFLNK;
                fattr->cf_dtype = DT_LNK;
@@ -561,11 +590,15 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
                if (fattr->cf_cifsattrs & ATTR_READONLY)
                        fattr->cf_mode &= ~(S_IWUGO);
 
-               fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
-               if (fattr->cf_nlink < 1) {
-                       cifs_dbg(1, "replacing bogus file nlink value %u\n",
+               /*
+                * Don't accept zero nlink from non-unix servers unless
+                * delete is pending.  Instead mark it as unknown.
+                */
+               if ((fattr->cf_nlink < 1) && !tcon->unix_ext &&
+                   !info->DeletePending) {
+                       cifs_dbg(1, "bogus file nlink value %u\n",
                                fattr->cf_nlink);
-                       fattr->cf_nlink = 1;
+                       fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
                }
        }
 
index 42ef03b..53a75f3 100644 (file)
@@ -180,6 +180,9 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
                fattr->cf_dtype = DT_REG;
        }
 
+       /* non-unix readdir doesn't provide nlink */
+       fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
+
        if (fattr->cf_cifsattrs & ATTR_READONLY)
                fattr->cf_mode &= ~S_IWUGO;
 
index 5f99b7f..352358d 100644 (file)
 #include <linux/slab.h>
 #include "cifs_spnego.h"
 
-/*
- * Checks if this is the first smb session to be reconnected after
- * the socket has been reestablished (so we know whether to use vc 0).
- * Called while holding the cifs_tcp_ses_lock, so do not block
- */
-static bool is_first_ses_reconnect(struct cifs_ses *ses)
-{
-       struct list_head *tmp;
-       struct cifs_ses *tmp_ses;
-
-       list_for_each(tmp, &ses->server->smb_ses_list) {
-               tmp_ses = list_entry(tmp, struct cifs_ses,
-                                    smb_ses_list);
-               if (tmp_ses->need_reconnect == false)
-                       return false;
-       }
-       /* could not find a session that was already connected,
-          this must be the first one we are reconnecting */
-       return true;
-}
-
-/*
- *     vc number 0 is treated specially by some servers, and should be the
- *      first one we request.  After that we can use vcnumbers up to maxvcs,
- *     one for each smb session (some Windows versions set maxvcs incorrectly
- *     so maxvc=1 can be ignored).  If we have too many vcs, we can reuse
- *     any vc but zero (some servers reset the connection on vcnum zero)
- *
- */
-static __le16 get_next_vcnum(struct cifs_ses *ses)
-{
-       __u16 vcnum = 0;
-       struct list_head *tmp;
-       struct cifs_ses *tmp_ses;
-       __u16 max_vcs = ses->server->max_vcs;
-       __u16 i;
-       int free_vc_found = 0;
-
-       /* Quoting the MS-SMB specification: "Windows-based SMB servers set this
-       field to one but do not enforce this limit, which allows an SMB client
-       to establish more virtual circuits than allowed by this value ... but
-       other server implementations can enforce this limit." */
-       if (max_vcs < 2)
-               max_vcs = 0xFFFF;
-
-       spin_lock(&cifs_tcp_ses_lock);
-       if ((ses->need_reconnect) && is_first_ses_reconnect(ses))
-                       goto get_vc_num_exit;  /* vcnum will be zero */
-       for (i = ses->server->srv_count - 1; i < max_vcs; i++) {
-               if (i == 0) /* this is the only connection, use vc 0 */
-                       break;
-
-               free_vc_found = 1;
-
-               list_for_each(tmp, &ses->server->smb_ses_list) {
-                       tmp_ses = list_entry(tmp, struct cifs_ses,
-                                            smb_ses_list);
-                       if (tmp_ses->vcnum == i) {
-                               free_vc_found = 0;
-                               break; /* found duplicate, try next vcnum */
-                       }
-               }
-               if (free_vc_found)
-                       break; /* we found a vcnumber that will work - use it */
-       }
-
-       if (i == 0)
-               vcnum = 0; /* for most common case, ie if one smb session, use
-                             vc zero.  Also for case when no free vcnum, zero
-                             is safest to send (some clients only send zero) */
-       else if (free_vc_found == 0)
-               vcnum = 1;  /* we can not reuse vc=0 safely, since some servers
-                               reset all uids on that, but 1 is ok. */
-       else
-               vcnum = i;
-       ses->vcnum = vcnum;
-get_vc_num_exit:
-       spin_unlock(&cifs_tcp_ses_lock);
-
-       return cpu_to_le16(vcnum);
-}
-
 static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
 {
        __u32 capabilities = 0;
@@ -128,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
                                        CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
                                        USHRT_MAX));
        pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
-       pSMB->req.VcNumber = get_next_vcnum(ses);
+       pSMB->req.VcNumber = __constant_cpu_to_le16(1);
 
        /* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
 
index 0d424d7..e274e9c 100644 (file)
@@ -2563,7 +2563,7 @@ retry:
                        break;
        }
        blk_finish_plug(&plug);
-       if (!ret && !cycled) {
+       if (!ret && !cycled && wbc->nr_to_write > 0) {
                cycled = 1;
                mpd.last_page = writeback_index - 1;
                mpd.first_page = 0;
index c081e34..03e9beb 100644 (file)
@@ -1350,6 +1350,8 @@ retry:
                                    s_min_extra_isize) {
                                        tried_min_extra_isize++;
                                        new_extra_isize = s_min_extra_isize;
+                                       kfree(is); is = NULL;
+                                       kfree(bs); bs = NULL;
                                        goto retry;
                                }
                                error = -1;
index 62b43b5..b7989f2 100644 (file)
@@ -182,6 +182,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
        struct inode *inode;
        struct dentry *parent;
        struct fuse_conn *fc;
+       struct fuse_inode *fi;
        int ret;
 
        inode = ACCESS_ONCE(entry->d_inode);
@@ -228,7 +229,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
                if (!err && !outarg.nodeid)
                        err = -ENOENT;
                if (!err) {
-                       struct fuse_inode *fi = get_fuse_inode(inode);
+                       fi = get_fuse_inode(inode);
                        if (outarg.nodeid != get_node_id(inode)) {
                                fuse_queue_forget(fc, forget, outarg.nodeid, 1);
                                goto invalid;
@@ -246,8 +247,11 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
                                       attr_version);
                fuse_change_entry_timeout(entry, &outarg);
        } else if (inode) {
-               fc = get_fuse_conn(inode);
-               if (fc->readdirplus_auto) {
+               fi = get_fuse_inode(inode);
+               if (flags & LOOKUP_RCU) {
+                       if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
+                               return -ECHILD;
+               } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
                        parent = dget_parent(entry);
                        fuse_advise_use_readdirplus(parent->d_inode);
                        dput(parent);
@@ -259,7 +263,8 @@ out:
 
 invalid:
        ret = 0;
-       if (check_submounts_and_drop(entry) != 0)
+
+       if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0)
                ret = 1;
        goto out;
 }
@@ -1063,6 +1068,8 @@ static int fuse_access(struct inode *inode, int mask)
        struct fuse_access_in inarg;
        int err;
 
+       BUG_ON(mask & MAY_NOT_BLOCK);
+
        if (fc->no_access)
                return 0;
 
@@ -1150,9 +1157,6 @@ static int fuse_permission(struct inode *inode, int mask)
                   noticed immediately, only after the attribute
                   timeout has expired */
        } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
-               if (mask & MAY_NOT_BLOCK)
-                       return -ECHILD;
-
                err = fuse_access(inode, mask);
        } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
                if (!(inode->i_mode & S_IXUGO)) {
@@ -1291,6 +1295,8 @@ static int fuse_direntplus_link(struct file *file,
        }
 
 found:
+       if (fc->readdirplus_auto)
+               set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
        fuse_change_entry_timeout(dentry, o);
 
        err = 0;
index d409dea..4598345 100644 (file)
@@ -2467,6 +2467,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 {
        struct fuse_file *ff = file->private_data;
        struct inode *inode = file->f_inode;
+       struct fuse_inode *fi = get_fuse_inode(inode);
        struct fuse_conn *fc = ff->fc;
        struct fuse_req *req;
        struct fuse_fallocate_in inarg = {
@@ -2484,10 +2485,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 
        if (lock_inode) {
                mutex_lock(&inode->i_mutex);
-               if (mode & FALLOC_FL_PUNCH_HOLE)
-                       fuse_set_nowrite(inode);
+               if (mode & FALLOC_FL_PUNCH_HOLE) {
+                       loff_t endbyte = offset + length - 1;
+                       err = filemap_write_and_wait_range(inode->i_mapping,
+                                                          offset, endbyte);
+                       if (err)
+                               goto out;
+
+                       fuse_sync_writes(inode);
+               }
        }
 
+       if (!(mode & FALLOC_FL_KEEP_SIZE))
+               set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
        req = fuse_get_req_nopages(fc);
        if (IS_ERR(req)) {
                err = PTR_ERR(req);
@@ -2520,11 +2531,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
        fuse_invalidate_attr(inode);
 
 out:
-       if (lock_inode) {
-               if (mode & FALLOC_FL_PUNCH_HOLE)
-                       fuse_release_nowrite(inode);
+       if (!(mode & FALLOC_FL_KEEP_SIZE))
+               clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
+       if (lock_inode)
                mutex_unlock(&inode->i_mutex);
-       }
 
        return err;
 }
index 5ced199..5b9e6f3 100644 (file)
@@ -115,6 +115,8 @@ struct fuse_inode {
 enum {
        /** Advise readdirplus  */
        FUSE_I_ADVISE_RDPLUS,
+       /** Initialized with readdirplus */
+       FUSE_I_INIT_RDPLUS,
        /** An operation changing file size is in progress  */
        FUSE_I_SIZE_UNSTABLE,
 };
index 854a8f0..02b0df7 100644 (file)
@@ -1458,7 +1458,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 
        trace_nfs_atomic_open_enter(dir, ctx, open_flags);
        nfs_block_sillyrename(dentry->d_parent);
-       inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
+       inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
        nfs_unblock_sillyrename(dentry->d_parent);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
index e5b804d..77efaf1 100644 (file)
@@ -19,6 +19,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
        struct inode *dir;
        unsigned openflags = filp->f_flags;
        struct iattr attr;
+       int opened = 0;
        int err;
 
        /*
@@ -55,7 +56,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
                nfs_wb_all(inode);
        }
 
-       inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
+       inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                switch (err) {
index 95604f6..c7c295e 100644 (file)
@@ -185,6 +185,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
        if (status)
                goto out_put;
 
+       smp_wmb();
        ds->ds_clp = clp;
        dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 out:
@@ -801,34 +802,35 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
        struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
        struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
        struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
-
-       if (filelayout_test_devid_unavailable(devid))
-               return NULL;
+       struct nfs4_pnfs_ds *ret = ds;
 
        if (ds == NULL) {
                printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
                        __func__, ds_idx);
                filelayout_mark_devid_invalid(devid);
-               return NULL;
+               goto out;
        }
+       smp_rmb();
        if (ds->ds_clp)
-               return ds;
+               goto out_test_devid;
 
        if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
                struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
                int err;
 
                err = nfs4_ds_connect(s, ds);
-               if (err) {
+               if (err)
                        nfs4_mark_deviceid_unavailable(devid);
-                       ds = NULL;
-               }
                nfs4_clear_ds_conn_bit(ds);
        } else {
                /* Either ds is connected, or ds is NULL */
                nfs4_wait_ds_connect(ds);
        }
-       return ds;
+out_test_devid:
+       if (filelayout_test_devid_unavailable(devid))
+               ret = NULL;
+out:
+       return ret;
 }
 
 module_param(dataserver_retrans, uint, 0644);
index 989bb9d..d53d678 100644 (file)
@@ -912,6 +912,7 @@ struct nfs4_opendata {
        struct iattr attrs;
        unsigned long timestamp;
        unsigned int rpc_done : 1;
+       unsigned int file_created : 1;
        unsigned int is_recover : 1;
        int rpc_status;
        int cancelled;
@@ -1946,8 +1947,13 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 
        nfs_fattr_map_and_free_names(server, &data->f_attr);
 
-       if (o_arg->open_flags & O_CREAT)
+       if (o_arg->open_flags & O_CREAT) {
                update_changeattr(dir, &o_res->cinfo);
+               if (o_arg->open_flags & O_EXCL)
+                       data->file_created = 1;
+               else if (o_res->cinfo.before != o_res->cinfo.after)
+                       data->file_created = 1;
+       }
        if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
                server->caps &= ~NFS_CAP_POSIX_LOCK;
        if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -2191,7 +2197,8 @@ static int _nfs4_do_open(struct inode *dir,
                        struct nfs_open_context *ctx,
                        int flags,
                        struct iattr *sattr,
-                       struct nfs4_label *label)
+                       struct nfs4_label *label,
+                       int *opened)
 {
        struct nfs4_state_owner  *sp;
        struct nfs4_state     *state = NULL;
@@ -2261,6 +2268,8 @@ static int _nfs4_do_open(struct inode *dir,
                        nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
                }
        }
+       if (opendata->file_created)
+               *opened |= FILE_CREATED;
 
        if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server))
                *ctx_th = opendata->f_attr.mdsthreshold;
@@ -2289,7 +2298,8 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
                                        struct nfs_open_context *ctx,
                                        int flags,
                                        struct iattr *sattr,
-                                       struct nfs4_label *label)
+                                       struct nfs4_label *label,
+                                       int *opened)
 {
        struct nfs_server *server = NFS_SERVER(dir);
        struct nfs4_exception exception = { };
@@ -2297,7 +2307,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
        int status;
 
        do {
-               status = _nfs4_do_open(dir, ctx, flags, sattr, label);
+               status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened);
                res = ctx->state;
                trace_nfs4_open_file(ctx, flags, status);
                if (status == 0)
@@ -2659,7 +2669,8 @@ out:
 }
 
 static struct inode *
-nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr)
+nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
+               int open_flags, struct iattr *attr, int *opened)
 {
        struct nfs4_state *state;
        struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL;
@@ -2667,7 +2678,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
        label = nfs4_label_init_security(dir, ctx->dentry, attr, &l);
 
        /* Protect against concurrent sillydeletes */
-       state = nfs4_do_open(dir, ctx, open_flags, attr, label);
+       state = nfs4_do_open(dir, ctx, open_flags, attr, label, opened);
 
        nfs4_label_release_security(label);
 
@@ -3332,6 +3343,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
        struct nfs4_label l, *ilabel = NULL;
        struct nfs_open_context *ctx;
        struct nfs4_state *state;
+       int opened = 0;
        int status = 0;
 
        ctx = alloc_nfs_open_context(dentry, FMODE_READ);
@@ -3341,7 +3353,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
        ilabel = nfs4_label_init_security(dir, dentry, sattr, &l);
 
        sattr->ia_mode &= ~current_umask();
-       state = nfs4_do_open(dir, ctx, flags, sattr, ilabel);
+       state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, &opened);
        if (IS_ERR(state)) {
                status = PTR_ERR(state);
                goto out;
@@ -7564,8 +7576,10 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
 {
        int err;
        struct page *page;
-       rpc_authflavor_t flavor;
+       rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
        struct nfs4_secinfo_flavors *flavors;
+       struct nfs4_secinfo4 *secinfo;
+       int i;
 
        page = alloc_page(GFP_KERNEL);
        if (!page) {
@@ -7587,9 +7601,31 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
        if (err)
                goto out_freepage;
 
-       flavor = nfs_find_best_sec(flavors);
-       if (err == 0)
-               err = nfs4_lookup_root_sec(server, fhandle, info, flavor);
+       for (i = 0; i < flavors->num_flavors; i++) {
+               secinfo = &flavors->flavors[i];
+
+               switch (secinfo->flavor) {
+               case RPC_AUTH_NULL:
+               case RPC_AUTH_UNIX:
+               case RPC_AUTH_GSS:
+                       flavor = rpcauth_get_pseudoflavor(secinfo->flavor,
+                                       &secinfo->flavor_info);
+                       break;
+               default:
+                       flavor = RPC_AUTH_MAXFLAVOR;
+                       break;
+               }
+
+               if (flavor != RPC_AUTH_MAXFLAVOR) {
+                       err = nfs4_lookup_root_sec(server, fhandle,
+                                                  info, flavor);
+                       if (!err)
+                               break;
+               }
+       }
+
+       if (flavor == RPC_AUTH_MAXFLAVOR)
+               err = -EPERM;
 
 out_freepage:
        put_page(page);
index 0ba6798..da27664 100644 (file)
@@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh)
        clear_buffer_nilfs_volatile(bh);
        clear_buffer_nilfs_checked(bh);
        clear_buffer_nilfs_redirected(bh);
+       clear_buffer_async_write(bh);
        clear_buffer_dirty(bh);
        if (nilfs_page_buffers_clean(page))
                __nilfs_clear_page_dirty(page);
@@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
                                        "discard block %llu, size %zu",
                                        (u64)bh->b_blocknr, bh->b_size);
                        }
+                       clear_buffer_async_write(bh);
                        clear_buffer_dirty(bh);
                        clear_buffer_nilfs_volatile(bh);
                        clear_buffer_nilfs_checked(bh);
index bd88a74..9f6b486 100644 (file)
@@ -665,7 +665,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
 
                bh = head = page_buffers(page);
                do {
-                       if (!buffer_dirty(bh))
+                       if (!buffer_dirty(bh) || buffer_async_write(bh))
                                continue;
                        get_bh(bh);
                        list_add_tail(&bh->b_assoc_buffers, listp);
@@ -699,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        bh = head = page_buffers(pvec.pages[i]);
                        do {
-                               if (buffer_dirty(bh)) {
+                               if (buffer_dirty(bh) &&
+                                               !buffer_async_write(bh)) {
                                        get_bh(bh);
                                        list_add_tail(&bh->b_assoc_buffers,
                                                      listp);
@@ -1579,6 +1580,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
 
                list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
                                    b_assoc_buffers) {
+                       set_buffer_async_write(bh);
                        if (bh->b_page != bd_page) {
                                if (bd_page) {
                                        lock_page(bd_page);
@@ -1592,6 +1594,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
 
                list_for_each_entry(bh, &segbuf->sb_payload_buffers,
                                    b_assoc_buffers) {
+                       set_buffer_async_write(bh);
                        if (bh == segbuf->sb_super_root) {
                                if (bh->b_page != bd_page) {
                                        lock_page(bd_page);
@@ -1677,6 +1680,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
        list_for_each_entry(segbuf, logs, sb_list) {
                list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
                                    b_assoc_buffers) {
+                       clear_buffer_async_write(bh);
                        if (bh->b_page != bd_page) {
                                if (bd_page)
                                        end_page_writeback(bd_page);
@@ -1686,6 +1690,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
 
                list_for_each_entry(bh, &segbuf->sb_payload_buffers,
                                    b_assoc_buffers) {
+                       clear_buffer_async_write(bh);
                        if (bh == segbuf->sb_super_root) {
                                if (bh->b_page != bd_page) {
                                        end_page_writeback(bd_page);
@@ -1755,6 +1760,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
                                    b_assoc_buffers) {
                        set_buffer_uptodate(bh);
                        clear_buffer_dirty(bh);
+                       clear_buffer_async_write(bh);
                        if (bh->b_page != bd_page) {
                                if (bd_page)
                                        end_page_writeback(bd_page);
@@ -1776,6 +1782,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
                                    b_assoc_buffers) {
                        set_buffer_uptodate(bh);
                        clear_buffer_dirty(bh);
+                       clear_buffer_async_write(bh);
                        clear_buffer_delay(bh);
                        clear_buffer_nilfs_volatile(bh);
                        clear_buffer_nilfs_redirected(bh);
index ef99972..0d3a97d 100644 (file)
@@ -70,9 +70,10 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
         */
        if (inode == NULL) {
                unsigned long gen = (unsigned long) dentry->d_fsdata;
-               unsigned long pgen =
-                       OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
-
+               unsigned long pgen;
+               spin_lock(&dentry->d_lock);
+               pgen = OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
+               spin_unlock(&dentry->d_lock);
                trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
                                                       dentry->d_name.name,
                                                       pgen, gen);
index 121da2d..d4e81e4 100644 (file)
@@ -1924,7 +1924,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 {
        int tmp, hangup_needed = 0;
        struct ocfs2_super *osb = NULL;
-       char nodestr[8];
+       char nodestr[12];
 
        trace_ocfs2_dismount_volume(sb);
 
index 73feacc..fd77703 100644 (file)
@@ -1163,21 +1163,6 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
        return NULL;
 }
 
-static int newer_jl_done(struct reiserfs_journal_cnode *cn)
-{
-       struct super_block *sb = cn->sb;
-       b_blocknr_t blocknr = cn->blocknr;
-
-       cn = cn->hprev;
-       while (cn) {
-               if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist &&
-                   atomic_read(&cn->jlist->j_commit_left) != 0)
-                                   return 0;
-               cn = cn->hprev;
-       }
-       return 1;
-}
-
 static void remove_journal_hash(struct super_block *,
                                struct reiserfs_journal_cnode **,
                                struct reiserfs_journal_list *, unsigned long,
@@ -1353,7 +1338,6 @@ static int flush_journal_list(struct super_block *s,
                reiserfs_warning(s, "clm-2048", "called with wcount %d",
                                 atomic_read(&journal->j_wcount));
        }
-       BUG_ON(jl->j_trans_id == 0);
 
        /* if flushall == 0, the lock is already held */
        if (flushall) {
@@ -1593,31 +1577,6 @@ static int flush_journal_list(struct super_block *s,
        return err;
 }
 
-static int test_transaction(struct super_block *s,
-                            struct reiserfs_journal_list *jl)
-{
-       struct reiserfs_journal_cnode *cn;
-
-       if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
-               return 1;
-
-       cn = jl->j_realblock;
-       while (cn) {
-               /* if the blocknr == 0, this has been cleared from the hash,
-                ** skip it
-                */
-               if (cn->blocknr == 0) {
-                       goto next;
-               }
-               if (cn->bh && !newer_jl_done(cn))
-                       return 0;
-             next:
-               cn = cn->next;
-               cond_resched();
-       }
-       return 0;
-}
-
 static int write_one_transaction(struct super_block *s,
                                 struct reiserfs_journal_list *jl,
                                 struct buffer_chunk *chunk)
@@ -1805,6 +1764,8 @@ static int flush_used_journal_lists(struct super_block *s,
                        break;
                tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
        }
+       get_journal_list(jl);
+       get_journal_list(flush_jl);
        /* try to find a group of blocks we can flush across all the
         ** transactions, but only bother if we've actually spanned
         ** across multiple lists
@@ -1813,6 +1774,8 @@ static int flush_used_journal_lists(struct super_block *s,
                ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
        }
        flush_journal_list(s, flush_jl, 1);
+       put_journal_list(s, flush_jl);
+       put_journal_list(s, jl);
        return 0;
 }
 
@@ -3868,27 +3831,6 @@ int reiserfs_prepare_for_journal(struct super_block *sb,
        return 1;
 }
 
-static void flush_old_journal_lists(struct super_block *s)
-{
-       struct reiserfs_journal *journal = SB_JOURNAL(s);
-       struct reiserfs_journal_list *jl;
-       struct list_head *entry;
-       time_t now = get_seconds();
-
-       while (!list_empty(&journal->j_journal_list)) {
-               entry = journal->j_journal_list.next;
-               jl = JOURNAL_LIST_ENTRY(entry);
-               /* this check should always be run, to send old lists to disk */
-               if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
-                   atomic_read(&jl->j_commit_left) == 0 &&
-                   test_transaction(s, jl)) {
-                       flush_used_journal_lists(s, jl);
-               } else {
-                       break;
-               }
-       }
-}
-
 /*
 ** long and ugly.  If flush, will not return until all commit
 ** blocks and all real buffers in the trans are on disk.
@@ -4232,7 +4174,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                        }
                }
        }
-       flush_old_journal_lists(sb);
 
        journal->j_current_jl->j_list_bitmap =
            get_list_bitmap(sb, journal->j_current_jl);
index c219e73..083dc0a 100644 (file)
@@ -94,7 +94,7 @@ retry:
 
 int fd_statfs(int fd, struct kstatfs *st)
 {
-       struct fd f = fdget(fd);
+       struct fd f = fdget_raw(fd);
        int error = -EBADF;
        if (f.file) {
                error = vfs_statfs(&f.file->f_path, st);
index 3a96c97..0225c20 100644 (file)
@@ -264,6 +264,8 @@ out_free_sb:
  */
 static inline void destroy_super(struct super_block *s)
 {
+       list_lru_destroy(&s->s_dentry_lru);
+       list_lru_destroy(&s->s_inode_lru);
 #ifdef CONFIG_SMP
        free_percpu(s->s_files);
 #endif
@@ -323,8 +325,6 @@ void deactivate_locked_super(struct super_block *s)
 
                /* caches are now gone, we can safely kill the shrinker now */
                unregister_shrinker(&s->s_shrink);
-               list_lru_destroy(&s->s_dentry_lru);
-               list_lru_destroy(&s->s_inode_lru);
 
                put_filesystem(fs);
                put_super(s);
index d0c6a00..eda1095 100644 (file)
@@ -487,6 +487,7 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_sb = sb;
        sbi->s_block_base = 0;
        sbi->s_type = FSTYPE_V7;
+       mutex_init(&sbi->s_lock);
        sb->s_fs_info = sbi;
        
        sb_set_blocksize(sb, 512);
index 7e5aae4..6eaf5ed 100644 (file)
@@ -30,18 +30,17 @@ void udf_free_inode(struct inode *inode)
 {
        struct super_block *sb = inode->i_sb;
        struct udf_sb_info *sbi = UDF_SB(sb);
+       struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb);
 
-       mutex_lock(&sbi->s_alloc_mutex);
-       if (sbi->s_lvid_bh) {
-               struct logicalVolIntegrityDescImpUse *lvidiu =
-                                                       udf_sb_lvidiu(sbi);
+       if (lvidiu) {
+               mutex_lock(&sbi->s_alloc_mutex);
                if (S_ISDIR(inode->i_mode))
                        le32_add_cpu(&lvidiu->numDirs, -1);
                else
                        le32_add_cpu(&lvidiu->numFiles, -1);
                udf_updated_lvid(sb);
+               mutex_unlock(&sbi->s_alloc_mutex);
        }
-       mutex_unlock(&sbi->s_alloc_mutex);
 
        udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
 }
@@ -55,6 +54,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
        uint32_t start = UDF_I(dir)->i_location.logicalBlockNum;
        struct udf_inode_info *iinfo;
        struct udf_inode_info *dinfo = UDF_I(dir);
+       struct logicalVolIntegrityDescImpUse *lvidiu;
 
        inode = new_inode(sb);
 
@@ -92,12 +92,10 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
                return NULL;
        }
 
-       if (sbi->s_lvid_bh) {
-               struct logicalVolIntegrityDescImpUse *lvidiu;
-
+       lvidiu = udf_sb_lvidiu(sb);
+       if (lvidiu) {
                iinfo->i_unique = lvid_get_unique_id(sb);
                mutex_lock(&sbi->s_alloc_mutex);
-               lvidiu = udf_sb_lvidiu(sbi);
                if (S_ISDIR(mode))
                        le32_add_cpu(&lvidiu->numDirs, 1);
                else
index 839a2ba..9121938 100644 (file)
@@ -94,13 +94,25 @@ static unsigned int udf_count_free(struct super_block *);
 static int udf_statfs(struct dentry *, struct kstatfs *);
 static int udf_show_options(struct seq_file *, struct dentry *);
 
-struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
+struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb)
 {
-       struct logicalVolIntegrityDesc *lvid =
-               (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data;
-       __u32 number_of_partitions = le32_to_cpu(lvid->numOfPartitions);
-       __u32 offset = number_of_partitions * 2 *
-                               sizeof(uint32_t)/sizeof(uint8_t);
+       struct logicalVolIntegrityDesc *lvid;
+       unsigned int partnum;
+       unsigned int offset;
+
+       if (!UDF_SB(sb)->s_lvid_bh)
+               return NULL;
+       lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data;
+       partnum = le32_to_cpu(lvid->numOfPartitions);
+       if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) -
+            offsetof(struct logicalVolIntegrityDesc, impUse)) /
+            (2 * sizeof(uint32_t)) < partnum) {
+               udf_err(sb, "Logical volume integrity descriptor corrupted "
+                       "(numOfPartitions = %u)!\n", partnum);
+               return NULL;
+       }
+       /* The offset is to skip freeSpaceTable and sizeTable arrays */
+       offset = partnum * 2 * sizeof(uint32_t);
        return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]);
 }
 
@@ -629,9 +641,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
        struct udf_options uopt;
        struct udf_sb_info *sbi = UDF_SB(sb);
        int error = 0;
+       struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb);
 
-       if (sbi->s_lvid_bh) {
-               int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
+       if (lvidiu) {
+               int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev);
                if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY))
                        return -EACCES;
        }
@@ -1905,11 +1918,12 @@ static void udf_open_lvid(struct super_block *sb)
 
        if (!bh)
                return;
-
-       mutex_lock(&sbi->s_alloc_mutex);
        lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
-       lvidiu = udf_sb_lvidiu(sbi);
+       lvidiu = udf_sb_lvidiu(sb);
+       if (!lvidiu)
+               return;
 
+       mutex_lock(&sbi->s_alloc_mutex);
        lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
        lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
        udf_time_to_disk_stamp(&lvid->recordingDateAndTime,
@@ -1937,10 +1951,12 @@ static void udf_close_lvid(struct super_block *sb)
 
        if (!bh)
                return;
+       lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
+       lvidiu = udf_sb_lvidiu(sb);
+       if (!lvidiu)
+               return;
 
        mutex_lock(&sbi->s_alloc_mutex);
-       lvid = (struct logicalVolIntegrityDesc *)bh->b_data;
-       lvidiu = udf_sb_lvidiu(sbi);
        lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
        lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX;
        udf_time_to_disk_stamp(&lvid->recordingDateAndTime, CURRENT_TIME);
@@ -2093,15 +2109,19 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 
        if (sbi->s_lvid_bh) {
                struct logicalVolIntegrityDescImpUse *lvidiu =
-                                                       udf_sb_lvidiu(sbi);
-               uint16_t minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev);
-               uint16_t minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev);
-               /* uint16_t maxUDFWriteRev =
-                               le16_to_cpu(lvidiu->maxUDFWriteRev); */
+                                                       udf_sb_lvidiu(sb);
+               uint16_t minUDFReadRev;
+               uint16_t minUDFWriteRev;
 
+               if (!lvidiu) {
+                       ret = -EINVAL;
+                       goto error_out;
+               }
+               minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev);
+               minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev);
                if (minUDFReadRev > UDF_MAX_READ_VERSION) {
                        udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
-                               le16_to_cpu(lvidiu->minUDFReadRev),
+                               minUDFReadRev,
                                UDF_MAX_READ_VERSION);
                        ret = -EINVAL;
                        goto error_out;
@@ -2265,11 +2285,7 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct logicalVolIntegrityDescImpUse *lvidiu;
        u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
 
-       if (sbi->s_lvid_bh != NULL)
-               lvidiu = udf_sb_lvidiu(sbi);
-       else
-               lvidiu = NULL;
-
+       lvidiu = udf_sb_lvidiu(sb);
        buf->f_type = UDF_SUPER_MAGIC;
        buf->f_bsize = sb->s_blocksize;
        buf->f_blocks = sbi->s_partmaps[sbi->s_partition].s_partition_len;
index ed401e9..1f32c7b 100644 (file)
@@ -162,7 +162,7 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb)
        return sb->s_fs_info;
 }
 
-struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi);
+struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb);
 
 int udf_compute_nr_groups(struct super_block *sb, u32 partition);
 
index 88c5ea7..f1d85cf 100644 (file)
@@ -628,6 +628,7 @@ xfs_buf_item_unlock(
                else if (aborted) {
                        ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
                        if (lip->li_flags & XFS_LI_IN_AIL) {
+                               spin_lock(&lip->li_ailp->xa_lock);
                                xfs_trans_ail_delete(lip->li_ailp, lip,
                                                     SHUTDOWN_LOG_IO_ERROR);
                        }
index 069537c..20bf8e8 100644 (file)
@@ -1224,6 +1224,7 @@ xfs_da3_node_toosmall(
        /* start with smaller blk num */
        forward = nodehdr.forw < nodehdr.back;
        for (i = 0; i < 2; forward = !forward, i++) {
+               struct xfs_da3_icnode_hdr thdr;
                if (forward)
                        blkno = nodehdr.forw;
                else
@@ -1236,10 +1237,10 @@ xfs_da3_node_toosmall(
                        return(error);
 
                node = bp->b_addr;
-               xfs_da3_node_hdr_from_disk(&nodehdr, node);
+               xfs_da3_node_hdr_from_disk(&thdr, node);
                xfs_trans_brelse(state->args->trans, bp);
 
-               if (count - nodehdr.count >= 0)
+               if (count - thdr.count >= 0)
                        break;  /* fits with at least 25% to spare */
        }
        if (i >= 2) {
index 0957aa9..12dad18 100644 (file)
@@ -1158,7 +1158,7 @@ xfs_dir2_sf_to_block(
        /*
         * Create entry for .
         */
-       dep = xfs_dir3_data_dot_entry_p(hdr);
+       dep = xfs_dir3_data_dot_entry_p(mp, hdr);
        dep->inumber = cpu_to_be64(dp->i_ino);
        dep->namelen = 1;
        dep->name[0] = '.';
@@ -1172,7 +1172,7 @@ xfs_dir2_sf_to_block(
        /*
         * Create entry for ..
         */
-       dep = xfs_dir3_data_dotdot_entry_p(hdr);
+       dep = xfs_dir3_data_dotdot_entry_p(mp, hdr);
        dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
        dep->namelen = 2;
        dep->name[0] = dep->name[1] = '.';
@@ -1183,7 +1183,7 @@ xfs_dir2_sf_to_block(
        blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
        blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
                                (char *)dep - (char *)hdr));
-       offset = xfs_dir3_data_first_offset(hdr);
+       offset = xfs_dir3_data_first_offset(mp);
        /*
         * Loop over existing entries, stuff them in.
         */
index a0961a6..9cf6738 100644 (file)
@@ -497,69 +497,58 @@ xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
 /*
  * Offsets of . and .. in data space (always block 0)
  *
- * The macros are used for shortform directories as they have no headers to read
- * the magic number out of. Shortform directories need to know the size of the
- * data block header because the sfe embeds the block offset of the entry into
- * it so that it doesn't change when format conversion occurs. Bad Things Happen
- * if we don't follow this rule.
- *
  * XXX: there is scope for significant optimisation of the logic here. Right
  * now we are checking for "dir3 format" over and over again. Ideally we should
  * only do it once for each operation.
  */
-#define        XFS_DIR3_DATA_DOT_OFFSET(mp)    \
-       xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb))
-#define        XFS_DIR3_DATA_DOTDOT_OFFSET(mp) \
-       (XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 1))
-#define        XFS_DIR3_DATA_FIRST_OFFSET(mp)          \
-       (XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 2))
-
 static inline xfs_dir2_data_aoff_t
-xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_dot_offset(struct xfs_mount *mp)
 {
-       return xfs_dir3_data_entry_offset(hdr);
+       return xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
 }
 
 static inline xfs_dir2_data_aoff_t
-xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_dotdot_offset(struct xfs_mount *mp)
 {
-       bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-                   hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
-       return xfs_dir3_data_dot_offset(hdr) +
-               __xfs_dir3_data_entsize(dir3, 1);
+       return xfs_dir3_data_dot_offset(mp) +
+               xfs_dir3_data_entsize(mp, 1);
 }
 
 static inline xfs_dir2_data_aoff_t
-xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_first_offset(struct xfs_mount *mp)
 {
-       bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-                   hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
-       return xfs_dir3_data_dotdot_offset(hdr) +
-               __xfs_dir3_data_entsize(dir3, 2);
+       return xfs_dir3_data_dotdot_offset(mp) +
+               xfs_dir3_data_entsize(mp, 2);
 }
 
 /*
  * location of . and .. in data space (always block 0)
  */
 static inline struct xfs_dir2_data_entry *
-xfs_dir3_data_dot_entry_p(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_dot_entry_p(
+       struct xfs_mount        *mp,
+       struct xfs_dir2_data_hdr *hdr)
 {
        return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + xfs_dir3_data_dot_offset(hdr));
+               ((char *)hdr + xfs_dir3_data_dot_offset(mp));
 }
 
 static inline struct xfs_dir2_data_entry *
-xfs_dir3_data_dotdot_entry_p(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_dotdot_entry_p(
+       struct xfs_mount        *mp,
+       struct xfs_dir2_data_hdr *hdr)
 {
        return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + xfs_dir3_data_dotdot_offset(hdr));
+               ((char *)hdr + xfs_dir3_data_dotdot_offset(mp));
 }
 
 static inline struct xfs_dir2_data_entry *
-xfs_dir3_data_first_entry_p(struct xfs_dir2_data_hdr *hdr)
+xfs_dir3_data_first_entry_p(
+       struct xfs_mount        *mp,
+       struct xfs_dir2_data_hdr *hdr)
 {
        return (struct xfs_dir2_data_entry *)
-               ((char *)hdr + xfs_dir3_data_first_offset(hdr));
+               ((char *)hdr + xfs_dir3_data_first_offset(mp));
 }
 
 /*
index 8993ec1..8f84153 100644 (file)
@@ -119,9 +119,9 @@ xfs_dir2_sf_getdents(
         * mp->m_dirdatablk.
         */
        dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-                                            XFS_DIR3_DATA_DOT_OFFSET(mp));
+                                            xfs_dir3_data_dot_offset(mp));
        dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-                                               XFS_DIR3_DATA_DOTDOT_OFFSET(mp));
+                                               xfs_dir3_data_dotdot_offset(mp));
 
        /*
         * Put . entry unless we're starting past it.
index bb6e284..3ef6d40 100644 (file)
@@ -557,7 +557,7 @@ xfs_dir2_sf_addname_hard(
         * to insert the new entry.
         * If it's going to end up at the end then oldsfep will point there.
         */
-       for (offset = XFS_DIR3_DATA_FIRST_OFFSET(mp),
+       for (offset = xfs_dir3_data_first_offset(mp),
              oldsfep = xfs_dir2_sf_firstentry(oldsfp),
              add_datasize = xfs_dir3_data_entsize(mp, args->namelen),
              eof = (char *)oldsfep == &buf[old_isize];
@@ -640,7 +640,7 @@ xfs_dir2_sf_addname_pick(
 
        sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
        size = xfs_dir3_data_entsize(mp, args->namelen);
-       offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
+       offset = xfs_dir3_data_first_offset(mp);
        sfep = xfs_dir2_sf_firstentry(sfp);
        holefit = 0;
        /*
@@ -713,7 +713,7 @@ xfs_dir2_sf_check(
        mp = dp->i_mount;
 
        sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-       offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
+       offset = xfs_dir3_data_first_offset(mp);
        ino = xfs_dir2_sf_get_parent_ino(sfp);
        i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
 
index 71520e6..1ee776d 100644 (file)
@@ -64,7 +64,8 @@ int xfs_dqerror_mod = 33;
 struct kmem_zone               *xfs_qm_dqtrxzone;
 static struct kmem_zone                *xfs_qm_dqzone;
 
-static struct lock_class_key xfs_dquot_other_class;
+static struct lock_class_key xfs_dquot_group_class;
+static struct lock_class_key xfs_dquot_project_class;
 
 /*
  * This is called to free all the memory associated with a dquot
@@ -703,8 +704,20 @@ xfs_qm_dqread(
         * Make sure group quotas have a different lock class than user
         * quotas.
         */
-       if (!(type & XFS_DQ_USER))
-               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
+       switch (type) {
+       case XFS_DQ_USER:
+               /* uses the default lock class */
+               break;
+       case XFS_DQ_GROUP:
+               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class);
+               break;
+       case XFS_DQ_PROJ:
+               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class);
+               break;
+       default:
+               ASSERT(0);
+               break;
+       }
 
        XFS_STATS_INC(xs_qm_dquot);
 
index 1edb5cc..18272c7 100644 (file)
@@ -515,7 +515,7 @@ typedef struct xfs_swapext
 /*     XFS_IOC_GETBIOSIZE ---- deprecated 47      */
 #define XFS_IOC_GETBMAPX       _IOWR('X', 56, struct getbmap)
 #define XFS_IOC_ZERO_RANGE     _IOW ('X', 57, struct xfs_flock64)
-#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks)
+#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
 
 /*
  * ioctl commands that replace IRIX syssgi()'s
index 193206b..474807a 100644 (file)
@@ -119,11 +119,6 @@ xfs_inode_free(
                ip->i_itemp = NULL;
        }
 
-       /* asserts to verify all state is correct here */
-       ASSERT(atomic_read(&ip->i_pincount) == 0);
-       ASSERT(!spin_is_locked(&ip->i_flags_lock));
-       ASSERT(!xfs_isiflocked(ip));
-
        /*
         * Because we use RCU freeing we need to ensure the inode always
         * appears to be reclaimed with an invalid inode number when in the
@@ -135,6 +130,10 @@ xfs_inode_free(
        ip->i_ino = 0;
        spin_unlock(&ip->i_flags_lock);
 
+       /* asserts to verify all state is correct here */
+       ASSERT(atomic_read(&ip->i_pincount) == 0);
+       ASSERT(!xfs_isiflocked(ip));
+
        call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
 }
 
index dabda95..3979749 100644 (file)
@@ -1585,6 +1585,7 @@ xlog_recover_add_to_trans(
                "bad number of regions (%d) in inode log format",
                                  in_f->ilf_size);
                        ASSERT(0);
+                       kmem_free(ptr);
                        return XFS_ERROR(EIO);
                }
 
@@ -1970,6 +1971,13 @@ xlog_recover_do_inode_buffer(
  * magic number.  If we don't recognise the magic number in the buffer, then
  * return a LSN of -1 so that the caller knows it was an unrecognised block and
  * so can recover the buffer.
+ *
+ * Note: we cannot rely solely on magic number matches to determine that the
+ * buffer has a valid LSN - we also need to verify that it belongs to this
+ * filesystem, so we need to extract the object's LSN and compare it to that
+ * which we read from the superblock. If the UUIDs don't match, then we've got a
+ * stale metadata block from an old filesystem instance that we need to recover
+ * over the top of.
  */
 static xfs_lsn_t
 xlog_recover_get_buf_lsn(
@@ -1980,6 +1988,8 @@ xlog_recover_get_buf_lsn(
        __uint16_t              magic16;
        __uint16_t              magicda;
        void                    *blk = bp->b_addr;
+       uuid_t                  *uuid;
+       xfs_lsn_t               lsn = -1;
 
        /* v4 filesystems always recover immediately */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -1992,43 +2002,79 @@ xlog_recover_get_buf_lsn(
        case XFS_ABTB_MAGIC:
        case XFS_ABTC_MAGIC:
        case XFS_IBT_CRC_MAGIC:
-       case XFS_IBT_MAGIC:
-               return be64_to_cpu(
-                               ((struct xfs_btree_block *)blk)->bb_u.s.bb_lsn);
+       case XFS_IBT_MAGIC: {
+               struct xfs_btree_block *btb = blk;
+
+               lsn = be64_to_cpu(btb->bb_u.s.bb_lsn);
+               uuid = &btb->bb_u.s.bb_uuid;
+               break;
+       }
        case XFS_BMAP_CRC_MAGIC:
-       case XFS_BMAP_MAGIC:
-               return be64_to_cpu(
-                               ((struct xfs_btree_block *)blk)->bb_u.l.bb_lsn);
+       case XFS_BMAP_MAGIC: {
+               struct xfs_btree_block *btb = blk;
+
+               lsn = be64_to_cpu(btb->bb_u.l.bb_lsn);
+               uuid = &btb->bb_u.l.bb_uuid;
+               break;
+       }
        case XFS_AGF_MAGIC:
-               return be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
+               lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
+               uuid = &((struct xfs_agf *)blk)->agf_uuid;
+               break;
        case XFS_AGFL_MAGIC:
-               return be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
+               lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
+               uuid = &((struct xfs_agfl *)blk)->agfl_uuid;
+               break;
        case XFS_AGI_MAGIC:
-               return be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
+               lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
+               uuid = &((struct xfs_agi *)blk)->agi_uuid;
+               break;
        case XFS_SYMLINK_MAGIC:
-               return be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
+               lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
+               uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid;
+               break;
        case XFS_DIR3_BLOCK_MAGIC:
        case XFS_DIR3_DATA_MAGIC:
        case XFS_DIR3_FREE_MAGIC:
-               return be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
+               lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
+               uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
+               break;
        case XFS_ATTR3_RMT_MAGIC:
-               return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
+               lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
+               uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid;
+               break;
        case XFS_SB_MAGIC:
-               return be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
+               lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
+               uuid = &((struct xfs_dsb *)blk)->sb_uuid;
+               break;
        default:
                break;
        }
 
+       if (lsn != (xfs_lsn_t)-1) {
+               if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+                       goto recover_immediately;
+               return lsn;
+       }
+
        magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
        switch (magicda) {
        case XFS_DIR3_LEAF1_MAGIC:
        case XFS_DIR3_LEAFN_MAGIC:
        case XFS_DA3_NODE_MAGIC:
-               return be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
+               lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
+               uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
+               break;
        default:
                break;
        }
 
+       if (lsn != (xfs_lsn_t)-1) {
+               if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+                       goto recover_immediately;
+               return lsn;
+       }
+
        /*
         * We do individual object checks on dquot and inode buffers as they
         * have their own individual LSN records. Also, we could have a stale
index d06079c..99b490b 100644 (file)
@@ -6,12 +6,12 @@ static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
        return mk_pte(page, pgprot);
 }
 
-static inline int huge_pte_write(pte_t pte)
+static inline unsigned long huge_pte_write(pte_t pte)
 {
        return pte_write(pte);
 }
 
-static inline int huge_pte_dirty(pte_t pte)
+static inline unsigned long huge_pte_dirty(pte_t pte)
 {
        return pte_dirty(pte);
 }
index e69de29..b1a4967 100644 (file)
@@ -0,0 +1 @@
+/* no content, but patch(1) dislikes empty files */
index edbd250..bed35e3 100644 (file)
@@ -23,7 +23,7 @@
 #define PULL_UP                        (1 << 4)
 #define ALTELECTRICALSEL       (1 << 5)
 
-/* 34xx specific mux bit defines */
+/* omap3/4/5 specific mux bit defines */
 #define INPUT_EN               (1 << 8)
 #define OFF_EN                 (1 << 9)
 #define OFFOUT_EN              (1 << 10)
@@ -31,8 +31,6 @@
 #define OFF_PULL_EN            (1 << 12)
 #define OFF_PULL_UP            (1 << 13)
 #define WAKEUP_EN              (1 << 14)
-
-/* 44xx specific mux bit defines */
 #define WAKEUP_EVENT           (1 << 15)
 
 /* Active pin states */
index f7f1d71..089743a 100644 (file)
@@ -159,6 +159,26 @@ static inline bool balloon_page_movable(struct page *page)
 }
 
 /*
+ * isolated_balloon_page - identify an isolated balloon page on private
+ *                        compaction/migration page lists.
+ *
+ * After a compaction thread isolates a balloon page for migration, it raises
+ * the page refcount to prevent concurrent compaction threads from re-isolating
+ * the same page. For that reason putback_movable_pages(), or other routines
+ * that need to identify isolated balloon pages on private pagelists, cannot
+ * rely on balloon_page_movable() to accomplish the task.
+ */
+static inline bool isolated_balloon_page(struct page *page)
+{
+       /* Already isolated balloon pages, by default, have a raised refcount */
+       if (page_flags_cleared(page) && !page_mapped(page) &&
+           page_count(page) >= 2)
+               return __is_movable_balloon_page(page);
+
+       return false;
+}
+
+/*
  * balloon_page_insert - insert a page into the balloon's page list and make
  *                      the page->mapping assignment accordingly.
  * @page    : page to be assigned as a 'balloon page'
@@ -243,6 +263,11 @@ static inline bool balloon_page_movable(struct page *page)
        return false;
 }
 
+static inline bool isolated_balloon_page(struct page *page)
+{
+       return false;
+}
+
 static inline bool balloon_page_isolate(struct page *page)
 {
        return false;
index d66033f..0333e60 100644 (file)
@@ -242,6 +242,7 @@ extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc,
                                 struct bcma_device *core, bool enable);
 extern void bcma_core_pci_up(struct bcma_bus *bus);
 extern void bcma_core_pci_down(struct bcma_bus *bus);
+extern void bcma_core_pci_power_save(struct bcma_bus *bus, bool up);
 
 extern int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev);
 extern int bcma_core_pci_plat_dev_init(struct pci_dev *dev);
index 842de22..ded4299 100644 (file)
 #define __visible __attribute__((externally_visible))
 #endif
 
+/*
+ * GCC 'asm goto' miscompiles certain code sequences:
+ *
+ *   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
+ *
+ * Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
+ * Fixed in GCC 4.8.2 and later versions.
+ *
+ * (asm goto is automatically volatile - the naming reflects this.)
+ */
+#if GCC_VERSION <= 40801
+# define asm_volatile_goto(x...)       do { asm goto(x); asm (""); } while (0)
+#else
+# define asm_volatile_goto(x...)       do { asm goto(x); } while (0)
+#endif
 
 #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
 #if GCC_VERSION >= 40400
index 653073d..ed419c6 100644 (file)
@@ -406,13 +406,14 @@ int dm_noflush_suspending(struct dm_target *ti);
 union map_info *dm_get_mapinfo(struct bio *bio);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
 
+struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
+
 /*
  * Geometry functions.
  */
 int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo);
 int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo);
 
-
 /*-----------------------------------------------------------------
  * Functions for manipulating device-mapper tables.
  *---------------------------------------------------------------*/
index a3b8b2e..d98503b 100644 (file)
 /*
  * Framework version for util services.
  */
+#define UTIL_FW_MINOR  0
+
+#define UTIL_WS2K8_FW_MAJOR  1
+#define UTIL_WS2K8_FW_VERSION     (UTIL_WS2K8_FW_MAJOR << 16 | UTIL_FW_MINOR)
 
 #define UTIL_FW_MAJOR  3
-#define UTIL_FW_MINOR  0
-#define UTIL_FW_MAJOR_MINOR     (UTIL_FW_MAJOR << 16 | UTIL_FW_MINOR)
+#define UTIL_FW_VERSION     (UTIL_FW_MAJOR << 16 | UTIL_FW_MINOR)
 
 
 /*
index 78e2ada..d380c5e 100644 (file)
@@ -55,7 +55,7 @@
 #define DMAR_IQT_REG   0x88    /* Invalidation queue tail register */
 #define DMAR_IQ_SHIFT  4       /* Invalidation queue head/tail shift */
 #define DMAR_IQA_REG   0x90    /* Invalidation queue addr register */
-#define DMAR_ICS_REG   0x98    /* Invalidation complete status register */
+#define DMAR_ICS_REG   0x9c    /* Invalidation complete status register */
 #define DMAR_IRTA_REG  0xb8    /* Interrupt remapping table addr register */
 
 #define OFFSET_STRIDE          (9)
index 482ad2d..672ddc4 100644 (file)
@@ -439,6 +439,17 @@ static inline char *hex_byte_pack(char *buf, u8 byte)
        return buf;
 }
 
+extern const char hex_asc_upper[];
+#define hex_asc_upper_lo(x)    hex_asc_upper[((x) & 0x0f)]
+#define hex_asc_upper_hi(x)    hex_asc_upper[((x) & 0xf0) >> 4]
+
+static inline char *hex_byte_pack_upper(char *buf, u8 byte)
+{
+       *buf++ = hex_asc_upper_hi(byte);
+       *buf++ = hex_asc_upper_lo(byte);
+       return buf;
+}
+
 static inline char * __deprecated pack_hex_byte(char *buf, u8 byte)
 {
        return hex_byte_pack(buf, byte);
index 0fbbc7a..9523d2a 100644 (file)
@@ -142,7 +142,7 @@ struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
-extern raw_spinlock_t kvm_lock;
+extern spinlock_t kvm_lock;
 extern struct list_head vm_list;
 
 struct kvm_io_range {
@@ -189,8 +189,7 @@ struct kvm_async_pf {
        gva_t gva;
        unsigned long addr;
        struct kvm_arch_async_pf arch;
-       struct page *page;
-       bool done;
+       bool   wakeup_all;
 };
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
@@ -508,9 +507,10 @@ int kvm_set_memory_region(struct kvm *kvm,
                          struct kvm_userspace_memory_region *mem);
 int __kvm_set_memory_region(struct kvm *kvm,
                            struct kvm_userspace_memory_region *mem);
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont);
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages);
 void kvm_arch_memslots_updated(struct kvm *kvm);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                struct kvm_memory_slot *memslot,
@@ -671,6 +671,25 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
+#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
+void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
+void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
+bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
+#else
+static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
+{
+}
+
+static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
+{
+}
+
+static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
+{
+       return false;
+}
+#endif
+
 static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
@@ -747,9 +766,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
-/* For vcpu->arch.iommu_flags */
-#define KVM_IOMMU_CACHE_COHERENCY      0x1
-
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
@@ -789,7 +805,7 @@ static inline void kvm_guest_enter(void)
 
        /* KVM does not hold any references to rcu protected data when it
         * switches CPU into a guest mode. In fact switching to a guest mode
-        * is very similar to exiting to userspase from rcu point of view. In
+        * is very similar to exiting to userspace from rcu point of view. In
         * addition CPU may stay in a guest mode for quite a long time (up to
         * one time slice). Lets treat guest mode as quiescent state, just like
         * we do with user-mode execution.
@@ -842,13 +858,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
        return gfn_to_memslot(kvm, gfn)->id;
 }
 
-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
-       /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
-       return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
-               (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
 static inline gfn_t
 hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
 {
@@ -1066,6 +1075,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
+extern struct kvm_device_ops kvm_vfio_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
index 60e9587..ecc82b3 100644 (file)
@@ -53,23 +53,6 @@ struct mem_cgroup_reclaim_cookie {
        unsigned int generation;
 };
 
-enum mem_cgroup_filter_t {
-       VISIT,          /* visit current node */
-       SKIP,           /* skip the current node and continue traversal */
-       SKIP_TREE,      /* skip the whole subtree and continue traversal */
-};
-
-/*
- * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to
- * iterate through the hierarchy tree. Each tree element is checked by the
- * predicate before it is returned by the iterator. If a filter returns
- * SKIP or SKIP_TREE then the iterator code continues traversal (with the
- * next node down the hierarchy or the next node that doesn't belong under the
- * memcg's subtree).
- */
-typedef enum mem_cgroup_filter_t
-(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
-
 #ifdef CONFIG_MEMCG
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -137,18 +120,9 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
        struct page *oldpage, struct page *newpage, bool migration_ok);
 
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
-                                  struct mem_cgroup *prev,
-                                  struct mem_cgroup_reclaim_cookie *reclaim,
-                                  mem_cgroup_iter_filter cond);
-
-static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
-                                  struct mem_cgroup *prev,
-                                  struct mem_cgroup_reclaim_cookie *reclaim)
-{
-       return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
-}
-
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+                                  struct mem_cgroup *,
+                                  struct mem_cgroup_reclaim_cookie *);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 
 /*
@@ -260,9 +234,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
        mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
-               struct mem_cgroup *root);
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+                                               gfp_t gfp_mask,
+                                               unsigned long *total_scanned);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -376,15 +350,6 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
                struct page *oldpage, struct page *newpage, bool migration_ok)
 {
 }
-static inline struct mem_cgroup *
-mem_cgroup_iter_cond(struct mem_cgroup *root,
-               struct mem_cgroup *prev,
-               struct mem_cgroup_reclaim_cookie *reclaim,
-               mem_cgroup_iter_filter cond)
-{
-       /* first call must return non-NULL, second return NULL */
-       return (struct mem_cgroup *)(unsigned long)!prev;
-}
 
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
@@ -471,11 +436,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
-               struct mem_cgroup *root)
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+                                           gfp_t gfp_mask,
+                                           unsigned long *total_scanned)
 {
-       return VISIT;
+       return 0;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
index 09c2300..cb35835 100644 (file)
@@ -45,6 +45,7 @@
 #define MAPPER_CTRL_MINOR      236
 #define LOOP_CTRL_MINOR                237
 #define VHOST_NET_MINOR                238
+#define UHID_MINOR             239
 #define MISC_DYNAMIC_MINOR     255
 
 struct device;
index 68029b3..5eb4e31 100644 (file)
@@ -181,7 +181,7 @@ enum {
        MLX5_DEV_CAP_FLAG_TLP_HINTS     = 1LL << 39,
        MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40,
        MLX5_DEV_CAP_FLAG_DCT           = 1LL << 41,
-       MLX5_DEV_CAP_FLAG_CMDIF_CSUM    = 1LL << 46,
+       MLX5_DEV_CAP_FLAG_CMDIF_CSUM    = 3LL << 46,
 };
 
 enum {
@@ -417,7 +417,7 @@ struct mlx5_init_seg {
        struct health_buffer    health;
        __be32                  rsvd2[884];
        __be32                  health_counter;
-       __be32                  rsvd3[1023];
+       __be32                  rsvd3[1019];
        __be64                  ieee1588_clk;
        __be32                  ieee1588_clk_type;
        __be32                  clr_intx;
index 8888381..6b8c496 100644 (file)
@@ -82,7 +82,7 @@ enum {
 };
 
 enum {
-       MLX5_MAX_EQ_NAME        = 20
+       MLX5_MAX_EQ_NAME        = 32
 };
 
 enum {
@@ -747,8 +747,7 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx)
 
 enum {
        MLX5_PROF_MASK_QP_SIZE          = (u64)1 << 0,
-       MLX5_PROF_MASK_CMDIF_CSUM       = (u64)1 << 1,
-       MLX5_PROF_MASK_MR_CACHE         = (u64)1 << 2,
+       MLX5_PROF_MASK_MR_CACHE         = (u64)1 << 1,
 };
 
 enum {
@@ -758,7 +757,6 @@ enum {
 struct mlx5_profile {
        u64     mask;
        u32     log_max_qp;
-       int     cmdif_csum;
        struct {
                int     size;
                int     limit;
index ccd4260..bab49da 100644 (file)
@@ -15,8 +15,8 @@
 #include <linux/spinlock_types.h>
 #include <linux/linkage.h>
 #include <linux/lockdep.h>
-
 #include <linux/atomic.h>
+#include <asm/processor.h>
 
 /*
  * Simple, straightforward mutexes with strict semantics:
@@ -175,8 +175,8 @@ extern void mutex_unlock(struct mutex *lock);
 
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
-#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
-#define arch_mutex_cpu_relax() cpu_relax()
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
 #endif
 
 #endif
index 01fd84b..49f52c8 100644 (file)
@@ -1455,7 +1455,8 @@ struct nfs_rpc_ops {
        struct inode * (*open_context) (struct inode *dir,
                                struct nfs_open_context *ctx,
                                int open_flags,
-                               struct iattr *iattr);
+                               struct iattr *iattr,
+                               int *);
        int (*have_delegation)(struct inode *, fmode_t);
        int (*return_delegation)(struct inode *);
        struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *);
index 535cecf..fcd63ba 100644 (file)
@@ -1,8 +1,6 @@
 #ifndef __OF_IRQ_H
 #define __OF_IRQ_H
 
-#if defined(CONFIG_OF)
-struct of_irq;
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/irq.h>
@@ -10,14 +8,6 @@ struct of_irq;
 #include <linux/ioport.h>
 #include <linux/of.h>
 
-/*
- * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC
- * implements it differently.  However, the prototype is the same for all,
- * so declare it here regardless of the CONFIG_OF_IRQ setting.
- */
-extern unsigned int irq_of_parse_and_map(struct device_node *node, int index);
-
-#if defined(CONFIG_OF_IRQ)
 /**
  * of_irq - container for device_node/irq_specifier pair for an irq controller
  * @controller: pointer to interrupt controller device tree node
@@ -71,11 +61,17 @@ extern int of_irq_to_resource(struct device_node *dev, int index,
 extern int of_irq_count(struct device_node *dev);
 extern int of_irq_to_resource_table(struct device_node *dev,
                struct resource *res, int nr_irqs);
-extern struct device_node *of_irq_find_parent(struct device_node *child);
 
 extern void of_irq_init(const struct of_device_id *matches);
 
-#endif /* CONFIG_OF_IRQ */
+#if defined(CONFIG_OF)
+/*
+ * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC
+ * implements it differently.  However, the prototype is the same for all,
+ * so declare it here regardless of the CONFIG_OF_IRQ setting.
+ */
+extern unsigned int irq_of_parse_and_map(struct device_node *node, int index);
+extern struct device_node *of_irq_find_parent(struct device_node *child);
 
 #else /* !CONFIG_OF */
 static inline unsigned int irq_of_parse_and_map(struct device_node *dev,
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
deleted file mode 100644 (file)
index c841282..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __OF_RESERVED_MEM_H
-#define __OF_RESERVED_MEM_H
-
-#ifdef CONFIG_OF_RESERVED_MEM
-void of_reserved_mem_device_init(struct device *dev);
-void of_reserved_mem_device_release(struct device *dev);
-void early_init_dt_scan_reserved_mem(void);
-#else
-static inline void of_reserved_mem_device_init(struct device *dev) { }
-static inline void of_reserved_mem_device_release(struct device *dev) { }
-static inline void early_init_dt_scan_reserved_mem(void) { }
-#endif
-
-#endif /* __OF_RESERVED_MEM_H */
index 866e85c..c8ba627 100644 (file)
@@ -294,9 +294,31 @@ struct ring_buffer;
  */
 struct perf_event {
 #ifdef CONFIG_PERF_EVENTS
-       struct list_head                group_entry;
+       /*
+        * entry onto perf_event_context::event_list;
+        *   modifications require ctx->lock
+        *   RCU safe iterations.
+        */
        struct list_head                event_entry;
+
+       /*
+        * XXX: group_entry and sibling_list should be mutually exclusive;
+        * either you're a sibling on a group, or you're the group leader.
+        * Rework the code to always use the same list element.
+        *
+        * Locked for modification by both ctx->mutex and ctx->lock; holding
+        * either sufficies for read.
+        */
+       struct list_head                group_entry;
        struct list_head                sibling_list;
+
+       /*
+        * We need storage to track the entries in perf_pmu_migrate_context; we
+        * cannot use the event_entry because of RCU and we want to keep the
+        * group in tact which avoids us using the other two entries.
+        */
+       struct list_head                migrate_entry;
+
        struct hlist_node               hlist_entry;
        int                             nr_siblings;
        int                             group_flags;
index 3b9377d..6312dd9 100644 (file)
@@ -17,6 +17,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags);
 extern void get_random_bytes(void *buf, int nbytes);
 extern void get_random_bytes_arch(void *buf, int nbytes);
 void generate_random_uuid(unsigned char uuid_out[16]);
+extern int random_int_secret_init(void);
 
 #ifndef MODULE
 extern const struct file_operations random_fops, urandom_fops;
index 67e13aa..9bdad43 100644 (file)
@@ -40,6 +40,8 @@ enum regulator_status {
 };
 
 /**
+ * struct regulator_linear_range - specify linear voltage ranges
+ *
  * Specify a range of voltages for regulator_map_linar_range() and
  * regulator_list_linear_range().
  *
index 6682da3..7bb4b4a 100644 (file)
@@ -285,6 +285,14 @@ static inline void lockup_detector_init(void)
 }
 #endif
 
+#ifdef CONFIG_DETECT_HUNG_TASK
+void reset_hung_task_detector(void);
+#else
+static inline void reset_hung_task_detector(void)
+{
+}
+#endif
+
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched                __attribute__((__section__(".sched.text")))
 
index 2ddb48d..c2d8933 100644 (file)
@@ -498,7 +498,7 @@ struct sk_buff {
         * headers if needed
         */
        __u8                    encapsulation:1;
-       /* 7/9 bit hole (depending on ndisc_nodetype presence) */
+       /* 6/8 bit hole (depending on ndisc_nodetype presence) */
        kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
index cfb7ca0..731f523 100644 (file)
@@ -155,6 +155,12 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
 
 static inline void kick_all_cpus_sync(void) {  }
 
+static inline void __smp_call_function_single(int cpuid,
+               struct call_single_data *data, int wait)
+{
+       on_each_cpu(data->func, data->info, wait);
+}
+
 #endif /* !SMP */
 
 /*
index c114614..9b058ee 100644 (file)
@@ -237,4 +237,18 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
        __srcu_read_unlock(sp, idx);
 }
 
+/**
+ * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
+ *
+ * Converts the preceding srcu_read_unlock into a two-way memory barrier.
+ *
+ * Call this after srcu_read_unlock, to guarantee that all memory operations
+ * that occur after smp_mb__after_srcu_read_unlock will appear to happen after
+ * the preceding srcu_read_unlock.
+ */
+static inline void smp_mb__after_srcu_read_unlock(void)
+{
+       /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */
+}
+
 #endif
index dd3edd7..9d3f1a5 100644 (file)
 
 #include <asm/timex.h>
 
+#ifndef random_get_entropy
+/*
+ * The random_get_entropy() function is used by the /dev/random driver
+ * in order to extract entropy via the relative unpredictability of
+ * when an interrupt takes places versus a high speed, fine-grained
+ * timing source or cycle counter.  Since it will be occurred on every
+ * single interrupt, it must have a very low cost/overhead.
+ *
+ * By default we use get_cycles() for this purpose, but individual
+ * architectures may override this in their asm/timex.h header file.
+ */
+#define random_get_entropy()   get_cycles()
+#endif
+
 /*
  * SHIFT_PLL is used as a dampening factor to define how much we
  * adjust the frequency correction for a given offset in PLL mode.
index 9cb2fe8..e303eef 100644 (file)
@@ -42,6 +42,7 @@ struct usbnet {
        struct usb_host_endpoint *status;
        unsigned                maxpacket;
        struct timer_list       delay;
+       const char              *padding_pkt;
 
        /* protocol/interface state */
        struct net_device       *net;
index 80cf817..2c02f3a 100644 (file)
@@ -65,15 +65,8 @@ struct pci_dev;
  *     out of the arbitration process (and can be safe to take
  *     interrupts at any time.
  */
-#if defined(CONFIG_VGA_ARB)
 extern void vga_set_legacy_decoding(struct pci_dev *pdev,
                                    unsigned int decodes);
-#else
-static inline void vga_set_legacy_decoding(struct pci_dev *pdev,
-                                          unsigned int decodes)
-{
-}
-#endif
 
 /**
  *     vga_get         - acquire & locks VGA resources
index fb314de..86505bf 100644 (file)
@@ -67,6 +67,10 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
 int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
 #endif
 
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+                                  const unsigned int prefix_len,
+                                  struct net_device *dev);
+
 int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev);
 
 struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net,
index aaeaf09..15f1084 100644 (file)
@@ -104,6 +104,7 @@ enum {
 enum {
        HCI_SETUP,
        HCI_AUTO_OFF,
+       HCI_RFKILLED,
        HCI_MGMT,
        HCI_PAIRABLE,
        HCI_SERVICE_CACHE,
index f0d70f0..9c4d37e 100644 (file)
@@ -723,8 +723,6 @@ struct ip_vs_dest_dst {
        struct rcu_head         rcu_head;
 };
 
-/* In grace period after removing */
-#define IP_VS_DEST_STATE_REMOVING      0x01
 /*
  *     The real server destination forwarding entry
  *     with ip address, port number, and so on.
@@ -742,7 +740,7 @@ struct ip_vs_dest {
 
        atomic_t                refcnt;         /* reference counter */
        struct ip_vs_stats      stats;          /* statistics */
-       unsigned long           state;          /* state flags */
+       unsigned long           idle_start;     /* start time, jiffies */
 
        /* connection counters and thresholds */
        atomic_t                activeconns;    /* active connections */
@@ -756,14 +754,13 @@ struct ip_vs_dest {
        struct ip_vs_dest_dst __rcu *dest_dst;  /* cached dst info */
 
        /* for virtual service */
-       struct ip_vs_service    *svc;           /* service it belongs to */
+       struct ip_vs_service __rcu *svc;        /* service it belongs to */
        __u16                   protocol;       /* which protocol (TCP/UDP) */
        __be16                  vport;          /* virtual port number */
        union nf_inet_addr      vaddr;          /* virtual IP address */
        __u32                   vfwmark;        /* firewall mark of service */
 
        struct list_head        t_list;         /* in dest_trash */
-       struct rcu_head         rcu_head;
        unsigned int            in_rs_table:1;  /* we are in rs_table */
 };
 
@@ -1649,7 +1646,7 @@ static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
 /* CONFIG_IP_VS_NFCT */
 #endif
 
-static inline unsigned int
+static inline int
 ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
 {
        /*
index 4fbf02a..0f7558b 100644 (file)
@@ -112,6 +112,7 @@ struct mrp_applicant {
        struct mrp_application  *app;
        struct net_device       *dev;
        struct timer_list       join_timer;
+       struct timer_list       periodic_timer;
 
        spinlock_t              lock;
        struct sk_buff_head     queue;
index 1313456..9d22f08 100644 (file)
@@ -74,6 +74,7 @@ struct net {
        struct hlist_head       *dev_index_head;
        unsigned int            dev_base_seq;   /* protected by rtnl_mutex */
        int                     ifindex;
+       unsigned int            dev_unreg_count;
 
        /* core fib_rules */
        struct list_head        rules_ops;
index 806f54a..f572f31 100644 (file)
@@ -56,7 +56,7 @@ struct synproxy_options {
 
 struct tcphdr;
 struct xt_synproxy_info;
-extern void synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+extern bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
                                   const struct tcphdr *th,
                                   struct synproxy_options *opts);
 extern unsigned int synproxy_options_size(const struct synproxy_options *opts);
index 6ca975b..c2e542b 100644 (file)
@@ -3,7 +3,6 @@
 
 #include <linux/types.h>
 
-extern void net_secret_init(void);
 extern __u32 secure_ip_id(__be32 daddr);
 extern __u32 secure_ipv6_id(const __be32 daddr[4]);
 extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
index 6ba2e7b..1d37a80 100644 (file)
@@ -409,6 +409,11 @@ struct sock {
        void                    (*sk_destruct)(struct sock *sk);
 };
 
+#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
+
+#define rcu_dereference_sk_user_data(sk)       rcu_dereference(__sk_user_data((sk)))
+#define rcu_assign_sk_user_data(sk, ptr)       rcu_assign_pointer(__sk_user_data((sk)), ptr)
+
 /*
  * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK
  * or not whether his port will be reused by someone else. SK_FORCE_REUSE
index 7005d11..131a0bd 100644 (file)
@@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
 
 TRACE_EVENT(
        kvm_async_pf_completed,
-       TP_PROTO(unsigned long address, struct page *page, u64 gva),
-       TP_ARGS(address, page, gva),
+       TP_PROTO(unsigned long address, u64 gva),
+       TP_ARGS(address, gva),
 
        TP_STRUCT__entry(
                __field(unsigned long, address)
-               __field(pfn_t, pfn)
                __field(u64, gva)
                ),
 
        TP_fast_assign(
                __entry->address = address;
-               __entry->pfn = page ? page_to_pfn(page) : 0;
                __entry->gva = gva;
                ),
 
-       TP_printk("gva %#llx address %#lx pfn %#llx",  __entry->gva,
-                 __entry->address, __entry->pfn)
+       TP_printk("gva %#llx address %#lx",  __entry->gva,
+                 __entry->address)
 );
 
 #endif
index fa8b3ad..46d41e8 100644 (file)
@@ -1007,4 +1007,6 @@ struct drm_radeon_info {
 #define SI_TILE_MODE_DEPTH_STENCIL_2D_4AA      3
 #define SI_TILE_MODE_DEPTH_STENCIL_2D_8AA      2
 
+#define CIK_TILE_MODE_DEPTH_STENCIL_1D         5
+
 #endif
index 99c2533..902f124 100644 (file)
@@ -518,6 +518,10 @@ struct kvm_ppc_smmu_info {
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
 #define KVM_VM_S390_UCONTROL   1
 
+/* on ppc, 0 indicate default, 1 should force HV and 2 PR */
+#define KVM_VM_PPC_HV 1
+#define KVM_VM_PPC_PR 2
+
 #define KVM_S390_SIE_PAGE_OFFSET 1
 
 /*
@@ -541,6 +545,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_TRACE_ENABLE          __KVM_DEPRECATED_MAIN_W_0x06
 #define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
 #define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
+#define KVM_GET_EMULATED_CPUID   _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
 
 /*
  * Extension capability list.
@@ -668,6 +673,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_IRQ_XICS 92
 #define KVM_CAP_ARM_EL1_32BIT 93
 #define KVM_CAP_SPAPR_MULTITCE 94
+#define KVM_CAP_EXT_EMUL_CPUID 95
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -843,6 +849,10 @@ struct kvm_device_attr {
 #define KVM_DEV_TYPE_FSL_MPIC_20       1
 #define KVM_DEV_TYPE_FSL_MPIC_42       2
 #define KVM_DEV_TYPE_XICS              3
+#define KVM_DEV_TYPE_VFIO              4
+#define  KVM_DEV_VFIO_GROUP                    1
+#define   KVM_DEV_VFIO_GROUP_ADD                       1
+#define   KVM_DEV_VFIO_GROUP_DEL                       2
 
 /*
  * ioctls for VM fds
@@ -1012,6 +1022,7 @@ struct kvm_s390_ucas_mapping {
 /* VM is being stopped by host */
 #define KVM_KVMCLOCK_CTRL        _IO(KVMIO,   0xad)
 #define KVM_ARM_VCPU_INIT        _IOW(KVMIO,  0xae, struct kvm_vcpu_init)
+#define KVM_ARM_PREFERRED_TARGET  _IOR(KVMIO,  0xaf, struct kvm_vcpu_init)
 #define KVM_GET_REG_LIST         _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
index 40a1fb8..009a655 100644 (file)
@@ -380,10 +380,13 @@ struct perf_event_mmap_page {
        union {
                __u64   capabilities;
                struct {
-                       __u64   cap_usr_time            : 1,
-                               cap_usr_rdpmc           : 1,
-                               cap_usr_time_zero       : 1,
-                               cap_____res             : 61;
+                       __u64   cap_bit0                : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
+                               cap_bit0_is_deprecated  : 1, /* Always 1, signals that bit 0 is zero */
+
+                               cap_user_rdpmc          : 1, /* The RDPMC instruction can be used to read counts */
+                               cap_user_time           : 1, /* The time_* fields are used */
+                               cap_user_time_zero      : 1, /* The time_zero field is used */
+                               cap_____res             : 59;
                };
        };
 
@@ -442,12 +445,13 @@ struct perf_event_mmap_page {
         *               ((rem * time_mult) >> time_shift);
         */
        __u64   time_zero;
+       __u32   size;                   /* Header size up to __reserved[] fields. */
 
                /*
                 * Hole for extension of the self monitor capabilities
                 */
 
-       __u64   __reserved[119];        /* align to 1k */
+       __u8    __reserved[118*8+4];    /* align to 1k. */
 
        /*
         * Control data for the mmap() data buffer.
@@ -528,6 +532,7 @@ enum perf_event_type {
         *      u64                             len;
         *      u64                             pgoff;
         *      char                            filename[];
+        *      struct sample_id                sample_id;
         * };
         */
        PERF_RECORD_MMAP                        = 1,
index af310af..63d3e8f 100644 (file)
@@ -76,6 +76,7 @@
 #include <linux/elevator.h>
 #include <linux/sched_clock.h>
 #include <linux/context_tracking.h>
+#include <linux/random.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -780,6 +781,7 @@ static void __init do_basic_setup(void)
        do_ctors();
        usermodehelper_enable();
        do_initcalls();
+       random_int_secret_init();
 }
 
 static void __init do_pre_smp_initcalls(void)
index b0d541d..558aa91 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -165,6 +165,15 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
        ipc_rmid(&msg_ids(ns), &s->q_perm);
 }
 
+static void msg_rcu_free(struct rcu_head *head)
+{
+       struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+       struct msg_queue *msq = ipc_rcu_to_struct(p);
+
+       security_msg_queue_free(msq);
+       ipc_rcu_free(head);
+}
+
 /**
  * newque - Create a new msg queue
  * @ns: namespace
@@ -189,15 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
        msq->q_perm.security = NULL;
        retval = security_msg_queue_alloc(msq);
        if (retval) {
-               ipc_rcu_putref(msq);
+               ipc_rcu_putref(msq, ipc_rcu_free);
                return retval;
        }
 
        /* ipc_addid() locks msq upon success. */
        id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
        if (id < 0) {
-               security_msg_queue_free(msq);
-               ipc_rcu_putref(msq);
+               ipc_rcu_putref(msq, msg_rcu_free);
                return id;
        }
 
@@ -276,8 +284,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
                free_msg(msg);
        }
        atomic_sub(msq->q_cbytes, &ns->msg_bytes);
-       security_msg_queue_free(msq);
-       ipc_rcu_putref(msq);
+       ipc_rcu_putref(msq, msg_rcu_free);
 }
 
 /*
@@ -688,6 +695,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                if (ipcperms(ns, &msq->q_perm, S_IWUGO))
                        goto out_unlock0;
 
+               /* raced with RMID? */
+               if (msq->q_perm.deleted) {
+                       err = -EIDRM;
+                       goto out_unlock0;
+               }
+
                err = security_msg_queue_msgsnd(msq, msg, msgflg);
                if (err)
                        goto out_unlock0;
@@ -717,7 +730,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                rcu_read_lock();
                ipc_lock_object(&msq->q_perm);
 
-               ipc_rcu_putref(msq);
+               ipc_rcu_putref(msq, ipc_rcu_free);
                if (msq->q_perm.deleted) {
                        err = -EIDRM;
                        goto out_unlock0;
@@ -894,6 +907,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
                        goto out_unlock1;
 
                ipc_lock_object(&msq->q_perm);
+
+               /* raced with RMID? */
+               if (msq->q_perm.deleted) {
+                       msg = ERR_PTR(-EIDRM);
+                       goto out_unlock0;
+               }
+
                msg = find_msg(msq, &msgtyp, mode);
                if (!IS_ERR(msg)) {
                        /*
index 69b6a21..8c4f59b 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -243,71 +243,122 @@ static void merge_queues(struct sem_array *sma)
        }
 }
 
+static void sem_rcu_free(struct rcu_head *head)
+{
+       struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+       struct sem_array *sma = ipc_rcu_to_struct(p);
+
+       security_sem_free(sma);
+       ipc_rcu_free(head);
+}
+
+/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that sem_perm.lock is free.
+ * that a) sem_perm.lock is free and b) complex_count is 0.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+       int i;
+       struct sem *sem;
+
+       if (sma->complex_count)  {
+               /* The thread that increased sma->complex_count waited on
+                * all sem->lock locks. Thus we don't need to wait again.
+                */
+               return;
+       }
+
+       for (i = 0; i < sma->sem_nsems; i++) {
+               sem = sma->sem_base + i;
+               spin_unlock_wait(&sem->lock);
+       }
+}
+
 /*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
  * Otherwise, lock the entire semaphore array, since we either have
  * multiple semaphores in our own semops, or we need to look at
  * semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
  */
 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
                              int nsops)
 {
-       int locknum;
- again:
-       if (nsops == 1 && !sma->complex_count) {
-               struct sem *sem = sma->sem_base + sops->sem_num;
+       struct sem *sem;
 
-               /* Lock just the semaphore we are interested in. */
-               spin_lock(&sem->lock);
+       if (nsops != 1) {
+               /* Complex operation - acquire a full lock */
+               ipc_lock_object(&sma->sem_perm);
 
-               /*
-                * If sma->complex_count was set while we were spinning,
-                * we may need to look at things we did not lock here.
+               /* And wait until all simple ops that are processed
+                * right now have dropped their locks.
                 */
-               if (unlikely(sma->complex_count)) {
-                       spin_unlock(&sem->lock);
-                       goto lock_array;
-               }
+               sem_wait_array(sma);
+               return -1;
+       }
+
+       /*
+        * Only one semaphore affected - try to optimize locking.
+        * The rules are:
+        * - optimized locking is possible if no complex operation
+        *   is either enqueued or processed right now.
+        * - The test for enqueued complex ops is simple:
+        *      sma->complex_count != 0
+        * - Testing for complex ops that are processed right now is
+        *   a bit more difficult. Complex ops acquire the full lock
+        *   and first wait that the running simple ops have completed.
+        *   (see above)
+        *   Thus: If we own a simple lock and the global lock is free
+        *      and complex_count is now 0, then it will stay 0 and
+        *      thus just locking sem->lock is sufficient.
+        */
+       sem = sma->sem_base + sops->sem_num;
 
+       if (sma->complex_count == 0) {
                /*
-                * Another process is holding the global lock on the
-                * sem_array; we cannot enter our critical section,
-                * but have to wait for the global lock to be released.
+                * It appears that no complex operation is around.
+                * Acquire the per-semaphore lock.
                 */
-               if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
-                       spin_unlock(&sem->lock);
-                       spin_unlock_wait(&sma->sem_perm.lock);
-                       goto again;
+               spin_lock(&sem->lock);
+
+               /* Then check that the global lock is free */
+               if (!spin_is_locked(&sma->sem_perm.lock)) {
+                       /* spin_is_locked() is not a memory barrier */
+                       smp_mb();
+
+                       /* Now repeat the test of complex_count:
+                        * It can't change anymore until we drop sem->lock.
+                        * Thus: if is now 0, then it will stay 0.
+                        */
+                       if (sma->complex_count == 0) {
+                               /* fast path successful! */
+                               return sops->sem_num;
+                       }
                }
+               spin_unlock(&sem->lock);
+       }
 
-               locknum = sops->sem_num;
+       /* slow path: acquire the full lock */
+       ipc_lock_object(&sma->sem_perm);
+
+       if (sma->complex_count == 0) {
+               /* False alarm:
+                * There is no complex operation, thus we can switch
+                * back to the fast path.
+                */
+               spin_lock(&sem->lock);
+               ipc_unlock_object(&sma->sem_perm);
+               return sops->sem_num;
        } else {
-               int i;
-               /*
-                * Lock the semaphore array, and wait for all of the
-                * individual semaphore locks to go away.  The code
-                * above ensures no new single-lock holders will enter
-                * their critical section while the array lock is held.
+               /* Not a false alarm, thus complete the sequence for a
+                * full lock.
                 */
- lock_array:
-               ipc_lock_object(&sma->sem_perm);
-               for (i = 0; i < sma->sem_nsems; i++) {
-                       struct sem *sem = sma->sem_base + i;
-                       spin_unlock_wait(&sem->lock);
-               }
-               locknum = -1;
+               sem_wait_array(sma);
+               return -1;
        }
-       return locknum;
 }
 
 static inline void sem_unlock(struct sem_array *sma, int locknum)
@@ -374,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
        sem_lock(sma, NULL, -1);
-       ipc_rcu_putref(sma);
-}
-
-static inline void sem_putref(struct sem_array *sma)
-{
-       ipc_rcu_putref(sma);
+       ipc_rcu_putref(sma, ipc_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -458,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
        sma->sem_perm.security = NULL;
        retval = security_sem_alloc(sma);
        if (retval) {
-               ipc_rcu_putref(sma);
+               ipc_rcu_putref(sma, ipc_rcu_free);
                return retval;
        }
 
        id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
        if (id < 0) {
-               security_sem_free(sma);
-               ipc_rcu_putref(sma);
+               ipc_rcu_putref(sma, sem_rcu_free);
                return id;
        }
        ns->used_sems += nsems;
@@ -873,6 +918,24 @@ again:
 }
 
 /**
+ * set_semotime(sma, sops) - set sem_otime
+ * @sma: semaphore array
+ * @sops: operations that modified the array, may be NULL
+ *
+ * sem_otime is replicated to avoid cache line trashing.
+ * This function sets one instance to the current time.
+ */
+static void set_semotime(struct sem_array *sma, struct sembuf *sops)
+{
+       if (sops == NULL) {
+               sma->sem_base[0].sem_otime = get_seconds();
+       } else {
+               sma->sem_base[sops[0].sem_num].sem_otime =
+                                                       get_seconds();
+       }
+}
+
+/**
  * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
  * @sma: semaphore array
  * @sops: operations that were performed
@@ -922,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
                        }
                }
        }
-       if (otime) {
-               if (sops == NULL) {
-                       sma->sem_base[0].sem_otime = get_seconds();
-               } else {
-                       sma->sem_base[sops[0].sem_num].sem_otime =
-                                                               get_seconds();
-               }
-       }
+       if (otime)
+               set_semotime(sma, sops);
 }
 
-
 /* The following counts are associated to each semaphore:
  *   semncnt        number of tasks waiting on semval being nonzero
  *   semzcnt        number of tasks waiting on semval being zero
@@ -1047,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
        wake_up_sem_queue_do(&tasks);
        ns->used_sems -= sma->sem_nsems;
-       security_sem_free(sma);
-       ipc_rcu_putref(sma);
+       ipc_rcu_putref(sma, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1292,7 +1347,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                        rcu_read_unlock();
                        sem_io = ipc_alloc(sizeof(ushort)*nsems);
                        if(sem_io == NULL) {
-                               sem_putref(sma);
+                               ipc_rcu_putref(sma, ipc_rcu_free);
                                return -ENOMEM;
                        }
 
@@ -1328,20 +1383,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                if(nsems > SEMMSL_FAST) {
                        sem_io = ipc_alloc(sizeof(ushort)*nsems);
                        if(sem_io == NULL) {
-                               sem_putref(sma);
+                               ipc_rcu_putref(sma, ipc_rcu_free);
                                return -ENOMEM;
                        }
                }
 
                if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
-                       sem_putref(sma);
+                       ipc_rcu_putref(sma, ipc_rcu_free);
                        err = -EFAULT;
                        goto out_free;
                }
 
                for (i = 0; i < nsems; i++) {
                        if (sem_io[i] > SEMVMX) {
-                               sem_putref(sma);
+                               ipc_rcu_putref(sma, ipc_rcu_free);
                                err = -ERANGE;
                                goto out_free;
                        }
@@ -1629,7 +1684,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
        /* step 2: allocate new undo structure */
        new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
        if (!new) {
-               sem_putref(sma);
+               ipc_rcu_putref(sma, ipc_rcu_free);
                return ERR_PTR(-ENOMEM);
        }
 
@@ -1795,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 
        error = perform_atomic_semop(sma, sops, nsops, un,
                                        task_tgid_vnr(current));
-       if (error <= 0) {
-               if (alter && error == 0)
+       if (error == 0) {
+               /* If the operation was successful, then do
+                * the required updates.
+                */
+               if (alter)
                        do_smart_update(sma, sops, nsops, 1, &tasks);
-
-               goto out_unlock_free;
+               else
+                       set_semotime(sma, sops);
        }
+       if (error <= 0)
+               goto out_unlock_free;
 
        /* We need to sleep on this operation, so we put the current
         * task into the pending queue and go to sleep.
@@ -2059,6 +2119,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
        struct sem_array *sma = it;
        time_t sem_otime;
 
+       /*
+        * The proc interface isn't aware of sem_lock(), it calls
+        * ipc_lock_object() directly (in sysvipc_find_ipc).
+        * In order to stay compatible with sem_lock(), we must wait until
+        * all simple semop() calls have left their critical regions.
+        */
+       sem_wait_array(sma);
+
        sem_otime = get_semotime(sma);
 
        return seq_printf(s,
index 2821cdf..d697396 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
        ipc_lock_object(&ipcp->shm_perm);
 }
 
+static void shm_rcu_free(struct rcu_head *head)
+{
+       struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+       struct shmid_kernel *shp = ipc_rcu_to_struct(p);
+
+       security_shm_free(shp);
+       ipc_rcu_free(head);
+}
+
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
        ipc_rmid(&shm_ids(ns), &s->shm_perm);
@@ -208,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
                user_shm_unlock(file_inode(shp->shm_file)->i_size,
                                                shp->mlock_user);
        fput (shp->shm_file);
-       security_shm_free(shp);
-       ipc_rcu_putref(shp);
+       ipc_rcu_putref(shp, shm_rcu_free);
 }
 
 /*
@@ -497,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
        shp->shm_perm.security = NULL;
        error = security_shm_alloc(shp);
        if (error) {
-               ipc_rcu_putref(shp);
+               ipc_rcu_putref(shp, ipc_rcu_free);
                return error;
        }
 
@@ -566,8 +574,7 @@ no_id:
                user_shm_unlock(size, shp->mlock_user);
        fput(file);
 no_file:
-       security_shm_free(shp);
-       ipc_rcu_putref(shp);
+       ipc_rcu_putref(shp, shm_rcu_free);
        return error;
 }
 
index e829da9..fdb8ae7 100644 (file)
@@ -474,11 +474,6 @@ void ipc_free(void* ptr, int size)
                kfree(ptr);
 }
 
-struct ipc_rcu {
-       struct rcu_head rcu;
-       atomic_t refcount;
-} ____cacheline_aligned_in_smp;
-
 /**
  *     ipc_rcu_alloc   -       allocate ipc and rcu space 
  *     @size: size desired
@@ -505,27 +500,24 @@ int ipc_rcu_getref(void *ptr)
        return atomic_inc_not_zero(&p->refcount);
 }
 
-/**
- * ipc_schedule_free - free ipc + rcu space
- * @head: RCU callback structure for queued work
- */
-static void ipc_schedule_free(struct rcu_head *head)
-{
-       vfree(container_of(head, struct ipc_rcu, rcu));
-}
-
-void ipc_rcu_putref(void *ptr)
+void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
 {
        struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
 
        if (!atomic_dec_and_test(&p->refcount))
                return;
 
-       if (is_vmalloc_addr(ptr)) {
-               call_rcu(&p->rcu, ipc_schedule_free);
-       } else {
-               kfree_rcu(p, rcu);
-       }
+       call_rcu(&p->rcu, func);
+}
+
+void ipc_rcu_free(struct rcu_head *head)
+{
+       struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+
+       if (is_vmalloc_addr(p))
+               vfree(p);
+       else
+               kfree(p);
 }
 
 /**
index c5f3338..f2f5036 100644 (file)
@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
 static inline void shm_exit_ns(struct ipc_namespace *ns) { }
 #endif
 
+struct ipc_rcu {
+       struct rcu_head rcu;
+       atomic_t refcount;
+} ____cacheline_aligned_in_smp;
+
+#define ipc_rcu_to_struct(p)  ((void *)(p+1))
+
 /*
  * Structure that holds the parameters needed by the ipc operations
  * (see after)
@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size);
  */
 void* ipc_rcu_alloc(int size);
 int ipc_rcu_getref(void *ptr);
-void ipc_rcu_putref(void *ptr);
+void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
+void ipc_rcu_free(struct rcu_head *head);
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
index 91e53d0..7b0e23a 100644 (file)
@@ -1117,9 +1117,10 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 
                        sleep_time = timeout_start + audit_backlog_wait_time -
                                        jiffies;
-                       if ((long)sleep_time > 0)
+                       if ((long)sleep_time > 0) {
                                wait_for_auditd(sleep_time);
-                       continue;
+                               continue;
+                       }
                }
                if (audit_rate_check() && printk_ratelimit())
                        printk(KERN_WARNING
index 247091b..859c8df 100644 (file)
@@ -51,6 +51,15 @@ void context_tracking_user_enter(void)
        unsigned long flags;
 
        /*
+        * Repeat the user_enter() check here because some archs may be calling
+        * this from asm and if no CPU needs context tracking, they shouldn't
+        * go further. Repeat the check here until they support the static key
+        * check.
+        */
+       if (!static_key_false(&context_tracking_enabled))
+               return;
+
+       /*
         * Some contexts may involve an exception occuring in an irq,
         * leading to that nesting:
         * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
@@ -151,6 +160,9 @@ void context_tracking_user_exit(void)
 {
        unsigned long flags;
 
+       if (!static_key_false(&context_tracking_enabled))
+               return;
+
        if (in_interrupt())
                return;
 
index dd236b6..d49a9d2 100644 (file)
@@ -3660,6 +3660,26 @@ static void calc_timer_values(struct perf_event *event,
        *running = ctx_time - event->tstamp_running;
 }
 
+static void perf_event_init_userpage(struct perf_event *event)
+{
+       struct perf_event_mmap_page *userpg;
+       struct ring_buffer *rb;
+
+       rcu_read_lock();
+       rb = rcu_dereference(event->rb);
+       if (!rb)
+               goto unlock;
+
+       userpg = rb->user_page;
+
+       /* Allow new userspace to detect that bit 0 is deprecated */
+       userpg->cap_bit0_is_deprecated = 1;
+       userpg->size = offsetof(struct perf_event_mmap_page, __reserved);
+
+unlock:
+       rcu_read_unlock();
+}
+
 void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
 }
@@ -4044,6 +4064,7 @@ again:
        ring_buffer_attach(event, rb);
        rcu_assign_pointer(event->rb, rb);
 
+       perf_event_init_userpage(event);
        perf_event_update_userpage(event);
 
 unlock:
@@ -7213,15 +7234,15 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
                perf_remove_from_context(event);
                unaccount_event_cpu(event, src_cpu);
                put_ctx(src_ctx);
-               list_add(&event->event_entry, &events);
+               list_add(&event->migrate_entry, &events);
        }
        mutex_unlock(&src_ctx->mutex);
 
        synchronize_rcu();
 
        mutex_lock(&dst_ctx->mutex);
-       list_for_each_entry_safe(event, tmp, &events, event_entry) {
-               list_del(&event->event_entry);
+       list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+               list_del(&event->migrate_entry);
                if (event->state >= PERF_EVENT_STATE_OFF)
                        event->state = PERF_EVENT_STATE_INACTIVE;
                account_event_cpu(event, dst_cpu);
index 3e97fb1..dfdf515 100644 (file)
@@ -203,6 +203,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
        return ret;
 }
 
+static atomic_t reset_hung_task = ATOMIC_INIT(0);
+
+void reset_hung_task_detector(void)
+{
+       atomic_set(&reset_hung_task, 1);
+}
+EXPORT_SYMBOL_GPL(reset_hung_task_detector);
+
 /*
  * kthread which checks for tasks stuck in D state
  */
@@ -216,6 +224,9 @@ static int watchdog(void *dummy)
                while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
                        timeout = sysctl_hung_task_timeout_secs;
 
+               if (atomic_xchg(&reset_hung_task, 0))
+                       continue;
+
                check_hung_uninterruptible_tasks(timeout);
        }
 
index fb32636..b086006 100644 (file)
@@ -571,6 +571,10 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
        DECLARE_COMPLETION_ONSTACK(done);
        int retval = 0;
 
+       if (!sub_info->path) {
+               call_usermodehelper_freeinfo(sub_info);
+               return -EINVAL;
+       }
        helper_lock();
        if (!khelper_wq || usermodehelper_disabled) {
                retval = -EBUSY;
index 81c4e78..c00d5b5 100644 (file)
@@ -254,11 +254,11 @@ int parse_args(const char *doing,
 
 
 STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, kstrtoul);
-STANDARD_PARAM_DEF(short, short, "%hi", long, kstrtoul);
+STANDARD_PARAM_DEF(short, short, "%hi", long, kstrtol);
 STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, kstrtoul);
-STANDARD_PARAM_DEF(int, int, "%i", long, kstrtoul);
+STANDARD_PARAM_DEF(int, int, "%i", long, kstrtol);
 STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, kstrtoul);
-STANDARD_PARAM_DEF(long, long, "%li", long, kstrtoul);
+STANDARD_PARAM_DEF(long, long, "%li", long, kstrtol);
 STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, kstrtoul);
 
 int param_set_charp(const char *val, const struct kernel_param *kp)
index ebe5e80..9b9a266 100644 (file)
@@ -273,6 +273,11 @@ void free_pid(struct pid *pid)
                         */
                        wake_up_process(ns->child_reaper);
                        break;
+               case PIDNS_HASH_ADDING:
+                       /* Handle a fork failure of the first process */
+                       WARN_ON(ns->child_reaper);
+                       ns->nr_hashed = 0;
+                       /* fall through */
                case 0:
                        schedule_work(&ns->proc_work);
                        break;
index 358a146..98c3b34 100644 (file)
@@ -743,7 +743,10 @@ int create_basic_memory_bitmaps(void)
        struct memory_bitmap *bm1, *bm2;
        int error = 0;
 
-       BUG_ON(forbidden_pages_map || free_pages_map);
+       if (forbidden_pages_map && free_pages_map)
+               return 0;
+       else
+               BUG_ON(forbidden_pages_map || free_pages_map);
 
        bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
        if (!bm1)
index 72e8f4f..957f061 100644 (file)
@@ -39,6 +39,7 @@ static struct snapshot_data {
        char frozen;
        char ready;
        char platform_support;
+       bool free_bitmaps;
 } snapshot_state;
 
 atomic_t snapshot_device_available = ATOMIC_INIT(1);
@@ -82,6 +83,10 @@ static int snapshot_open(struct inode *inode, struct file *filp)
                data->swap = -1;
                data->mode = O_WRONLY;
                error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+               if (!error) {
+                       error = create_basic_memory_bitmaps();
+                       data->free_bitmaps = !error;
+               }
                if (error)
                        pm_notifier_call_chain(PM_POST_RESTORE);
        }
@@ -111,6 +116,8 @@ static int snapshot_release(struct inode *inode, struct file *filp)
                pm_restore_gfp_mask();
                free_basic_memory_bitmaps();
                thaw_processes();
+       } else if (data->free_bitmaps) {
+               free_basic_memory_bitmaps();
        }
        pm_notifier_call_chain(data->mode == O_RDONLY ?
                        PM_POST_HIBERNATION : PM_POST_RESTORE);
@@ -231,6 +238,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
                        break;
                pm_restore_gfp_mask();
                free_basic_memory_bitmaps();
+               data->free_bitmaps = false;
                thaw_processes();
                data->frozen = 0;
                break;
index 269ed93..f813b34 100644 (file)
@@ -32,7 +32,14 @@ EXPORT_SYMBOL(cad_pid);
 #endif
 enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE;
 
-int reboot_default;
+/*
+ * This variable is used privately to keep track of whether or not
+ * reboot_type is still set to its default value (i.e., reboot= hasn't
+ * been set on the command line).  This is needed so that we can
+ * suppress DMI scanning for reboot quirks.  Without it, it's
+ * impossible to override a faulty reboot quirk without recompiling.
+ */
+int reboot_default = 1;
 int reboot_cpu;
 enum reboot_type reboot_type = BOOT_ACPI;
 int reboot_force;
index 11cd136..7c70201 100644 (file)
@@ -4242,7 +4242,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
        }
 
        if (!se) {
-               cfs_rq->h_load = rq->avg.load_avg_contrib;
+               cfs_rq->h_load = cfs_rq->runnable_load_avg;
                cfs_rq->last_h_load_update = now;
        }
 
@@ -4823,8 +4823,8 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
                (busiest->load_per_task * SCHED_POWER_SCALE) /
                busiest->group_power;
 
-       if (busiest->avg_load - local->avg_load + scaled_busy_load_per_task >=
-           (scaled_busy_load_per_task * imbn)) {
+       if (busiest->avg_load + scaled_busy_load_per_task >=
+           local->avg_load + (scaled_busy_load_per_task * imbn)) {
                env->imbalance = busiest->load_per_task;
                return;
        }
@@ -4896,7 +4896,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
         * max load less than avg load(as we skip the groups at or below
         * its cpu_power, while calculating max_load..)
         */
-       if (busiest->avg_load < sds->avg_load) {
+       if (busiest->avg_load <= sds->avg_load ||
+           local->avg_load >= sds->avg_load) {
                env->imbalance = 0;
                return fix_small_imbalance(env, sds);
        }
index 53cc09c..d7d498d 100644 (file)
@@ -328,10 +328,19 @@ void irq_enter(void)
 
 static inline void invoke_softirq(void)
 {
-       if (!force_irqthreads)
-               __do_softirq();
-       else
+       if (!force_irqthreads) {
+               /*
+                * We can safely execute softirq on the current stack if
+                * it is the irq stack, because it should be near empty
+                * at this stage. But we have no way to know if the arch
+                * calls irq_exit() on the irq stack. So call softirq
+                * in its own stack to prevent from any overrun on top
+                * of a potentially deep task stack.
+                */
+               do_softirq();
+       } else {
                wakeup_softirqd();
+       }
 }
 
 static inline void tick_irq_exit(void)
index 51c4f34..4431610 100644 (file)
@@ -486,7 +486,52 @@ static struct smp_hotplug_thread watchdog_threads = {
        .unpark                 = watchdog_enable,
 };
 
-static int watchdog_enable_all_cpus(void)
+static void restart_watchdog_hrtimer(void *info)
+{
+       struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
+       int ret;
+
+       /*
+        * No need to cancel and restart hrtimer if it is currently executing
+        * because it will reprogram itself with the new period now.
+        * We should never see it unqueued here because we are running per-cpu
+        * with interrupts disabled.
+        */
+       ret = hrtimer_try_to_cancel(hrtimer);
+       if (ret == 1)
+               hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+                               HRTIMER_MODE_REL_PINNED);
+}
+
+static void update_timers(int cpu)
+{
+       struct call_single_data data = {.func = restart_watchdog_hrtimer};
+       /*
+        * Make sure that perf event counter will adopt to a new
+        * sampling period. Updating the sampling period directly would
+        * be much nicer but we do not have an API for that now so
+        * let's use a big hammer.
+        * Hrtimer will adopt the new period on the next tick but this
+        * might be late already so we have to restart the timer as well.
+        */
+       watchdog_nmi_disable(cpu);
+       __smp_call_function_single(cpu, &data, 1);
+       watchdog_nmi_enable(cpu);
+}
+
+static void update_timers_all_cpus(void)
+{
+       int cpu;
+
+       get_online_cpus();
+       preempt_disable();
+       for_each_online_cpu(cpu)
+               update_timers(cpu);
+       preempt_enable();
+       put_online_cpus();
+}
+
+static int watchdog_enable_all_cpus(bool sample_period_changed)
 {
        int err = 0;
 
@@ -496,6 +541,8 @@ static int watchdog_enable_all_cpus(void)
                        pr_err("Failed to create watchdog threads, disabled\n");
                else
                        watchdog_running = 1;
+       } else if (sample_period_changed) {
+               update_timers_all_cpus();
        }
 
        return err;
@@ -520,13 +567,15 @@ int proc_dowatchdog(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int err, old_thresh, old_enabled;
+       static DEFINE_MUTEX(watchdog_proc_mutex);
 
+       mutex_lock(&watchdog_proc_mutex);
        old_thresh = ACCESS_ONCE(watchdog_thresh);
        old_enabled = ACCESS_ONCE(watchdog_user_enabled);
 
        err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (err || !write)
-               return err;
+               goto out;
 
        set_sample_period();
        /*
@@ -535,7 +584,7 @@ int proc_dowatchdog(struct ctl_table *table, int write,
         * watchdog_*_all_cpus() function takes care of this.
         */
        if (watchdog_user_enabled && watchdog_thresh)
-               err = watchdog_enable_all_cpus();
+               err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
        else
                watchdog_disable_all_cpus();
 
@@ -544,7 +593,8 @@ int proc_dowatchdog(struct ctl_table *table, int write,
                watchdog_thresh = old_thresh;
                watchdog_user_enabled = old_enabled;
        }
-
+out:
+       mutex_unlock(&watchdog_proc_mutex);
        return err;
 }
 #endif /* CONFIG_SYSCTL */
@@ -554,5 +604,5 @@ void __init lockup_detector_init(void)
        set_sample_period();
 
        if (watchdog_user_enabled)
-               watchdog_enable_all_cpus();
+               watchdog_enable_all_cpus(false);
 }
index 3f0494c..8499c81 100644 (file)
@@ -14,6 +14,8 @@
 
 const char hex_asc[] = "0123456789abcdef";
 EXPORT_SYMBOL(hex_asc);
+const char hex_asc_upper[] = "0123456789ABCDEF";
+EXPORT_SYMBOL(hex_asc_upper);
 
 /**
  * hex_to_bin - convert a hex digit to its real value
index 9621751..084f7b1 100644 (file)
@@ -592,7 +592,7 @@ static void kobject_release(struct kref *kref)
 {
        struct kobject *kobj = container_of(kref, struct kobject, kref);
 #ifdef CONFIG_DEBUG_KOBJECT_RELEASE
-       pr_debug("kobject: '%s' (%p): %s, parent %p (delayed)\n",
+       pr_info("kobject: '%s' (%p): %s, parent %p (delayed)\n",
                 kobject_name(kobj), kobj, __func__, kobj->parent);
        INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup);
        schedule_delayed_work(&kobj->release, HZ);
@@ -933,10 +933,7 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
 
 bool kobj_ns_current_may_mount(enum kobj_ns_type type)
 {
-       bool may_mount = false;
-
-       if (type == KOBJ_NS_TYPE_NONE)
-               return true;
+       bool may_mount = true;
 
        spin_lock(&kobj_ns_type_lock);
        if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
index 677d036..6f9d434 100644 (file)
@@ -4,6 +4,22 @@
 #ifdef CONFIG_CMPXCHG_LOCKREF
 
 /*
+ * Allow weakly-ordered memory architectures to provide barrier-less
+ * cmpxchg semantics for lockref updates.
+ */
+#ifndef cmpxchg64_relaxed
+# define cmpxchg64_relaxed cmpxchg64
+#endif
+
+/*
+ * Allow architectures to override the default cpu_relax() within CMPXCHG_LOOP.
+ * This is useful for architectures with an expensive cpu_relax().
+ */
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
+#endif
+
+/*
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
  */
        while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {     \
                struct lockref new = old, prev = old;                           \
                CODE                                                            \
-               old.lock_count = cmpxchg64(&lockref->lock_count,                \
-                                          old.lock_count, new.lock_count);     \
+               old.lock_count = cmpxchg64_relaxed(&lockref->lock_count,        \
+                                                  old.lock_count,              \
+                                                  new.lock_count);             \
                if (likely(old.lock_count == prev.lock_count)) {                \
                        SUCCESS;                                                \
                }                                                               \
-               cpu_relax();                                                    \
+               arch_mutex_cpu_relax();                                         \
        }                                                                       \
 } while (0)
 
index 026771a..394838f 100644 (file)
@@ -183,7 +183,7 @@ config MEMORY_HOTPLUG_SPARSE
 config MEMORY_HOTREMOVE
        bool "Allow for memory hot remove"
        select MEMORY_ISOLATION
-       select HAVE_BOOTMEM_INFO_NODE if X86_64
+       select HAVE_BOOTMEM_INFO_NODE if (X86_64 || PPC64)
        depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
        depends on MIGRATION
 
index c9f0a43..5a7d58f 100644 (file)
@@ -204,6 +204,8 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        struct bio_vec *to, *from;
        unsigned i;
 
+       if (force)
+               goto bounce;
        bio_for_each_segment(from, *bio_orig, i)
                if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q))
                        goto bounce;
index c437893..b5326b1 100644 (file)
@@ -677,6 +677,13 @@ static void isolate_freepages(struct zone *zone,
                                        pfn -= pageblock_nr_pages) {
                unsigned long isolated;
 
+               /*
+                * This can iterate a massively long zone without finding any
+                * suitable migration targets, so periodically check if we need
+                * to schedule.
+                */
+               cond_resched();
+
                if (!pfn_valid(pfn))
                        continue;
 
index afc2daa..4c84678 100644 (file)
@@ -20,8 +20,6 @@ static int hwpoison_inject(void *data, u64 val)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!hwpoison_filter_enable)
-               goto inject;
        if (!pfn_valid(pfn))
                return -ENXIO;
 
@@ -33,6 +31,9 @@ static int hwpoison_inject(void *data, u64 val)
        if (!get_page_unless_zero(hpage))
                return 0;
 
+       if (!hwpoison_filter_enable)
+               goto inject;
+
        if (!PageLRU(p) && !PageHuge(p))
                shake_page(p, 0);
        /*
index 6975bc8..539eeb9 100644 (file)
@@ -343,10 +343,11 @@ static long madvise_remove(struct vm_area_struct *vma,
  */
 static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
 {
+       struct page *p;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-       for (; start < end; start += PAGE_SIZE) {
-               struct page *p;
+       for (; start < end; start += PAGE_SIZE <<
+                               compound_order(compound_head(p))) {
                int ret;
 
                ret = get_user_pages_fast(start, 1, 0, &p);
index d5ff3ce..1c52ddb 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/limits.h>
 #include <linux/export.h>
 #include <linux/mutex.h>
+#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
@@ -160,6 +161,10 @@ struct mem_cgroup_per_zone {
 
        struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
 
+       struct rb_node          tree_node;      /* RB tree node */
+       unsigned long long      usage_in_excess;/* Set to the value by which */
+                                               /* the soft limit is exceeded*/
+       bool                    on_tree;
        struct mem_cgroup       *memcg;         /* Back pointer, we cannot */
                                                /* use container_of        */
 };
@@ -168,6 +173,26 @@ struct mem_cgroup_per_node {
        struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
 };
 
+/*
+ * Cgroups above their limits are maintained in a RB-Tree, independent of
+ * their hierarchy representation
+ */
+
+struct mem_cgroup_tree_per_zone {
+       struct rb_root rb_root;
+       spinlock_t lock;
+};
+
+struct mem_cgroup_tree_per_node {
+       struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
+};
+
+struct mem_cgroup_tree {
+       struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
+};
+
+static struct mem_cgroup_tree soft_limit_tree __read_mostly;
+
 struct mem_cgroup_threshold {
        struct eventfd_ctx *eventfd;
        u64 threshold;
@@ -303,22 +328,6 @@ struct mem_cgroup {
        atomic_t        numainfo_events;
        atomic_t        numainfo_updating;
 #endif
-       /*
-        * Protects soft_contributed transitions.
-        * See mem_cgroup_update_soft_limit
-        */
-       spinlock_t soft_lock;
-
-       /*
-        * If true then this group has increased parents' children_in_excess
-        * when it got over the soft limit.
-        * When a group falls bellow the soft limit, parents' children_in_excess
-        * is decreased and soft_contributed changed to false.
-        */
-       bool soft_contributed;
-
-       /* Number of children that are in soft limit excess */
-       atomic_t children_in_excess;
 
        struct mem_cgroup_per_node *nodeinfo[0];
        /* WARNING: nodeinfo must be the last member here */
@@ -422,6 +431,7 @@ static bool move_file(void)
  * limit reclaim to prevent infinite loops, if they ever occur.
  */
 #define        MEM_CGROUP_MAX_RECLAIM_LOOPS            100
+#define        MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2
 
 enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
@@ -648,6 +658,164 @@ page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
        return mem_cgroup_zoneinfo(memcg, nid, zid);
 }
 
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_node_zone(int nid, int zid)
+{
+       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_from_page(struct page *page)
+{
+       int nid = page_to_nid(page);
+       int zid = page_zonenum(page);
+
+       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static void
+__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz,
+                               unsigned long long new_usage_in_excess)
+{
+       struct rb_node **p = &mctz->rb_root.rb_node;
+       struct rb_node *parent = NULL;
+       struct mem_cgroup_per_zone *mz_node;
+
+       if (mz->on_tree)
+               return;
+
+       mz->usage_in_excess = new_usage_in_excess;
+       if (!mz->usage_in_excess)
+               return;
+       while (*p) {
+               parent = *p;
+               mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
+                                       tree_node);
+               if (mz->usage_in_excess < mz_node->usage_in_excess)
+                       p = &(*p)->rb_left;
+               /*
+                * We can't avoid mem cgroups that are over their soft
+                * limit by the same amount
+                */
+               else if (mz->usage_in_excess >= mz_node->usage_in_excess)
+                       p = &(*p)->rb_right;
+       }
+       rb_link_node(&mz->tree_node, parent, p);
+       rb_insert_color(&mz->tree_node, &mctz->rb_root);
+       mz->on_tree = true;
+}
+
+static void
+__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz)
+{
+       if (!mz->on_tree)
+               return;
+       rb_erase(&mz->tree_node, &mctz->rb_root);
+       mz->on_tree = false;
+}
+
+static void
+mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz)
+{
+       spin_lock(&mctz->lock);
+       __mem_cgroup_remove_exceeded(memcg, mz, mctz);
+       spin_unlock(&mctz->lock);
+}
+
+
+static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
+{
+       unsigned long long excess;
+       struct mem_cgroup_per_zone *mz;
+       struct mem_cgroup_tree_per_zone *mctz;
+       int nid = page_to_nid(page);
+       int zid = page_zonenum(page);
+       mctz = soft_limit_tree_from_page(page);
+
+       /*
+        * Necessary to update all ancestors when hierarchy is used.
+        * because their event counter is not touched.
+        */
+       for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+               mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+               excess = res_counter_soft_limit_excess(&memcg->res);
+               /*
+                * We have to update the tree if mz is on RB-tree or
+                * mem is over its softlimit.
+                */
+               if (excess || mz->on_tree) {
+                       spin_lock(&mctz->lock);
+                       /* if on-tree, remove it */
+                       if (mz->on_tree)
+                               __mem_cgroup_remove_exceeded(memcg, mz, mctz);
+                       /*
+                        * Insert again. mz->usage_in_excess will be updated.
+                        * If excess is 0, no tree ops.
+                        */
+                       __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
+                       spin_unlock(&mctz->lock);
+               }
+       }
+}
+
+static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
+{
+       int node, zone;
+       struct mem_cgroup_per_zone *mz;
+       struct mem_cgroup_tree_per_zone *mctz;
+
+       for_each_node(node) {
+               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+                       mz = mem_cgroup_zoneinfo(memcg, node, zone);
+                       mctz = soft_limit_tree_node_zone(node, zone);
+                       mem_cgroup_remove_exceeded(memcg, mz, mctz);
+               }
+       }
+}
+
+static struct mem_cgroup_per_zone *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+       struct rb_node *rightmost = NULL;
+       struct mem_cgroup_per_zone *mz;
+
+retry:
+       mz = NULL;
+       rightmost = rb_last(&mctz->rb_root);
+       if (!rightmost)
+               goto done;              /* Nothing to reclaim from */
+
+       mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
+       /*
+        * Remove the node now but someone else can add it back,
+        * we will to add it back at the end of reclaim to its correct
+        * position in the tree.
+        */
+       __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+       if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
+               !css_tryget(&mz->memcg->css))
+               goto retry;
+done:
+       return mz;
+}
+
+static struct mem_cgroup_per_zone *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+       struct mem_cgroup_per_zone *mz;
+
+       spin_lock(&mctz->lock);
+       mz = __mem_cgroup_largest_soft_limit_node(mctz);
+       spin_unlock(&mctz->lock);
+       return mz;
+}
+
 /*
  * Implementation Note: reading percpu statistics for memcg.
  *
@@ -822,48 +990,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 }
 
 /*
- * Called from rate-limited memcg_check_events when enough
- * MEM_CGROUP_TARGET_SOFTLIMIT events are accumulated and it makes sure
- * that all the parents up the hierarchy will be notified that this group
- * is in excess or that it is not in excess anymore. mmecg->soft_contributed
- * makes the transition a single action whenever the state flips from one to
- * the other.
- */
-static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
-{
-       unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
-       struct mem_cgroup *parent = memcg;
-       int delta = 0;
-
-       spin_lock(&memcg->soft_lock);
-       if (excess) {
-               if (!memcg->soft_contributed) {
-                       delta = 1;
-                       memcg->soft_contributed = true;
-               }
-       } else {
-               if (memcg->soft_contributed) {
-                       delta = -1;
-                       memcg->soft_contributed = false;
-               }
-       }
-
-       /*
-        * Necessary to update all ancestors when hierarchy is used
-        * because their event counter is not touched.
-        * We track children even outside the hierarchy for the root
-        * cgroup because tree walk starting at root should visit
-        * all cgroups and we want to prevent from pointless tree
-        * walk if no children is below the limit.
-        */
-       while (delta && (parent = parent_mem_cgroup(parent)))
-               atomic_add(delta, &parent->children_in_excess);
-       if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy)
-               atomic_add(delta, &root_mem_cgroup->children_in_excess);
-       spin_unlock(&memcg->soft_lock);
-}
-
-/*
  * Check events in order.
  *
  */
@@ -886,7 +1012,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 
                mem_cgroup_threshold(memcg);
                if (unlikely(do_softlimit))
-                       mem_cgroup_update_soft_limit(memcg);
+                       mem_cgroup_update_tree(memcg, page);
 #if MAX_NUMNODES > 1
                if (unlikely(do_numainfo))
                        atomic_inc(&memcg->numainfo_events);
@@ -929,15 +1055,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
        return memcg;
 }
 
-static enum mem_cgroup_filter_t
-mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
-               mem_cgroup_iter_filter cond)
-{
-       if (!cond)
-               return VISIT;
-       return cond(memcg, root);
-}
-
 /*
  * Returns a next (in a pre-order walk) alive memcg (with elevated css
  * ref. count) or NULL if the whole root's subtree has been visited.
@@ -945,7 +1062,7 @@ mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
  * helper function to be used by mem_cgroup_iter
  */
 static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
-               struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond)
+               struct mem_cgroup *last_visited)
 {
        struct cgroup_subsys_state *prev_css, *next_css;
 
@@ -963,31 +1080,11 @@ skip_node:
        if (next_css) {
                struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
 
-               switch (mem_cgroup_filter(mem, root, cond)) {
-               case SKIP:
+               if (css_tryget(&mem->css))
+                       return mem;
+               else {
                        prev_css = next_css;
                        goto skip_node;
-               case SKIP_TREE:
-                       if (mem == root)
-                               return NULL;
-                       /*
-                        * css_rightmost_descendant is not an optimal way to
-                        * skip through a subtree (especially for imbalanced
-                        * trees leaning to right) but that's what we have right
-                        * now. More effective solution would be traversing
-                        * right-up for first non-NULL without calling
-                        * css_next_descendant_pre afterwards.
-                        */
-                       prev_css = css_rightmost_descendant(next_css);
-                       goto skip_node;
-               case VISIT:
-                       if (css_tryget(&mem->css))
-                               return mem;
-                       else {
-                               prev_css = next_css;
-                               goto skip_node;
-                       }
-                       break;
                }
        }
 
@@ -1051,7 +1148,6 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * @root: hierarchy root
  * @prev: previously returned memcg, NULL on first invocation
  * @reclaim: cookie for shared reclaim walks, NULL for full walks
- * @cond: filter for visited nodes, NULL for no filter
  *
  * Returns references to children of the hierarchy below @root, or
  * @root itself, or %NULL after a full round-trip.
@@ -1064,18 +1160,15 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * divide up the memcgs in the hierarchy among all concurrent
  * reclaimers operating on the same zone and priority.
  */
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                                   struct mem_cgroup *prev,
-                                  struct mem_cgroup_reclaim_cookie *reclaim,
-                                  mem_cgroup_iter_filter cond)
+                                  struct mem_cgroup_reclaim_cookie *reclaim)
 {
        struct mem_cgroup *memcg = NULL;
        struct mem_cgroup *last_visited = NULL;
 
-       if (mem_cgroup_disabled()) {
-               /* first call must return non-NULL, second return NULL */
-               return (struct mem_cgroup *)(unsigned long)!prev;
-       }
+       if (mem_cgroup_disabled())
+               return NULL;
 
        if (!root)
                root = root_mem_cgroup;
@@ -1086,9 +1179,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
        if (!root->use_hierarchy && root != root_mem_cgroup) {
                if (prev)
                        goto out_css_put;
-               if (mem_cgroup_filter(root, root, cond) == VISIT)
-                       return root;
-               return NULL;
+               return root;
        }
 
        rcu_read_lock();
@@ -1111,7 +1202,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
                        last_visited = mem_cgroup_iter_load(iter, root, &seq);
                }
 
-               memcg = __mem_cgroup_iter_next(root, last_visited, cond);
+               memcg = __mem_cgroup_iter_next(root, last_visited);
 
                if (reclaim) {
                        mem_cgroup_iter_update(iter, last_visited, memcg, seq);
@@ -1122,11 +1213,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
                                reclaim->generation = iter->generation;
                }
 
-               /*
-                * We have finished the whole tree walk or no group has been
-                * visited because filter told us to skip the root node.
-                */
-               if (!memcg && (prev || (cond && !last_visited)))
+               if (prev && !memcg)
                        goto out_unlock;
        }
 out_unlock:
@@ -1767,7 +1854,6 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
        return total;
 }
 
-#if MAX_NUMNODES > 1
 /**
  * test_mem_cgroup_node_reclaimable
  * @memcg: the target memcg
@@ -1790,6 +1876,7 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
        return false;
 
 }
+#if MAX_NUMNODES > 1
 
 /*
  * Always updating the nodemask is not very good - even if we have an empty
@@ -1857,50 +1944,104 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
        return node;
 }
 
+/*
+ * Check all nodes whether it contains reclaimable pages or not.
+ * For quick scan, we make use of scan_nodes. This will allow us to skip
+ * unused nodes. But scan_nodes is lazily updated and may not cotain
+ * enough new information. We need to do double check.
+ */
+static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
+{
+       int nid;
+
+       /*
+        * quick check...making use of scan_node.
+        * We can skip unused nodes.
+        */
+       if (!nodes_empty(memcg->scan_nodes)) {
+               for (nid = first_node(memcg->scan_nodes);
+                    nid < MAX_NUMNODES;
+                    nid = next_node(nid, memcg->scan_nodes)) {
+
+                       if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
+                               return true;
+               }
+       }
+       /*
+        * Check rest of nodes.
+        */
+       for_each_node_state(nid, N_MEMORY) {
+               if (node_isset(nid, memcg->scan_nodes))
+                       continue;
+               if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
+                       return true;
+       }
+       return false;
+}
+
 #else
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 {
        return 0;
 }
 
-#endif
-
-/*
- * A group is eligible for the soft limit reclaim under the given root
- * hierarchy if
- *     a) it is over its soft limit
- *     b) any parent up the hierarchy is over its soft limit
- *
- * If the given group doesn't have any children over the limit then it
- * doesn't make any sense to iterate its subtree.
- */
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
-               struct mem_cgroup *root)
+static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 {
-       struct mem_cgroup *parent;
-
-       if (!memcg)
-               memcg = root_mem_cgroup;
-       parent = memcg;
-
-       if (res_counter_soft_limit_excess(&memcg->res))
-               return VISIT;
+       return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
+}
+#endif
 
-       /*
-        * If any parent up to the root in the hierarchy is over its soft limit
-        * then we have to obey and reclaim from this group as well.
-        */
-       while ((parent = parent_mem_cgroup(parent))) {
-               if (res_counter_soft_limit_excess(&parent->res))
-                       return VISIT;
-               if (parent == root)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+                                  struct zone *zone,
+                                  gfp_t gfp_mask,
+                                  unsigned long *total_scanned)
+{
+       struct mem_cgroup *victim = NULL;
+       int total = 0;
+       int loop = 0;
+       unsigned long excess;
+       unsigned long nr_scanned;
+       struct mem_cgroup_reclaim_cookie reclaim = {
+               .zone = zone,
+               .priority = 0,
+       };
+
+       excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
+
+       while (1) {
+               victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+               if (!victim) {
+                       loop++;
+                       if (loop >= 2) {
+                               /*
+                                * If we have not been able to reclaim
+                                * anything, it might because there are
+                                * no reclaimable pages under this hierarchy
+                                */
+                               if (!total)
+                                       break;
+                               /*
+                                * We want to do more targeted reclaim.
+                                * excess >> 2 is not to excessive so as to
+                                * reclaim too much, nor too less that we keep
+                                * coming back to reclaim from this cgroup
+                                */
+                               if (total >= (excess >> 2) ||
+                                       (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
+                                       break;
+                       }
+                       continue;
+               }
+               if (!mem_cgroup_reclaimable(victim, false))
+                       continue;
+               total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+                                                    zone, &nr_scanned);
+               *total_scanned += nr_scanned;
+               if (!res_counter_soft_limit_excess(&root_memcg->res))
                        break;
        }
-
-       if (!atomic_read(&memcg->children_in_excess))
-               return SKIP_TREE;
-       return SKIP;
+       mem_cgroup_iter_break(root_memcg, victim);
+       return total;
 }
 
 static DEFINE_SPINLOCK(memcg_oom_lock);
@@ -2812,7 +2953,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
        unlock_page_cgroup(pc);
 
        /*
-        * "charge_statistics" updated event counter.
+        * "charge_statistics" updated event counter. Then, check it.
+        * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
+        * if they exceeds softlimit.
         */
        memcg_check_events(memcg, page);
 }
@@ -4647,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
        return ret;
 }
 
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+                                           gfp_t gfp_mask,
+                                           unsigned long *total_scanned)
+{
+       unsigned long nr_reclaimed = 0;
+       struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+       unsigned long reclaimed;
+       int loop = 0;
+       struct mem_cgroup_tree_per_zone *mctz;
+       unsigned long long excess;
+       unsigned long nr_scanned;
+
+       if (order > 0)
+               return 0;
+
+       mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+       /*
+        * This loop can run a while, specially if mem_cgroup's continuously
+        * keep exceeding their soft limit and putting the system under
+        * pressure
+        */
+       do {
+               if (next_mz)
+                       mz = next_mz;
+               else
+                       mz = mem_cgroup_largest_soft_limit_node(mctz);
+               if (!mz)
+                       break;
+
+               nr_scanned = 0;
+               reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+                                                   gfp_mask, &nr_scanned);
+               nr_reclaimed += reclaimed;
+               *total_scanned += nr_scanned;
+               spin_lock(&mctz->lock);
+
+               /*
+                * If we failed to reclaim anything from this memory cgroup
+                * it is time to move on to the next cgroup
+                */
+               next_mz = NULL;
+               if (!reclaimed) {
+                       do {
+                               /*
+                                * Loop until we find yet another one.
+                                *
+                                * By the time we get the soft_limit lock
+                                * again, someone might have aded the
+                                * group back on the RB tree. Iterate to
+                                * make sure we get a different mem.
+                                * mem_cgroup_largest_soft_limit_node returns
+                                * NULL if no other cgroup is present on
+                                * the tree
+                                */
+                               next_mz =
+                               __mem_cgroup_largest_soft_limit_node(mctz);
+                               if (next_mz == mz)
+                                       css_put(&next_mz->memcg->css);
+                               else /* next_mz == NULL or other memcg */
+                                       break;
+                       } while (1);
+               }
+               __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
+               excess = res_counter_soft_limit_excess(&mz->memcg->res);
+               /*
+                * One school of thought says that we should not add
+                * back the node to the tree if reclaim returns 0.
+                * But our reclaim could return 0, simply because due
+                * to priority we are exposing a smaller subset of
+                * memory to reclaim from. Consider this as a longer
+                * term TODO.
+                */
+               /* If excess == 0, no tree ops */
+               __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
+               spin_unlock(&mctz->lock);
+               css_put(&mz->memcg->css);
+               loop++;
+               /*
+                * Could not reclaim anything and there are no more
+                * mem cgroups to try or we seem to be looping without
+                * reclaiming anything.
+                */
+               if (!nr_reclaimed &&
+                       (next_mz == NULL ||
+                       loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+                       break;
+       } while (!nr_reclaimed);
+       if (next_mz)
+               css_put(&next_mz->memcg->css);
+       return nr_reclaimed;
+}
+
 /**
  * mem_cgroup_force_empty_list - clears LRU of a group
  * @memcg: group to clear
@@ -5911,6 +6146,8 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
        for (zone = 0; zone < MAX_NR_ZONES; zone++) {
                mz = &pn->zoneinfo[zone];
                lruvec_init(&mz->lruvec);
+               mz->usage_in_excess = 0;
+               mz->on_tree = false;
                mz->memcg = memcg;
        }
        memcg->nodeinfo[node] = pn;
@@ -5966,6 +6203,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
        int node;
        size_t size = memcg_size();
 
+       mem_cgroup_remove_from_trees(memcg);
        free_css_id(&mem_cgroup_subsys, &memcg->css);
 
        for_each_node(node)
@@ -6002,6 +6240,29 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(parent_mem_cgroup);
 
+static void __init mem_cgroup_soft_limit_tree_init(void)
+{
+       struct mem_cgroup_tree_per_node *rtpn;
+       struct mem_cgroup_tree_per_zone *rtpz;
+       int tmp, node, zone;
+
+       for_each_node(node) {
+               tmp = node;
+               if (!node_state(node, N_NORMAL_MEMORY))
+                       tmp = -1;
+               rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
+               BUG_ON(!rtpn);
+
+               soft_limit_tree.rb_tree_per_node[node] = rtpn;
+
+               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+                       rtpz = &rtpn->rb_tree_per_zone[zone];
+                       rtpz->rb_root = RB_ROOT;
+                       spin_lock_init(&rtpz->lock);
+               }
+       }
+}
+
 static struct cgroup_subsys_state * __ref
 mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
@@ -6031,7 +6292,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
        mutex_init(&memcg->thresholds_lock);
        spin_lock_init(&memcg->move_lock);
        vmpressure_init(&memcg->vmpressure);
-       spin_lock_init(&memcg->soft_lock);
 
        return &memcg->css;
 
@@ -6109,13 +6369,6 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 
        mem_cgroup_invalidate_reclaim_iterators(memcg);
        mem_cgroup_reparent_charges(memcg);
-       if (memcg->soft_contributed) {
-               while ((memcg = parent_mem_cgroup(memcg)))
-                       atomic_dec(&memcg->children_in_excess);
-
-               if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy)
-                       atomic_dec(&root_mem_cgroup->children_in_excess);
-       }
        mem_cgroup_destroy_all_caches(memcg);
        vmpressure_cleanup(&memcg->vmpressure);
 }
@@ -6790,6 +7043,7 @@ static int __init mem_cgroup_init(void)
 {
        hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
        enable_swap_cgroup();
+       mem_cgroup_soft_limit_tree_init();
        memcg_stock_init();
        return 0;
 }
index 947ed54..bf3351b 100644 (file)
@@ -1114,8 +1114,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
                         * shake_page could have turned it free.
                         */
                        if (is_free_buddy_page(p)) {
-                               action_result(pfn, "free buddy, 2nd try",
-                                               DELAYED);
+                               if (flags & MF_COUNT_INCREASED)
+                                       action_result(pfn, "free buddy", DELAYED);
+                               else
+                                       action_result(pfn, "free buddy, 2nd try", DELAYED);
                                return 0;
                        }
                        action_result(pfn, "non LRU", IGNORED);
@@ -1349,7 +1351,7 @@ int unpoison_memory(unsigned long pfn)
         * worked by memory_failure() and the page lock is not held yet.
         * In such case, we yield to memory_failure() and make unpoison fail.
         */
-       if (PageTransHuge(page)) {
+       if (!PageHuge(page) && PageTransHuge(page)) {
                pr_info("MCE: Memory failure is now running on %#lx\n", pfn);
                        return 0;
        }
index 9c8d5f5..a26bccd 100644 (file)
@@ -107,7 +107,7 @@ void putback_movable_pages(struct list_head *l)
                list_del(&page->lru);
                dec_zone_page_state(page, NR_ISOLATED_ANON +
                                page_is_file_cache(page));
-               if (unlikely(balloon_page_movable(page)))
+               if (unlikely(isolated_balloon_page(page)))
                        balloon_page_putback(page);
                else
                        putback_lru_page(page);
index d638026..d480cd6 100644 (file)
@@ -379,10 +379,14 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
 
        /*
         * Initialize pte walk starting at the already pinned page where we
-        * are sure that there is a pte.
+        * are sure that there is a pte, as it was pinned under the same
+        * mmap_sem write op.
         */
        pte = get_locked_pte(vma->vm_mm, start, &ptl);
-       end = min(end, pmd_addr_end(start, end));
+       /* Make sure we do not cross the page table boundary */
+       end = pgd_addr_end(start, end);
+       end = pud_addr_end(start, end);
+       end = pmd_addr_end(start, end);
 
        /* The page next to the pinned page is the first we will try to get */
        start += PAGE_SIZE;
@@ -736,6 +740,7 @@ static int do_mlockall(int flags)
 
                /* Ignore errors */
                mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
+               cond_resched();
        }
 out:
        return 0;
index 0ee638f..dd886fa 100644 (file)
@@ -6366,10 +6366,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
                list_del(&page->lru);
                rmv_page_order(page);
                zone->free_area[order].nr_free--;
-#ifdef CONFIG_HIGHMEM
-               if (PageHighMem(page))
-                       totalhigh_pages -= 1 << order;
-#endif
                for (i = 0; i < (1 << order); i++)
                        SetPageReserved((page+i));
                pfn += (1 << order);
index a344327..e2e98af 100644 (file)
@@ -56,6 +56,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
                        continue;
                }
 
+#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON)
                /*
                 * For simplicity, we won't check this in the list of memcg
                 * caches. We have control over memcg naming, and if there
@@ -69,6 +70,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
                        s = NULL;
                        return -EINVAL;
                }
+#endif
        }
 
        WARN_ON(strchr(name, ' '));     /* It confuses parsers */
index 8ed1b77..53f2f82 100644 (file)
@@ -48,6 +48,7 @@
 #include <asm/div64.h>
 
 #include <linux/swapops.h>
+#include <linux/balloon_compaction.h>
 
 #include "internal.h"
 
@@ -139,23 +140,11 @@ static bool global_reclaim(struct scan_control *sc)
 {
        return !sc->target_mem_cgroup;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-       struct mem_cgroup *root = sc->target_mem_cgroup;
-       return !mem_cgroup_disabled() &&
-               mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE;
-}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
        return true;
 }
-
-static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
-{
-       return false;
-}
 #endif
 
 unsigned long zone_reclaimable_pages(struct zone *zone)
@@ -1125,7 +1114,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
        LIST_HEAD(clean_pages);
 
        list_for_each_entry_safe(page, next, page_list, lru) {
-               if (page_is_file_cache(page) && !PageDirty(page)) {
+               if (page_is_file_cache(page) && !PageDirty(page) &&
+                   !isolated_balloon_page(page)) {
                        ClearPageActive(page);
                        list_move(&page->lru, &clean_pages);
                }
@@ -2176,11 +2166,9 @@ static inline bool should_continue_reclaim(struct zone *zone,
        }
 }
 
-static int
-__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
 {
        unsigned long nr_reclaimed, nr_scanned;
-       int groups_scanned = 0;
 
        do {
                struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2188,17 +2176,15 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
                        .zone = zone,
                        .priority = sc->priority,
                };
-               struct mem_cgroup *memcg = NULL;
-               mem_cgroup_iter_filter filter = (soft_reclaim) ?
-                       mem_cgroup_soft_reclaim_eligible : NULL;
+               struct mem_cgroup *memcg;
 
                nr_reclaimed = sc->nr_reclaimed;
                nr_scanned = sc->nr_scanned;
 
-               while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
+               memcg = mem_cgroup_iter(root, NULL, &reclaim);
+               do {
                        struct lruvec *lruvec;
 
-                       groups_scanned++;
                        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
                        shrink_lruvec(lruvec, sc);
@@ -2218,7 +2204,8 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
                                mem_cgroup_iter_break(root, memcg);
                                break;
                        }
-               }
+                       memcg = mem_cgroup_iter(root, memcg, &reclaim);
+               } while (memcg);
 
                vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
                           sc->nr_scanned - nr_scanned,
@@ -2226,37 +2213,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 
        } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
                                         sc->nr_scanned - nr_scanned, sc));
-
-       return groups_scanned;
-}
-
-
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
-       bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
-       unsigned long nr_scanned = sc->nr_scanned;
-       int scanned_groups;
-
-       scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim);
-       /*
-        * memcg iterator might race with other reclaimer or start from
-        * a incomplete tree walk so the tree walk in __shrink_zone
-        * might have missed groups that are above the soft limit. Try
-        * another loop to catch up with others. Do it just once to
-        * prevent from reclaim latencies when other reclaimers always
-        * preempt this one.
-        */
-       if (do_soft_reclaim && !scanned_groups)
-               __shrink_zone(zone, sc, do_soft_reclaim);
-
-       /*
-        * No group is over the soft limit or those that are do not have
-        * pages in the zone we are reclaiming so we have to reclaim everybody
-        */
-       if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
-               __shrink_zone(zone, sc, false);
-               return;
-       }
 }
 
 /* Returns true if compaction should go ahead for a high-order request */
@@ -2320,6 +2276,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
        struct zoneref *z;
        struct zone *zone;
+       unsigned long nr_soft_reclaimed;
+       unsigned long nr_soft_scanned;
        bool aborted_reclaim = false;
 
        /*
@@ -2359,6 +2317,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                        continue;
                                }
                        }
+                       /*
+                        * This steals pages from memory cgroups over softlimit
+                        * and returns the number of reclaimed pages and
+                        * scanned pages. This works for global memory pressure
+                        * and balancing, not for a memcg's limit.
+                        */
+                       nr_soft_scanned = 0;
+                       nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+                                               sc->order, sc->gfp_mask,
+                                               &nr_soft_scanned);
+                       sc->nr_reclaimed += nr_soft_reclaimed;
+                       sc->nr_scanned += nr_soft_scanned;
                        /* need some check for avoid more shrink_zone() */
                }
 
@@ -2952,6 +2922,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 {
        int i;
        int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
+       unsigned long nr_soft_reclaimed;
+       unsigned long nr_soft_scanned;
        struct scan_control sc = {
                .gfp_mask = GFP_KERNEL,
                .priority = DEF_PRIORITY,
@@ -3066,6 +3038,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 
                        sc.nr_scanned = 0;
 
+                       nr_soft_scanned = 0;
+                       /*
+                        * Call soft limit reclaim before calling shrink_zone.
+                        */
+                       nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+                                                       order, sc.gfp_mask,
+                                                       &nr_soft_scanned);
+                       sc.nr_reclaimed += nr_soft_reclaimed;
+
                        /*
                         * There should be no need to raise the scanning
                         * priority if enough pages are already being scanned
index 1eb05d8..3ed6162 100644 (file)
 static unsigned int mrp_join_time __read_mostly = 200;
 module_param(mrp_join_time, uint, 0644);
 MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+
+static unsigned int mrp_periodic_time __read_mostly = 1000;
+module_param(mrp_periodic_time, uint, 0644);
+MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
+
 MODULE_LICENSE("GPL");
 
 static const u8
@@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data)
        mrp_join_timer_arm(app);
 }
 
+static void mrp_periodic_timer_arm(struct mrp_applicant *app)
+{
+       mod_timer(&app->periodic_timer,
+                 jiffies + msecs_to_jiffies(mrp_periodic_time));
+}
+
+static void mrp_periodic_timer(unsigned long data)
+{
+       struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+       spin_lock(&app->lock);
+       mrp_mad_event(app, MRP_EVENT_PERIODIC);
+       mrp_pdu_queue(app);
+       spin_unlock(&app->lock);
+
+       mrp_periodic_timer_arm(app);
+}
+
 static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
 {
        __be16 endmark;
@@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
        rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
        setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
        mrp_join_timer_arm(app);
+       setup_timer(&app->periodic_timer, mrp_periodic_timer,
+                   (unsigned long)app);
+       mrp_periodic_timer_arm(app);
        return 0;
 
 err3:
@@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
         * all pending messages before the applicant is gone.
         */
        del_timer_sync(&app->join_timer);
+       del_timer_sync(&app->periodic_timer);
 
        spin_lock_bh(&app->lock);
        mrp_mad_event(app, MRP_EVENT_TX);
index 634deba..fb7356f 100644 (file)
@@ -1146,7 +1146,11 @@ int hci_dev_open(__u16 dev)
                goto done;
        }
 
-       if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) {
+       /* Check for rfkill but allow the HCI setup stage to proceed
+        * (which in itself doesn't cause any RF activity).
+        */
+       if (test_bit(HCI_RFKILLED, &hdev->dev_flags) &&
+           !test_bit(HCI_SETUP, &hdev->dev_flags)) {
                ret = -ERFKILL;
                goto done;
        }
@@ -1566,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked)
 
        BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
 
-       if (!blocked)
-               return 0;
-
-       hci_dev_do_close(hdev);
+       if (blocked) {
+               set_bit(HCI_RFKILLED, &hdev->dev_flags);
+               if (!test_bit(HCI_SETUP, &hdev->dev_flags))
+                       hci_dev_do_close(hdev);
+       } else {
+               clear_bit(HCI_RFKILLED, &hdev->dev_flags);
+       }
 
        return 0;
 }
@@ -1591,9 +1598,13 @@ static void hci_power_on(struct work_struct *work)
                return;
        }
 
-       if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+       if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) {
+               clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+               hci_dev_do_close(hdev);
+       } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
                queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
                                   HCI_AUTO_OFF_TIMEOUT);
+       }
 
        if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags))
                mgmt_index_added(hdev);
@@ -2209,6 +2220,9 @@ int hci_register_dev(struct hci_dev *hdev)
                }
        }
 
+       if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
+               set_bit(HCI_RFKILLED, &hdev->dev_flags);
+
        set_bit(HCI_SETUP, &hdev->dev_flags);
 
        if (hdev->dev_type != HCI_AMP)
index 94aab73..8db3e89 100644 (file)
@@ -3557,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
        cp.handle = cpu_to_le16(conn->handle);
 
        if (ltk->authenticated)
-               conn->sec_level = BT_SECURITY_HIGH;
+               conn->pending_sec_level = BT_SECURITY_HIGH;
+       else
+               conn->pending_sec_level = BT_SECURITY_MEDIUM;
+
+       conn->enc_key_size = ltk->enc_size;
 
        hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
 
index b3bb7bc..63fa111 100644 (file)
@@ -3755,6 +3755,13 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
 
        sk = chan->sk;
 
+       /* For certain devices (ex: HID mouse), support for authentication,
+        * pairing and bonding is optional. For such devices, inorder to avoid
+        * the ACL alive for too long after L2CAP disconnection, reset the ACL
+        * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect.
+        */
+       conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
+
        bacpy(&bt_sk(sk)->src, conn->src);
        bacpy(&bt_sk(sk)->dst, conn->dst);
        chan->psm  = psm;
index 6d126fa..84fcf9f 100644 (file)
@@ -569,7 +569,6 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
 static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
 {
        struct rfcomm_dev *dev = dlc->owner;
-       struct tty_struct *tty;
        if (!dev)
                return;
 
@@ -581,38 +580,8 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
                            DPM_ORDER_DEV_AFTER_PARENT);
 
                wake_up_interruptible(&dev->port.open_wait);
-       } else if (dlc->state == BT_CLOSED) {
-               tty = tty_port_tty_get(&dev->port);
-               if (!tty) {
-                       if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
-                               /* Drop DLC lock here to avoid deadlock
-                                * 1. rfcomm_dev_get will take rfcomm_dev_lock
-                                *    but in rfcomm_dev_add there's lock order:
-                                *    rfcomm_dev_lock -> dlc lock
-                                * 2. tty_port_put will deadlock if it's
-                                *    the last reference
-                                *
-                                * FIXME: when we release the lock anything
-                                * could happen to dev, even its destruction
-                                */
-                               rfcomm_dlc_unlock(dlc);
-                               if (rfcomm_dev_get(dev->id) == NULL) {
-                                       rfcomm_dlc_lock(dlc);
-                                       return;
-                               }
-
-                               if (!test_and_set_bit(RFCOMM_TTY_RELEASED,
-                                                     &dev->flags))
-                                       tty_port_put(&dev->port);
-
-                               tty_port_put(&dev->port);
-                               rfcomm_dlc_lock(dlc);
-                       }
-               } else {
-                       tty_hangup(tty);
-                       tty_kref_put(tty);
-               }
-       }
+       } else if (dlc->state == BT_CLOSED)
+               tty_port_tty_hangup(&dev->port, false);
 }
 
 static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
index 5c713f2..65f829c 100644 (file)
@@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net)
 
 /* Delayed registration/unregisteration */
 static LIST_HEAD(net_todo_list);
+static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
 
 static void net_set_todo(struct net_device *dev)
 {
        list_add_tail(&dev->todo_list, &net_todo_list);
+       dev_net(dev)->dev_unreg_count++;
 }
 
 static void rollback_registered_many(struct list_head *head)
@@ -5918,6 +5920,12 @@ void netdev_run_todo(void)
                if (dev->destructor)
                        dev->destructor(dev);
 
+               /* Report a network device has been unregistered */
+               rtnl_lock();
+               dev_net(dev)->dev_unreg_count--;
+               __rtnl_unlock();
+               wake_up(&netdev_unregistering_wq);
+
                /* Free network device */
                kobject_put(&dev->dev.kobj);
        }
@@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net)
        rtnl_unlock();
 }
 
+static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
+{
+       /* Return with the rtnl_lock held when there are no network
+        * devices unregistering in any network namespace in net_list.
+        */
+       struct net *net;
+       bool unregistering;
+       DEFINE_WAIT(wait);
+
+       for (;;) {
+               prepare_to_wait(&netdev_unregistering_wq, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               unregistering = false;
+               rtnl_lock();
+               list_for_each_entry(net, net_list, exit_list) {
+                       if (net->dev_unreg_count > 0) {
+                               unregistering = true;
+                               break;
+                       }
+               }
+               if (!unregistering)
+                       break;
+               __rtnl_unlock();
+               schedule();
+       }
+       finish_wait(&netdev_unregistering_wq, &wait);
+}
+
 static void __net_exit default_device_exit_batch(struct list_head *net_list)
 {
        /* At exit all network devices most be removed from a network
@@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
        struct net *net;
        LIST_HEAD(dev_kill_list);
 
-       rtnl_lock();
+       /* To prevent network device cleanup code from dereferencing
+        * loopback devices or network devices that have been freed
+        * wait here for all pending unregistrations to complete,
+        * before unregistring the loopback device and allowing the
+        * network namespace be freed.
+        *
+        * The netdev todo list containing all network devices
+        * unregistrations that happen in default_device_exit_batch
+        * will run in the rtnl_unlock() at the end of
+        * default_device_exit_batch.
+        */
+       rtnl_lock_unregistering(net_list);
        list_for_each_entry(net, net_list, exit_list) {
                for_each_netdev_reverse(net, dev) {
                        if (dev->rtnl_link_ops)
index 1929af8..8d7d0dd 100644 (file)
@@ -154,8 +154,8 @@ ipv6:
        if (poff >= 0) {
                __be32 *ports, _ports;
 
-               nhoff += poff;
-               ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+               ports = skb_header_pointer(skb, nhoff + poff,
+                                          sizeof(_ports), &_ports);
                if (ports)
                        flow->ports = *ports;
        }
index 6a2f13c..3f1ec15 100644 (file)
 
 #include <net/secure_seq.h>
 
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
 
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
 {
-       get_random_bytes(net_secret, sizeof(net_secret));
+       u32 tmp;
+       int i;
+
+       if (likely(net_secret[0]))
+               return;
+
+       for (i = NET_SECRET_SIZE; i > 0;) {
+               do {
+                       get_random_bytes(&tmp, sizeof(tmp));
+               } while (!tmp);
+               cmpxchg(&net_secret[--i], 0, tmp);
+       }
 }
 
 #ifdef CONFIG_INET
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
        u32 hash[MD5_DIGEST_WORDS];
        u32 i;
 
+       net_secret_init();
        memcpy(hash, saddr, 16);
        for (i = 0; i < 4; i++)
                secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
        u32 hash[MD5_DIGEST_WORDS];
        u32 i;
 
+       net_secret_init();
        memcpy(hash, saddr, 16);
        for (i = 0; i < 4; i++)
                secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr)
 {
        u32 hash[MD5_DIGEST_WORDS];
 
+       net_secret_init();
        hash[0] = (__force __u32) daddr;
        hash[1] = net_secret[13];
        hash[2] = net_secret[14];
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
 {
        __u32 hash[4];
 
+       net_secret_init();
        memcpy(hash, daddr, 16);
        md5_transform(hash, net_secret);
 
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 {
        u32 hash[MD5_DIGEST_WORDS];
 
+       net_secret_init();
        hash[0] = (__force u32)saddr;
        hash[1] = (__force u32)daddr;
        hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
        u32 hash[MD5_DIGEST_WORDS];
 
+       net_secret_init();
        hash[0] = (__force u32)saddr;
        hash[1] = (__force u32)daddr;
        hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
        u32 hash[MD5_DIGEST_WORDS];
        u64 seq;
 
+       net_secret_init();
        hash[0] = (__force u32)saddr;
        hash[1] = (__force u32)daddr;
        hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
        u64 seq;
        u32 i;
 
+       net_secret_init();
        memcpy(hash, saddr, 16);
        for (i = 0; i < 4; i++)
                secret[i] = net_secret[i] + daddr[i];
index 7a1874b..cfeb85c 100644 (file)
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
                get_random_bytes(&rnd, sizeof(rnd));
        } while (rnd == 0);
 
-       if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
+       if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
                get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
-               net_secret_init();
-       }
 }
 EXPORT_SYMBOL(build_ehash_secret);
 
index dace87f..7defdc9 100644 (file)
@@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data)
 
        in_dev->mr_gq_running = 0;
        igmpv3_send_report(in_dev, NULL);
-       __in_dev_put(in_dev);
+       in_dev_put(in_dev);
 }
 
 static void igmp_ifc_timer_expire(unsigned long data)
@@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data)
                igmp_ifc_start_timer(in_dev,
                                     unsolicited_report_interval(in_dev));
        }
-       __in_dev_put(in_dev);
+       in_dev_put(in_dev);
 }
 
 static void igmp_ifc_event(struct in_device *in_dev)
index ac9fabe..63a6d6d 100644 (file)
@@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                        tunnel->err_count = 0;
        }
 
+       tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
        ttl = tnl_params->ttl;
        if (ttl == 0) {
                if (skb->protocol == htons(ETH_P_IP))
@@ -641,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 
        max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
                        + rt->dst.header_len;
-       if (max_headroom > dev->needed_headroom) {
+       if (max_headroom > dev->needed_headroom)
                dev->needed_headroom = max_headroom;
-               if (skb_cow_head(skb, dev->needed_headroom)) {
-                       dev->stats.tx_dropped++;
-                       dev_kfree_skb(skb);
-                       return;
-               }
+
+       if (skb_cow_head(skb, dev->needed_headroom)) {
+               dev->stats.tx_dropped++;
+               dev_kfree_skb(skb);
+               return;
        }
 
        err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
-                           ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df,
-                           !net_eq(tunnel->net, dev_net(dev)));
+                           tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
        iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 
        return;
@@ -853,8 +853,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
        /* FB netdevice is special: we have one, and only one per netns.
         * Allowing to move it to another netns is clearly unsafe.
         */
-       if (!IS_ERR(itn->fb_tunnel_dev))
+       if (!IS_ERR(itn->fb_tunnel_dev)) {
                itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+               ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+       }
        rtnl_unlock();
 
        return PTR_RET(itn->fb_tunnel_dev);
@@ -884,8 +886,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
                        if (!net_eq(dev_net(t->dev), net))
                                unregister_netdevice_queue(t->dev, head);
        }
-       if (itn->fb_tunnel_dev)
-               unregister_netdevice_queue(itn->fb_tunnel_dev, head);
 }
 
 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
index d6c856b..c31e3ad 100644 (file)
@@ -61,7 +61,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
        /* Push down and install the IP header. */
-       __skb_push(skb, sizeof(struct iphdr));
+       skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
 
        iph = ip_hdr(skb);
index 67e17dc..b6346bf 100644 (file)
@@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
        if (th == NULL)
                return NF_DROP;
 
-       synproxy_parse_options(skb, par->thoff, th, &opts);
+       if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+               return NF_DROP;
 
        if (th->syn && !(th->ack || th->fin || th->rst)) {
                /* Initial SYN from client */
@@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
 
                /* fall through */
        case TCP_CONNTRACK_SYN_SENT:
-               synproxy_parse_options(skb, thoff, th, &opts);
+               if (!synproxy_parse_options(skb, thoff, th, &opts))
+                       return NF_DROP;
 
                if (!th->syn && th->ack &&
                    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
                if (!th->syn || !th->ack)
                        break;
 
-               synproxy_parse_options(skb, thoff, th, &opts);
+               if (!synproxy_parse_options(skb, thoff, th, &opts))
+                       return NF_DROP;
+
                if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
                        synproxy->tsoff = opts.tsval - synproxy->its;
 
index bfec521..193db03 100644 (file)
@@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
 
        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
                ipv4_sk_update_pmtu(skb, sk, info);
-       else if (type == ICMP_REDIRECT)
+       else if (type == ICMP_REDIRECT) {
                ipv4_sk_redirect(skb, sk);
+               return;
+       }
 
        /* Report error on raw socket, if:
           1. User requested ip_recverr.
index 7c83cb8..e6bb825 100644 (file)
@@ -895,8 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
        skb_orphan(skb);
        skb->sk = sk;
-       skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
-                         tcp_wfree : sock_wfree;
+       skb->destructor = tcp_wfree;
        atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
        /* Build TCP header and checksum it. */
@@ -1840,7 +1839,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
        while ((skb = tcp_send_head(sk))) {
                unsigned int limit;
 
-
                tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
                BUG_ON(!tso_segs);
 
@@ -1869,13 +1867,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                                break;
                }
 
-               /* TSQ : sk_wmem_alloc accounts skb truesize,
-                * including skb overhead. But thats OK.
+               /* TCP Small Queues :
+                * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+                * This allows for :
+                *  - better RTT estimation and ACK scheduling
+                *  - faster recovery
+                *  - high rates
                 */
-               if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+               limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+               if (atomic_read(&sk->sk_wmem_alloc) > limit) {
                        set_bit(TSQ_THROTTLED, &tp->tsq_flags);
                        break;
                }
+
                limit = mss_now;
                if (tso_segs > 1 && !tcp_urg_mode(tp))
                        limit = tcp_mss_split_point(sk, skb, mss_now,
index 74d2c95..0ca44df 100644 (file)
@@ -658,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
                break;
        case ICMP_REDIRECT:
                ipv4_sk_redirect(skb, sk);
-               break;
+               goto out;
        }
 
        /*
index d6ff126..cd3fb30 100644 (file)
@@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
        return false;
 }
 
+/* Compares an address/prefix_len with addresses on device @dev.
+ * If one is found it returns true.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+       const unsigned int prefix_len, struct net_device *dev)
+{
+       struct inet6_dev *idev;
+       struct inet6_ifaddr *ifa;
+       bool ret = false;
+
+       rcu_read_lock();
+       idev = __in6_dev_get(dev);
+       if (idev) {
+               read_lock_bh(&idev->lock);
+               list_for_each_entry(ifa, &idev->addr_list, if_list) {
+                       ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
+                       if (ret)
+                               break;
+               }
+               read_unlock_bh(&idev->lock);
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
+
 int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
 {
        struct inet6_dev *idev;
@@ -2193,43 +2220,21 @@ ok:
                        else
                                stored_lft = 0;
                        if (!update_lft && !create && stored_lft) {
-                               if (valid_lft > MIN_VALID_LIFETIME ||
-                                   valid_lft > stored_lft)
-                                       update_lft = 1;
-                               else if (stored_lft <= MIN_VALID_LIFETIME) {
-                                       /* valid_lft <= stored_lft is always true */
-                                       /*
-                                        * RFC 4862 Section 5.5.3e:
-                                        * "Note that the preferred lifetime of
-                                        *  the corresponding address is always
-                                        *  reset to the Preferred Lifetime in
-                                        *  the received Prefix Information
-                                        *  option, regardless of whether the
-                                        *  valid lifetime is also reset or
-                                        *  ignored."
-                                        *
-                                        *  So if the preferred lifetime in
-                                        *  this advertisement is different
-                                        *  than what we have stored, but the
-                                        *  valid lifetime is invalid, just
-                                        *  reset prefered_lft.
-                                        *
-                                        *  We must set the valid lifetime
-                                        *  to the stored lifetime since we'll
-                                        *  be updating the timestamp below,
-                                        *  else we'll set it back to the
-                                        *  minimum.
-                                        */
-                                       if (prefered_lft != ifp->prefered_lft) {
-                                               valid_lft = stored_lft;
-                                               update_lft = 1;
-                                       }
-                               } else {
-                                       valid_lft = MIN_VALID_LIFETIME;
-                                       if (valid_lft < prefered_lft)
-                                               prefered_lft = valid_lft;
-                                       update_lft = 1;
-                               }
+                               const u32 minimum_lft = min(
+                                       stored_lft, (u32)MIN_VALID_LIFETIME);
+                               valid_lft = max(valid_lft, minimum_lft);
+
+                               /* RFC4862 Section 5.5.3e:
+                                * "Note that the preferred lifetime of the
+                                *  corresponding address is always reset to
+                                *  the Preferred Lifetime in the received
+                                *  Prefix Information option, regardless of
+                                *  whether the valid lifetime is also reset or
+                                *  ignored."
+                                *
+                                * So we should always update prefered_lft here.
+                                */
+                               update_lft = 1;
                        }
 
                        if (update_lft) {
index 6b26e9f..7bb5446 100644 (file)
@@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
        struct ip6_tnl *tunnel = netdev_priv(dev);
        struct net_device *tdev;    /* Device to other host */
        struct ipv6hdr  *ipv6h;     /* Our new IP header */
-       unsigned int max_headroom /* The extra header space needed */
+       unsigned int max_headroom = 0; /* The extra header space needed */
        int    gre_hlen;
        struct ipv6_tel_txoption opt;
        int    mtu;
@@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 
        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
 
-       max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+       max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
 
        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
index 3a692d5..a54c45c 100644 (file)
@@ -1015,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
         * udp datagram
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+               struct frag_hdr fhdr;
+
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
@@ -1036,12 +1038,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
                skb->protocol = htons(ETH_P_IPV6);
                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
-       }
-
-       err = skb_append_datato_frags(sk,skb, getfrag, from,
-                                     (length - transhdrlen));
-       if (!err) {
-               struct frag_hdr fhdr;
 
                /* Specify the length of each IPv6 datagram fragment.
                 * It has to be a multiple of 8.
@@ -1052,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
                ipv6_select_ident(&fhdr, rt);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);
-
-               return 0;
        }
-       /* There is not enough support do UPD LSO,
-        * so follow normal path
-        */
-       kfree_skb(skb);
 
-       return err;
+       return skb_append_datato_frags(sk, skb, getfrag, from,
+                                      (length - transhdrlen));
 }
 
 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1227,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
         * --yoshfuji
         */
 
-       cork->length += length;
-       if (length > mtu) {
-               int proto = sk->sk_protocol;
-               if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
-                       ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
-                       return -EMSGSIZE;
-               }
-
-               if (proto == IPPROTO_UDP &&
-                   (rt->dst.dev->features & NETIF_F_UFO)) {
+       if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
+                                          sk->sk_protocol == IPPROTO_RAW)) {
+               ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
+               return -EMSGSIZE;
+       }
 
-                       err = ip6_ufo_append_data(sk, getfrag, from, length,
-                                                 hh_len, fragheaderlen,
-                                                 transhdrlen, mtu, flags, rt);
-                       if (err)
-                               goto error;
-                       return 0;
-               }
+       skb = skb_peek_tail(&sk->sk_write_queue);
+       cork->length += length;
+       if (((length > mtu) ||
+            (skb && skb_is_gso(skb))) &&
+           (sk->sk_protocol == IPPROTO_UDP) &&
+           (rt->dst.dev->features & NETIF_F_UFO)) {
+               err = ip6_ufo_append_data(sk, getfrag, from, length,
+                                         hh_len, fragheaderlen,
+                                         transhdrlen, mtu, flags, rt);
+               if (err)
+                       goto error;
+               return 0;
        }
 
-       if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+       if (!skb)
                goto alloc_new_skb;
 
        while (length > 0) {
index 2d8f482..a791552 100644 (file)
@@ -1731,8 +1731,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
                }
        }
 
-       t = rtnl_dereference(ip6n->tnls_wc[0]);
-       unregister_netdevice_queue(t->dev, &list);
        unregister_netdevice_many(&list);
 }
 
@@ -1752,6 +1750,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
        if (!ip6n->fb_tnl_dev)
                goto err_alloc_dev;
        dev_net_set(ip6n->fb_tnl_dev, net);
+       ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
        /* FB netdevice is special: we have one, and only one per netns.
         * Allowing to move it to another netns is clearly unsafe.
         */
index 096cd67..d18f9f9 100644 (file)
@@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data)
                if (idev->mc_dad_count)
                        mld_dad_start_timer(idev, idev->mc_maxdelay);
        }
-       __in6_dev_put(idev);
+       in6_dev_put(idev);
 }
 
 static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
@@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data)
 
        idev->mc_gq_running = 0;
        mld_send_report(idev, NULL);
-       __in6_dev_put(idev);
+       in6_dev_put(idev);
 }
 
 static void mld_ifc_timer_expire(unsigned long data)
@@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data)
                if (idev->mc_ifc_count)
                        mld_ifc_start_timer(idev, idev->mc_maxdelay);
        }
-       __in6_dev_put(idev);
+       in6_dev_put(idev);
 }
 
 static void mld_ifc_event(struct inet6_dev *idev)
index 19cfea8..2748b04 100644 (file)
@@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
        if (th == NULL)
                return NF_DROP;
 
-       synproxy_parse_options(skb, par->thoff, th, &opts);
+       if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+               return NF_DROP;
 
        if (th->syn && !(th->ack || th->fin || th->rst)) {
                /* Initial SYN from client */
@@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
 
                /* fall through */
        case TCP_CONNTRACK_SYN_SENT:
-               synproxy_parse_options(skb, thoff, th, &opts);
+               if (!synproxy_parse_options(skb, thoff, th, &opts))
+                       return NF_DROP;
 
                if (!th->syn && th->ack &&
                    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
                if (!th->syn || !th->ack)
                        break;
 
-               synproxy_parse_options(skb, thoff, th, &opts);
+               if (!synproxy_parse_options(skb, thoff, th, &opts))
+                       return NF_DROP;
+
                if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
                        synproxy->tsoff = opts.tsval - synproxy->its;
 
index 58916bb..a4ed241 100644 (file)
@@ -335,8 +335,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
                ip6_sk_update_pmtu(skb, sk, info);
                harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
        }
-       if (type == NDISC_REDIRECT)
+       if (type == NDISC_REDIRECT) {
                ip6_sk_redirect(skb, sk);
+               return;
+       }
        if (np->recverr) {
                u8 *payload = skb->data;
                if (!inet->hdrincl)
index 7ee5cb9..1926945 100644 (file)
@@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
        return false;
 }
 
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+       const struct in6_addr *v6dst)
+{
+       int prefix_len;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+       prefix_len = tunnel->ip6rd.prefixlen + 32
+               - tunnel->ip6rd.relay_prefixlen;
+#else
+       prefix_len = 48;
+#endif
+       return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
+
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+                             const struct iphdr *iph,
+                             struct ip_tunnel *tunnel)
+{
+       const struct ipv6hdr *ipv6h;
+
+       if (tunnel->dev->priv_flags & IFF_ISATAP) {
+               if (!isatap_chksrc(skb, iph, tunnel))
+                       return true;
+
+               return false;
+       }
+
+       if (tunnel->dev->flags & IFF_POINTOPOINT)
+               return false;
+
+       ipv6h = ipv6_hdr(skb);
+
+       if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+               net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+                                    &iph->saddr, &ipv6h->saddr,
+                                    &iph->daddr, &ipv6h->daddr);
+               return true;
+       }
+
+       if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+               return false;
+
+       if (only_dnatted(tunnel, &ipv6h->daddr))
+               return false;
+
+       net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+                            &iph->saddr, &ipv6h->saddr,
+                            &iph->daddr, &ipv6h->daddr);
+       return true;
+}
+
 static int ipip6_rcv(struct sk_buff *skb)
 {
        const struct iphdr *iph = ip_hdr(skb);
@@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb)
                IPCB(skb)->flags = 0;
                skb->protocol = htons(ETH_P_IPV6);
 
-               if (tunnel->dev->priv_flags & IFF_ISATAP) {
-                       if (!isatap_chksrc(skb, iph, tunnel)) {
-                               tunnel->dev->stats.rx_errors++;
-                               goto out;
-                       }
-               } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) {
-                       if (is_spoofed_6rd(tunnel, iph->saddr,
-                                          &ipv6_hdr(skb)->saddr) ||
-                           is_spoofed_6rd(tunnel, iph->daddr,
-                                          &ipv6_hdr(skb)->daddr)) {
-                               tunnel->dev->stats.rx_errors++;
-                               goto out;
-                       }
+               if (packet_is_spoofed(skb, iph, tunnel)) {
+                       tunnel->dev->stats.rx_errors++;
+                       goto out;
                }
 
                __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
@@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
                        neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
 
                if (neigh == NULL) {
-                       net_dbg_ratelimited("sit: nexthop == NULL\n");
+                       net_dbg_ratelimited("nexthop == NULL\n");
                        goto tx_error;
                }
 
@@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
                        neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
 
                if (neigh == NULL) {
-                       net_dbg_ratelimited("sit: nexthop == NULL\n");
+                       net_dbg_ratelimited("nexthop == NULL\n");
                        goto tx_error;
                }
 
@@ -1612,6 +1666,7 @@ static int __net_init sit_init_net(struct net *net)
                goto err_alloc_dev;
        }
        dev_net_set(sitn->fb_tunnel_dev, net);
+       sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
        /* FB netdevice is special: we have one, and only one per netns.
         * Allowing to move it to another netns is clearly unsafe.
         */
@@ -1646,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net)
 
        rtnl_lock();
        sit_destroy_tunnels(sitn, &list);
-       unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
        unregister_netdevice_many(&list);
        rtnl_unlock();
 }
index f405815..72b7eaa 100644 (file)
@@ -525,8 +525,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
        if (type == ICMPV6_PKT_TOOBIG)
                ip6_sk_update_pmtu(skb, sk, info);
-       if (type == NDISC_REDIRECT)
+       if (type == NDISC_REDIRECT) {
                ip6_sk_redirect(skb, sk);
+               goto out;
+       }
 
        np = inet6_sk(sk);
 
index 54563ad..355cc3b 100644 (file)
@@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param)
                        } else {
                                lapb->n2count++;
                                lapb_requeue_frames(lapb);
+                               lapb_kick(lapb);
                        }
                        break;
 
index 4f69e83..74fd00c 100644 (file)
@@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 
        if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                struct ip_vs_cpu_stats *s;
+               struct ip_vs_service *svc;
 
                s = this_cpu_ptr(dest->stats.cpustats);
                s->ustats.inpkts++;
@@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                s->ustats.inbytes += skb->len;
                u64_stats_update_end(&s->syncp);
 
-               s = this_cpu_ptr(dest->svc->stats.cpustats);
+               rcu_read_lock();
+               svc = rcu_dereference(dest->svc);
+               s = this_cpu_ptr(svc->stats.cpustats);
                s->ustats.inpkts++;
                u64_stats_update_begin(&s->syncp);
                s->ustats.inbytes += skb->len;
                u64_stats_update_end(&s->syncp);
+               rcu_read_unlock();
 
                s = this_cpu_ptr(ipvs->tot_stats.cpustats);
                s->ustats.inpkts++;
@@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 
        if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                struct ip_vs_cpu_stats *s;
+               struct ip_vs_service *svc;
 
                s = this_cpu_ptr(dest->stats.cpustats);
                s->ustats.outpkts++;
@@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                s->ustats.outbytes += skb->len;
                u64_stats_update_end(&s->syncp);
 
-               s = this_cpu_ptr(dest->svc->stats.cpustats);
+               rcu_read_lock();
+               svc = rcu_dereference(dest->svc);
+               s = this_cpu_ptr(svc->stats.cpustats);
                s->ustats.outpkts++;
                u64_stats_update_begin(&s->syncp);
                s->ustats.outbytes += skb->len;
                u64_stats_update_end(&s->syncp);
+               rcu_read_unlock();
 
                s = this_cpu_ptr(ipvs->tot_stats.cpustats);
                s->ustats.outpkts++;
index c8148e4..a3df9bd 100644 (file)
@@ -460,7 +460,7 @@ static inline void
 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 {
        atomic_inc(&svc->refcnt);
-       dest->svc = svc;
+       rcu_assign_pointer(dest->svc, svc);
 }
 
 static void ip_vs_service_free(struct ip_vs_service *svc)
@@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc)
        kfree(svc);
 }
 
-static void
-__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+static void ip_vs_service_rcu_free(struct rcu_head *head)
 {
-       struct ip_vs_service *svc = dest->svc;
+       struct ip_vs_service *svc;
+
+       svc = container_of(head, struct ip_vs_service, rcu_head);
+       ip_vs_service_free(svc);
+}
 
-       dest->svc = NULL;
+static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
+{
        if (atomic_dec_and_test(&svc->refcnt)) {
                IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
                              svc->fwmark,
                              IP_VS_DBG_ADDR(svc->af, &svc->addr),
                              ntohs(svc->port));
-               ip_vs_service_free(svc);
+               if (do_delay)
+                       call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
+               else
+                       ip_vs_service_free(svc);
        }
 }
 
@@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
                              IP_VS_DBG_ADDR(svc->af, &dest->addr),
                              ntohs(dest->port),
                              atomic_read(&dest->refcnt));
-               /* We can not reuse dest while in grace period
-                * because conns still can use dest->svc
-                */
-               if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
-                       continue;
                if (dest->af == svc->af &&
                    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
                    dest->port == dport &&
@@ -697,8 +699,10 @@ out:
 
 static void ip_vs_dest_free(struct ip_vs_dest *dest)
 {
+       struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
+
        __ip_vs_dst_cache_reset(dest);
-       __ip_vs_unbind_svc(dest);
+       __ip_vs_svc_put(svc, false);
        free_percpu(dest->stats.cpustats);
        kfree(dest);
 }
@@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
                    struct ip_vs_dest_user_kern *udest, int add)
 {
        struct netns_ipvs *ipvs = net_ipvs(svc->net);
+       struct ip_vs_service *old_svc;
        struct ip_vs_scheduler *sched;
        int conn_flags;
 
@@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
        atomic_set(&dest->conn_flags, conn_flags);
 
        /* bind the service */
-       if (!dest->svc) {
+       old_svc = rcu_dereference_protected(dest->svc, 1);
+       if (!old_svc) {
                __ip_vs_bind_svc(dest, svc);
        } else {
-               if (dest->svc != svc) {
-                       __ip_vs_unbind_svc(dest);
+               if (old_svc != svc) {
                        ip_vs_zero_stats(&dest->stats);
                        __ip_vs_bind_svc(dest, svc);
+                       __ip_vs_svc_put(old_svc, true);
                }
        }
 
@@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
        return 0;
 }
 
-static void ip_vs_dest_wait_readers(struct rcu_head *head)
-{
-       struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
-                                              rcu_head);
-
-       /* End of grace period after unlinking */
-       clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
-}
-
-
 /*
  *     Delete a destination (must be already unlinked from the service)
  */
@@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
         */
        ip_vs_rs_unhash(dest);
 
-       if (!cleanup) {
-               set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
-               call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
-       }
-
        spin_lock_bh(&ipvs->dest_trash_lock);
        IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
                      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
                      atomic_read(&dest->refcnt));
        if (list_empty(&ipvs->dest_trash) && !cleanup)
                mod_timer(&ipvs->dest_trash_timer,
-                         jiffies + IP_VS_DEST_TRASH_PERIOD);
+                         jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
        /* dest lives in trash without reference */
        list_add(&dest->t_list, &ipvs->dest_trash);
+       dest->idle_start = 0;
        spin_unlock_bh(&ipvs->dest_trash_lock);
        ip_vs_dest_put(dest);
 }
@@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data)
        struct net *net = (struct net *) data;
        struct netns_ipvs *ipvs = net_ipvs(net);
        struct ip_vs_dest *dest, *next;
+       unsigned long now = jiffies;
 
        spin_lock(&ipvs->dest_trash_lock);
        list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
-               /* Skip if dest is in grace period */
-               if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
-                       continue;
                if (atomic_read(&dest->refcnt) > 0)
                        continue;
+               if (dest->idle_start) {
+                       if (time_before(now, dest->idle_start +
+                                            IP_VS_DEST_TRASH_PERIOD))
+                               continue;
+               } else {
+                       dest->idle_start = max(1UL, now);
+                       continue;
+               }
                IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
                              dest->vfwmark,
-                             IP_VS_DBG_ADDR(dest->svc->af, &dest->addr),
+                             IP_VS_DBG_ADDR(dest->af, &dest->addr),
                              ntohs(dest->port));
                list_del(&dest->t_list);
                ip_vs_dest_free(dest);
        }
        if (!list_empty(&ipvs->dest_trash))
                mod_timer(&ipvs->dest_trash_timer,
-                         jiffies + IP_VS_DEST_TRASH_PERIOD);
+                         jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
        spin_unlock(&ipvs->dest_trash_lock);
 }
 
@@ -1320,14 +1318,6 @@ out:
        return ret;
 }
 
-static void ip_vs_service_rcu_free(struct rcu_head *head)
-{
-       struct ip_vs_service *svc;
-
-       svc = container_of(head, struct ip_vs_service, rcu_head);
-       ip_vs_service_free(svc);
-}
-
 /*
  *     Delete a service from the service list
  *     - The service must be unlinked, unlocked and not referenced!
@@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
        /*
         *    Free the service if nobody refers to it
         */
-       if (atomic_dec_and_test(&svc->refcnt)) {
-               IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
-                             svc->fwmark,
-                             IP_VS_DBG_ADDR(svc->af, &svc->addr),
-                             ntohs(svc->port));
-               call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
-       }
+       __ip_vs_svc_put(svc, true);
 
        /* decrease the module use count */
        ip_vs_use_count_dec();
index 6bee6d0..1425e9a 100644 (file)
@@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
                                 struct ip_vs_cpu_stats __percpu *stats)
 {
        int i;
+       bool add = false;
 
        for_each_possible_cpu(i) {
                struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
                unsigned int start;
                __u64 inbytes, outbytes;
-               if (i) {
+               if (add) {
                        sum->conns += s->ustats.conns;
                        sum->inpkts += s->ustats.inpkts;
                        sum->outpkts += s->ustats.outpkts;
@@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
                        sum->inbytes += inbytes;
                        sum->outbytes += outbytes;
                } else {
+                       add = true;
                        sum->conns = s->ustats.conns;
                        sum->inpkts = s->ustats.inpkts;
                        sum->outpkts = s->ustats.outpkts;
index 1383b0e..eff13c9 100644 (file)
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry {
        struct hlist_node       list;
        int                     af;             /* address family */
        union nf_inet_addr      addr;           /* destination IP address */
-       struct ip_vs_dest __rcu *dest;          /* real server (cache) */
+       struct ip_vs_dest       *dest;          /* real server (cache) */
        unsigned long           lastuse;        /* last used time */
        struct rcu_head         rcu_head;
 };
@@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = {
 };
 #endif
 
-static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
+static void ip_vs_lblc_rcu_free(struct rcu_head *head)
 {
-       struct ip_vs_dest *dest;
+       struct ip_vs_lblc_entry *en = container_of(head,
+                                                  struct ip_vs_lblc_entry,
+                                                  rcu_head);
 
-       hlist_del_rcu(&en->list);
-       /*
-        * We don't kfree dest because it is referred either by its service
-        * or the trash dest list.
-        */
-       dest = rcu_dereference_protected(en->dest, 1);
-       ip_vs_dest_put(dest);
-       kfree_rcu(en, rcu_head);
+       ip_vs_dest_put(en->dest);
+       kfree(en);
 }
 
+static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
+{
+       hlist_del_rcu(&en->list);
+       call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
+}
 
 /*
  *     Returns hash value for IPVS LBLC entry
@@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
        struct ip_vs_lblc_entry *en;
 
        en = ip_vs_lblc_get(dest->af, tbl, daddr);
-       if (!en) {
-               en = kmalloc(sizeof(*en), GFP_ATOMIC);
-               if (!en)
-                       return NULL;
-
-               en->af = dest->af;
-               ip_vs_addr_copy(dest->af, &en->addr, daddr);
-               en->lastuse = jiffies;
+       if (en) {
+               if (en->dest == dest)
+                       return en;
+               ip_vs_lblc_del(en);
+       }
+       en = kmalloc(sizeof(*en), GFP_ATOMIC);
+       if (!en)
+               return NULL;
 
-               ip_vs_dest_hold(dest);
-               RCU_INIT_POINTER(en->dest, dest);
+       en->af = dest->af;
+       ip_vs_addr_copy(dest->af, &en->addr, daddr);
+       en->lastuse = jiffies;
 
-               ip_vs_lblc_hash(tbl, en);
-       } else {
-               struct ip_vs_dest *old_dest;
+       ip_vs_dest_hold(dest);
+       en->dest = dest;
 
-               old_dest = rcu_dereference_protected(en->dest, 1);
-               if (old_dest != dest) {
-                       ip_vs_dest_put(old_dest);
-                       ip_vs_dest_hold(dest);
-                       /* No ordering constraints for refcnt */
-                       RCU_INIT_POINTER(en->dest, dest);
-               }
-       }
+       ip_vs_lblc_hash(tbl, en);
 
        return en;
 }
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
        tbl->dead = 1;
        for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
                hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
-                       ip_vs_lblc_free(en);
+                       ip_vs_lblc_del(en);
                        atomic_dec(&tbl->entries);
                }
        }
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
                                        sysctl_lblc_expiration(svc)))
                                continue;
 
-                       ip_vs_lblc_free(en);
+                       ip_vs_lblc_del(en);
                        atomic_dec(&tbl->entries);
                }
                spin_unlock(&svc->sched_lock);
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
                        if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
                                continue;
 
-                       ip_vs_lblc_free(en);
+                       ip_vs_lblc_del(en);
                        atomic_dec(&tbl->entries);
                        goal--;
                }
@@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
                        continue;
 
                doh = ip_vs_dest_conn_overhead(dest);
-               if (loh * atomic_read(&dest->weight) >
-                   doh * atomic_read(&least->weight)) {
+               if ((__s64)loh * atomic_read(&dest->weight) >
+                   (__s64)doh * atomic_read(&least->weight)) {
                        least = dest;
                        loh = doh;
                }
@@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
                 * free up entries from the trash at any time.
                 */
 
-               dest = rcu_dereference(en->dest);
+               dest = en->dest;
                if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
                    atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
                        goto out;
@@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void)
 {
        unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
        unregister_pernet_subsys(&ip_vs_lblc_ops);
-       synchronize_rcu();
+       rcu_barrier();
 }
 
 
index 5199448..0b85500 100644 (file)
@@ -89,7 +89,7 @@
  */
 struct ip_vs_dest_set_elem {
        struct list_head        list;          /* list link */
-       struct ip_vs_dest __rcu *dest;         /* destination server */
+       struct ip_vs_dest       *dest;          /* destination server */
        struct rcu_head         rcu_head;
 };
 
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
 
        if (check) {
                list_for_each_entry(e, &set->list, list) {
-                       struct ip_vs_dest *d;
-
-                       d = rcu_dereference_protected(e->dest, 1);
-                       if (d == dest)
-                               /* already existed */
+                       if (e->dest == dest)
                                return;
                }
        }
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
                return;
 
        ip_vs_dest_hold(dest);
-       RCU_INIT_POINTER(e->dest, dest);
+       e->dest = dest;
 
        list_add_rcu(&e->list, &set->list);
        atomic_inc(&set->size);
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
        set->lastmod = jiffies;
 }
 
+static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
+{
+       struct ip_vs_dest_set_elem *e;
+
+       e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
+       ip_vs_dest_put(e->dest);
+       kfree(e);
+}
+
 static void
 ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 {
        struct ip_vs_dest_set_elem *e;
 
        list_for_each_entry(e, &set->list, list) {
-               struct ip_vs_dest *d;
-
-               d = rcu_dereference_protected(e->dest, 1);
-               if (d == dest) {
+               if (e->dest == dest) {
                        /* HIT */
                        atomic_dec(&set->size);
                        set->lastmod = jiffies;
-                       ip_vs_dest_put(dest);
                        list_del_rcu(&e->list);
-                       kfree_rcu(e, rcu_head);
+                       call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
                        break;
                }
        }
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
        struct ip_vs_dest_set_elem *e, *ep;
 
        list_for_each_entry_safe(e, ep, &set->list, list) {
-               struct ip_vs_dest *d;
-
-               d = rcu_dereference_protected(e->dest, 1);
-               /*
-                * We don't kfree dest because it is referred either
-                * by its service or by the trash dest list.
-                */
-               ip_vs_dest_put(d);
                list_del_rcu(&e->list);
-               kfree_rcu(e, rcu_head);
+               call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
        }
 }
 
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
        struct ip_vs_dest *dest, *least;
        int loh, doh;
 
-       if (set == NULL)
-               return NULL;
-
        /* select the first destination server, whose weight > 0 */
        list_for_each_entry_rcu(e, &set->list, list) {
-               least = rcu_dereference(e->dest);
+               least = e->dest;
                if (least->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;
 
@@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
        /* find the destination with the weighted least load */
   nextstage:
        list_for_each_entry_continue_rcu(e, &set->list, list) {
-               dest = rcu_dereference(e->dest);
+               dest = e->dest;
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;
 
                doh = ip_vs_dest_conn_overhead(dest);
-               if ((loh * atomic_read(&dest->weight) >
-                    doh * atomic_read(&least->weight))
+               if (((__s64)loh * atomic_read(&dest->weight) >
+                    (__s64)doh * atomic_read(&least->weight))
                    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                        least = dest;
                        loh = doh;
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 
        /* select the first destination server, whose weight > 0 */
        list_for_each_entry(e, &set->list, list) {
-               most = rcu_dereference_protected(e->dest, 1);
+               most = e->dest;
                if (atomic_read(&most->weight) > 0) {
                        moh = ip_vs_dest_conn_overhead(most);
                        goto nextstage;
@@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
        /* find the destination with the weighted most load */
   nextstage:
        list_for_each_entry_continue(e, &set->list, list) {
-               dest = rcu_dereference_protected(e->dest, 1);
+               dest = e->dest;
                doh = ip_vs_dest_conn_overhead(dest);
                /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
-               if ((moh * atomic_read(&dest->weight) <
-                    doh * atomic_read(&most->weight))
+               if (((__s64)moh * atomic_read(&dest->weight) <
+                    (__s64)doh * atomic_read(&most->weight))
                    && (atomic_read(&dest->weight) > 0)) {
                        most = dest;
                        moh = doh;
@@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
                        continue;
 
                doh = ip_vs_dest_conn_overhead(dest);
-               if (loh * atomic_read(&dest->weight) >
-                   doh * atomic_read(&least->weight)) {
+               if ((__s64)loh * atomic_read(&dest->weight) >
+                   (__s64)doh * atomic_read(&least->weight)) {
                        least = dest;
                        loh = doh;
                }
@@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
 {
        unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
        unregister_pernet_subsys(&ip_vs_lblcr_ops);
-       synchronize_rcu();
+       rcu_barrier();
 }
 
 
index d8d9860..961a6de 100644 (file)
@@ -40,7 +40,7 @@
 #include <net/ip_vs.h>
 
 
-static inline unsigned int
+static inline int
 ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
 {
        /*
@@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
                  struct ip_vs_iphdr *iph)
 {
        struct ip_vs_dest *dest, *least = NULL;
-       unsigned int loh = 0, doh;
+       int loh = 0, doh;
 
        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
@@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
                }
 
                if (!least ||
-                   (loh * atomic_read(&dest->weight) >
-                    doh * atomic_read(&least->weight))) {
+                   ((__s64)loh * atomic_read(&dest->weight) >
+                    (__s64)doh * atomic_read(&least->weight))) {
                        least = dest;
                        loh = doh;
                }
index a5284cc..e446b9f 100644 (file)
@@ -44,7 +44,7 @@
 #include <net/ip_vs.h>
 
 
-static inline unsigned int
+static inline int
 ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
 {
        /*
@@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
                   struct ip_vs_iphdr *iph)
 {
        struct ip_vs_dest *dest, *least;
-       unsigned int loh, doh;
+       int loh, doh;
 
        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
@@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;
                doh = ip_vs_sed_dest_overhead(dest);
-               if (loh * atomic_read(&dest->weight) >
-                   doh * atomic_read(&least->weight)) {
+               if ((__s64)loh * atomic_read(&dest->weight) >
+                   (__s64)doh * atomic_read(&least->weight)) {
                        least = dest;
                        loh = doh;
                }
index 6dc1fa1..b5b4650 100644 (file)
@@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
                   struct ip_vs_iphdr *iph)
 {
        struct ip_vs_dest *dest, *least;
-       unsigned int loh, doh;
+       int loh, doh;
 
        IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
 
@@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;
                doh = ip_vs_dest_conn_overhead(dest);
-               if (loh * atomic_read(&dest->weight) >
-                   doh * atomic_read(&least->weight)) {
+               if ((__s64)loh * atomic_read(&dest->weight) >
+                   (__s64)doh * atomic_read(&least->weight)) {
                        least = dest;
                        loh = doh;
                }
index 6fd967c..cdf4567 100644 (file)
@@ -24,7 +24,7 @@
 int synproxy_net_id;
 EXPORT_SYMBOL_GPL(synproxy_net_id);
 
-void
+bool
 synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
                       const struct tcphdr *th, struct synproxy_options *opts)
 {
@@ -32,7 +32,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
        u8 buf[40], *ptr;
 
        ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
-       BUG_ON(ptr == NULL);
+       if (ptr == NULL)
+               return false;
 
        opts->options = 0;
        while (length > 0) {
@@ -41,16 +42,16 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
 
                switch (opcode) {
                case TCPOPT_EOL:
-                       return;
+                       return true;
                case TCPOPT_NOP:
                        length--;
                        continue;
                default:
                        opsize = *ptr++;
                        if (opsize < 2)
-                               return;
+                               return true;
                        if (opsize > length)
-                               return;
+                               return true;
 
                        switch (opcode) {
                        case TCPOPT_MSS:
@@ -84,6 +85,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
                        length -= opsize;
                }
        }
+       return true;
 }
 EXPORT_SYMBOL_GPL(synproxy_parse_options);
 
index 32ad015..a2fef8b 100644 (file)
@@ -285,7 +285,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 
 
 /* remove one skb from head of flow queue */
-static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
+static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
 {
        struct sk_buff *skb = flow->head;
 
@@ -293,6 +293,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
                flow->head = skb->next;
                skb->next = NULL;
                flow->qlen--;
+               sch->qstats.backlog -= qdisc_pkt_len(skb);
+               sch->q.qlen--;
        }
        return skb;
 }
@@ -418,8 +420,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
        struct fq_flow_head *head;
        struct sk_buff *skb;
        struct fq_flow *f;
+       u32 rate;
 
-       skb = fq_dequeue_head(&q->internal);
+       skb = fq_dequeue_head(sch, &q->internal);
        if (skb)
                goto out;
        fq_check_throttled(q, now);
@@ -449,7 +452,7 @@ begin:
                goto begin;
        }
 
-       skb = fq_dequeue_head(f);
+       skb = fq_dequeue_head(sch, f);
        if (!skb) {
                head->first = f->next;
                /* force a pass through old_flows to prevent starvation */
@@ -466,43 +469,74 @@ begin:
        f->time_next_packet = now;
        f->credit -= qdisc_pkt_len(skb);
 
-       if (f->credit <= 0 &&
-           q->rate_enable &&
-           skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
-               u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
+       if (f->credit > 0 || !q->rate_enable)
+               goto out;
 
-               rate = min(rate, q->flow_max_rate);
-               if (rate) {
-                       u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC;
-
-                       do_div(len, rate);
-                       /* Since socket rate can change later,
-                        * clamp the delay to 125 ms.
-                        * TODO: maybe segment the too big skb, as in commit
-                        * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
-                        */
-                       if (unlikely(len > 125 * NSEC_PER_MSEC)) {
-                               len = 125 * NSEC_PER_MSEC;
-                               q->stat_pkts_too_long++;
-                       }
+       if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
+               rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
 
-                       f->time_next_packet = now + len;
+               rate = min(rate, q->flow_max_rate);
+       } else {
+               rate = q->flow_max_rate;
+               if (rate == ~0U)
+                       goto out;
+       }
+       if (rate) {
+               u32 plen = max(qdisc_pkt_len(skb), q->quantum);
+               u64 len = (u64)plen * NSEC_PER_SEC;
+
+               do_div(len, rate);
+               /* Since socket rate can change later,
+                * clamp the delay to 125 ms.
+                * TODO: maybe segment the too big skb, as in commit
+                * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
+                */
+               if (unlikely(len > 125 * NSEC_PER_MSEC)) {
+                       len = 125 * NSEC_PER_MSEC;
+                       q->stat_pkts_too_long++;
                }
+
+               f->time_next_packet = now + len;
        }
 out:
-       sch->qstats.backlog -= qdisc_pkt_len(skb);
        qdisc_bstats_update(sch, skb);
-       sch->q.qlen--;
        qdisc_unthrottled(sch);
        return skb;
 }
 
 static void fq_reset(struct Qdisc *sch)
 {
+       struct fq_sched_data *q = qdisc_priv(sch);
+       struct rb_root *root;
        struct sk_buff *skb;
+       struct rb_node *p;
+       struct fq_flow *f;
+       unsigned int idx;
 
-       while ((skb = fq_dequeue(sch)) != NULL)
+       while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
                kfree_skb(skb);
+
+       if (!q->fq_root)
+               return;
+
+       for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
+               root = &q->fq_root[idx];
+               while ((p = rb_first(root)) != NULL) {
+                       f = container_of(p, struct fq_flow, fq_node);
+                       rb_erase(p, root);
+
+                       while ((skb = fq_dequeue_head(sch, f)) != NULL)
+                               kfree_skb(skb);
+
+                       kmem_cache_free(fq_flow_cachep, f);
+               }
+       }
+       q->new_flows.first      = NULL;
+       q->old_flows.first      = NULL;
+       q->delayed              = RB_ROOT;
+       q->flows                = 0;
+       q->inactive_flows       = 0;
+       q->throttled_flows      = 0;
 }
 
 static void fq_rehash(struct fq_sched_data *q,
@@ -645,6 +679,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
        while (sch->q.qlen > sch->limit) {
                struct sk_buff *skb = fq_dequeue(sch);
 
+               if (!skb)
+                       break;
                kfree_skb(skb);
                drop_count++;
        }
@@ -657,21 +693,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 static void fq_destroy(struct Qdisc *sch)
 {
        struct fq_sched_data *q = qdisc_priv(sch);
-       struct rb_root *root;
-       struct rb_node *p;
-       unsigned int idx;
 
-       if (q->fq_root) {
-               for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
-                       root = &q->fq_root[idx];
-                       while ((p = rb_first(root)) != NULL) {
-                               rb_erase(p, root);
-                               kmem_cache_free(fq_flow_cachep,
-                                               container_of(p, struct fq_flow, fq_node));
-                       }
-               }
-               kfree(q->fq_root);
-       }
+       fq_reset(sch);
+       kfree(q->fq_root);
        qdisc_watchdog_cancel(&q->watchdog);
 }
 
index 9bc6db0..e7000be 100644 (file)
@@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head,
 
        /* Allow network administrator to have same access as root. */
        if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
-           uid_eq(root_uid, current_uid())) {
+           uid_eq(root_uid, current_euid())) {
                int mode = (table->mode >> 6) & 7;
                return (mode << 6) | (mode << 3) | mode;
        }
        /* Allow netns root group to have the same access as the root group */
-       if (gid_eq(root_gid, current_gid())) {
+       if (in_egroup_p(root_gid)) {
                int mode = (table->mode >> 3) & 7;
                return (mode << 3) | mode;
        }
index 47016c3..66cad50 100755 (executable)
@@ -3975,8 +3975,8 @@ sub string_find_replace {
 # check for new externs in .h files.
                if ($realfile =~ /\.h$/ &&
                    $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) {
-                       if (WARN("AVOID_EXTERNS",
-                                "extern prototypes should be avoided in .h files\n" . $herecurr) &&
+                       if (CHK("AVOID_EXTERNS",
+                               "extern prototypes should be avoided in .h files\n" . $herecurr) &&
                            $fix) {
                                $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
                        }
index d6222ba..532471d 100644 (file)
  * it should be.
  */
 
-#include <linux/crypto.h>
+#include <crypto/hash.h>
 
 #include "include/apparmor.h"
 #include "include/crypto.h"
 
 static unsigned int apparmor_hash_size;
 
-static struct crypto_hash *apparmor_tfm;
+static struct crypto_shash *apparmor_tfm;
 
 unsigned int aa_hash_size(void)
 {
@@ -32,35 +32,33 @@ unsigned int aa_hash_size(void)
 int aa_calc_profile_hash(struct aa_profile *profile, u32 version, void *start,
                         size_t len)
 {
-       struct scatterlist sg[2];
-       struct hash_desc desc = {
-               .tfm = apparmor_tfm,
-               .flags = 0
-       };
+       struct {
+               struct shash_desc shash;
+               char ctx[crypto_shash_descsize(apparmor_tfm)];
+       } desc;
        int error = -ENOMEM;
        u32 le32_version = cpu_to_le32(version);
 
        if (!apparmor_tfm)
                return 0;
 
-       sg_init_table(sg, 2);
-       sg_set_buf(&sg[0], &le32_version, 4);
-       sg_set_buf(&sg[1], (u8 *) start, len);
-
        profile->hash = kzalloc(apparmor_hash_size, GFP_KERNEL);
        if (!profile->hash)
                goto fail;
 
-       error = crypto_hash_init(&desc);
+       desc.shash.tfm = apparmor_tfm;
+       desc.shash.flags = 0;
+
+       error = crypto_shash_init(&desc.shash);
        if (error)
                goto fail;
-       error = crypto_hash_update(&desc, &sg[0], 4);
+       error = crypto_shash_update(&desc.shash, (u8 *) &le32_version, 4);
        if (error)
                goto fail;
-       error = crypto_hash_update(&desc, &sg[1], len);
+       error = crypto_shash_update(&desc.shash, (u8 *) start, len);
        if (error)
                goto fail;
-       error = crypto_hash_final(&desc, profile->hash);
+       error = crypto_shash_final(&desc.shash, profile->hash);
        if (error)
                goto fail;
 
@@ -75,19 +73,19 @@ fail:
 
 static int __init init_profile_hash(void)
 {
-       struct crypto_hash *tfm;
+       struct crypto_shash *tfm;
 
        if (!apparmor_initialized)
                return 0;
 
-       tfm = crypto_alloc_hash("sha1", 0, CRYPTO_ALG_ASYNC);
+       tfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(tfm)) {
                int error = PTR_ERR(tfm);
                AA_ERROR("failed to setup profile sha1 hashing: %d\n", error);
                return error;
        }
        apparmor_tfm = tfm;
-       apparmor_hash_size = crypto_hash_digestsize(apparmor_tfm);
+       apparmor_hash_size = crypto_shash_digestsize(apparmor_tfm);
 
        aa_info_message("AppArmor sha1 policy hashing enabled");
 
index f2d4b63..c28b0f2 100644 (file)
@@ -360,7 +360,9 @@ static inline void aa_put_replacedby(struct aa_replacedby *p)
 static inline void __aa_update_replacedby(struct aa_profile *orig,
                                          struct aa_profile *new)
 {
-       struct aa_profile *tmp = rcu_dereference(orig->replacedby->profile);
+       struct aa_profile *tmp;
+       tmp = rcu_dereference_protected(orig->replacedby->profile,
+                                       mutex_is_locked(&orig->ns->lock));
        rcu_assign_pointer(orig->replacedby->profile, aa_get_profile(new));
        orig->flags |= PFLAG_INVALID;
        aa_put_profile(tmp);
index 6172509..345bec0 100644 (file)
@@ -563,7 +563,8 @@ void __init aa_free_root_ns(void)
 static void free_replacedby(struct aa_replacedby *r)
 {
        if (r) {
-               aa_put_profile(rcu_dereference(r->profile));
+               /* r->profile will not be updated any more as r is dead */
+               aa_put_profile(rcu_dereference_protected(r->profile, true));
                kzfree(r);
        }
 }
index dad36a6..fc3e662 100644 (file)
@@ -746,7 +746,6 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid,
  * @tclass: target security class
  * @requested: requested permissions, interpreted based on @tclass
  * @auditdata: auxiliary audit data
- * @flags: VFS walk flags
  *
  * Check the AVC to determine whether the @requested permissions are granted
  * for the SID pair (@ssid, @tsid), interpreting the permissions
@@ -756,17 +755,15 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid,
  * permissions are granted, -%EACCES if any permissions are denied, or
  * another -errno upon other errors.
  */
-int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass,
-                      u32 requested, struct common_audit_data *auditdata,
-                      unsigned flags)
+int avc_has_perm(u32 ssid, u32 tsid, u16 tclass,
+                u32 requested, struct common_audit_data *auditdata)
 {
        struct av_decision avd;
        int rc, rc2;
 
        rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
 
-       rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata,
-                       flags);
+       rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata);
        if (rc2)
                return rc2;
        return rc;
index a5091ec..5b52310 100644 (file)
@@ -1502,7 +1502,7 @@ static int cred_has_capability(const struct cred *cred,
 
        rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd);
        if (audit == SECURITY_CAP_AUDIT) {
-               int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad, 0);
+               int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad);
                if (rc2)
                        return rc2;
        }
@@ -1525,8 +1525,7 @@ static int task_has_system(struct task_struct *tsk,
 static int inode_has_perm(const struct cred *cred,
                          struct inode *inode,
                          u32 perms,
-                         struct common_audit_data *adp,
-                         unsigned flags)
+                         struct common_audit_data *adp)
 {
        struct inode_security_struct *isec;
        u32 sid;
@@ -1539,7 +1538,7 @@ static int inode_has_perm(const struct cred *cred,
        sid = cred_sid(cred);
        isec = inode->i_security;
 
-       return avc_has_perm_flags(sid, isec->sid, isec->sclass, perms, adp, flags);
+       return avc_has_perm(sid, isec->sid, isec->sclass, perms, adp);
 }
 
 /* Same as inode_has_perm, but pass explicit audit data containing
@@ -1554,7 +1553,7 @@ static inline int dentry_has_perm(const struct cred *cred,
 
        ad.type = LSM_AUDIT_DATA_DENTRY;
        ad.u.dentry = dentry;
-       return inode_has_perm(cred, inode, av, &ad, 0);
+       return inode_has_perm(cred, inode, av, &ad);
 }
 
 /* Same as inode_has_perm, but pass explicit audit data containing
@@ -1569,7 +1568,7 @@ static inline int path_has_perm(const struct cred *cred,
 
        ad.type = LSM_AUDIT_DATA_PATH;
        ad.u.path = *path;
-       return inode_has_perm(cred, inode, av, &ad, 0);
+       return inode_has_perm(cred, inode, av, &ad);
 }
 
 /* Same as path_has_perm, but uses the inode from the file struct. */
@@ -1581,7 +1580,7 @@ static inline int file_path_has_perm(const struct cred *cred,
 
        ad.type = LSM_AUDIT_DATA_PATH;
        ad.u.path = file->f_path;
-       return inode_has_perm(cred, file_inode(file), av, &ad, 0);
+       return inode_has_perm(cred, file_inode(file), av, &ad);
 }
 
 /* Check whether a task can use an open file descriptor to
@@ -1617,7 +1616,7 @@ static int file_has_perm(const struct cred *cred,
        /* av is zero if only checking access to the descriptor. */
        rc = 0;
        if (av)
-               rc = inode_has_perm(cred, inode, av, &ad, 0);
+               rc = inode_has_perm(cred, inode, av, &ad);
 
 out:
        return rc;
index 92d0ab5..f53ee3c 100644 (file)
@@ -130,7 +130,7 @@ static inline int avc_audit(u32 ssid, u32 tsid,
                            u16 tclass, u32 requested,
                            struct av_decision *avd,
                            int result,
-                           struct common_audit_data *a, unsigned flags)
+                           struct common_audit_data *a)
 {
        u32 audited, denied;
        audited = avc_audit_required(requested, avd, result, 0, &denied);
@@ -138,7 +138,7 @@ static inline int avc_audit(u32 ssid, u32 tsid,
                return 0;
        return slow_avc_audit(ssid, tsid, tclass,
                              requested, audited, denied,
-                             a, flags);
+                             a, 0);
 }
 
 #define AVC_STRICT 1 /* Ignore permissive mode. */
@@ -147,17 +147,9 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
                         unsigned flags,
                         struct av_decision *avd);
 
-int avc_has_perm_flags(u32 ssid, u32 tsid,
-                      u16 tclass, u32 requested,
-                      struct common_audit_data *auditdata,
-                      unsigned);
-
-static inline int avc_has_perm(u32 ssid, u32 tsid,
-                              u16 tclass, u32 requested,
-                              struct common_audit_data *auditdata)
-{
-       return avc_has_perm_flags(ssid, tsid, tclass, requested, auditdata, 0);
-}
+int avc_has_perm(u32 ssid, u32 tsid,
+                u16 tclass, u32 requested,
+                struct common_audit_data *auditdata);
 
 u32 avc_policy_seqno(void);
 
index 9896954..bea523a 100644 (file)
@@ -139,6 +139,18 @@ static int snd_compr_open(struct inode *inode, struct file *f)
 static int snd_compr_free(struct inode *inode, struct file *f)
 {
        struct snd_compr_file *data = f->private_data;
+       struct snd_compr_runtime *runtime = data->stream.runtime;
+
+       switch (runtime->state) {
+       case SNDRV_PCM_STATE_RUNNING:
+       case SNDRV_PCM_STATE_DRAINING:
+       case SNDRV_PCM_STATE_PAUSED:
+               data->stream.ops->trigger(&data->stream, SNDRV_PCM_TRIGGER_STOP);
+               break;
+       default:
+               break;
+       }
+
        data->stream.ops->free(&data->stream);
        kfree(data->stream.runtime->buffer);
        kfree(data->stream.runtime);
@@ -837,7 +849,8 @@ static int snd_compress_dev_disconnect(struct snd_device *device)
        struct snd_compr *compr;
 
        compr = device->device_data;
-       snd_unregister_device(compr->direction, compr->card, compr->device);
+       snd_unregister_device(SNDRV_DEVICE_TYPE_COMPRESS, compr->card,
+               compr->device);
        return 0;
 }
 
index 445ca48..bf578ba 100644 (file)
@@ -175,6 +175,7 @@ static const struct ac97_codec_id snd_ac97_codec_ids[] = {
 { 0x54524106, 0xffffffff, "TR28026",           NULL,           NULL },
 { 0x54524108, 0xffffffff, "TR28028",           patch_tritech_tr28028,  NULL }, // added by xin jin [07/09/99]
 { 0x54524123, 0xffffffff, "TR28602",           NULL,           NULL }, // only guess --jk [TR28023 = eMicro EM28023 (new CT1297)]
+{ 0x54584e03, 0xffffffff, "TLV320AIC27",       NULL,           NULL },
 { 0x54584e20, 0xffffffff, "TLC320AD9xC",       NULL,           NULL },
 { 0x56494161, 0xffffffff, "VIA1612A",          NULL,           NULL }, // modified ICE1232 with S/PDIF
 { 0x56494170, 0xffffffff, "VIA1617A",          patch_vt1617a,  NULL }, // modified VT1616 with S/PDIF
index b524f89..18d9725 100644 (file)
@@ -111,6 +111,9 @@ enum {
 /* 0x0009 - 0x0014 -> 12 test regs */
 /* 0x0015 - visibility reg */
 
+/* Cirrus Logic CS4208 */
+#define CS4208_VENDOR_NID      0x24
+
 /*
  * Cirrus Logic CS4210
  *
@@ -223,6 +226,16 @@ static const struct hda_verb cs_coef_init_verbs[] = {
        {} /* terminator */
 };
 
+static const struct hda_verb cs4208_coef_init_verbs[] = {
+       {0x01, AC_VERB_SET_POWER_STATE, 0x00}, /* AFG: D0 */
+       {0x24, AC_VERB_SET_PROC_STATE, 0x01},  /* VPW: processing on */
+       {0x24, AC_VERB_SET_COEF_INDEX, 0x0033},
+       {0x24, AC_VERB_SET_PROC_COEF, 0x0001}, /* A1 ICS */
+       {0x24, AC_VERB_SET_COEF_INDEX, 0x0034},
+       {0x24, AC_VERB_SET_PROC_COEF, 0x1C01}, /* A1 Enable, A Thresh = 300mV */
+       {} /* terminator */
+};
+
 /* Errata: CS4207 rev C0/C1/C2 Silicon
  *
  * http://www.cirrus.com/en/pubs/errata/ER880C3.pdf
@@ -295,6 +308,8 @@ static int cs_init(struct hda_codec *codec)
                /* init_verb sequence for C0/C1/C2 errata*/
                snd_hda_sequence_write(codec, cs_errata_init_verbs);
                snd_hda_sequence_write(codec, cs_coef_init_verbs);
+       } else if (spec->vendor_nid == CS4208_VENDOR_NID) {
+               snd_hda_sequence_write(codec, cs4208_coef_init_verbs);
        }
 
        snd_hda_gen_init(codec);
@@ -434,6 +449,29 @@ static const struct hda_pintbl mba42_pincfgs[] = {
        {} /* terminator */
 };
 
+static const struct hda_pintbl mba6_pincfgs[] = {
+       { 0x10, 0x032120f0 }, /* HP */
+       { 0x11, 0x500000f0 },
+       { 0x12, 0x90100010 }, /* Speaker */
+       { 0x13, 0x500000f0 },
+       { 0x14, 0x500000f0 },
+       { 0x15, 0x770000f0 },
+       { 0x16, 0x770000f0 },
+       { 0x17, 0x430000f0 },
+       { 0x18, 0x43ab9030 }, /* Mic */
+       { 0x19, 0x770000f0 },
+       { 0x1a, 0x770000f0 },
+       { 0x1b, 0x770000f0 },
+       { 0x1c, 0x90a00090 },
+       { 0x1d, 0x500000f0 },
+       { 0x1e, 0x500000f0 },
+       { 0x1f, 0x500000f0 },
+       { 0x20, 0x500000f0 },
+       { 0x21, 0x430000f0 },
+       { 0x22, 0x430000f0 },
+       {} /* terminator */
+};
+
 static void cs420x_fixup_gpio_13(struct hda_codec *codec,
                                 const struct hda_fixup *fix, int action)
 {
@@ -556,22 +594,23 @@ static int patch_cs420x(struct hda_codec *codec)
 
 /*
  * CS4208 support:
- * Its layout is no longer compatible with CS4206/CS4207, and the generic
- * parser seems working fairly well, except for trivial fixups.
+ * Its layout is no longer compatible with CS4206/CS4207
  */
 enum {
+       CS4208_MBA6,
        CS4208_GPIO0,
 };
 
 static const struct hda_model_fixup cs4208_models[] = {
        { .id = CS4208_GPIO0, .name = "gpio0" },
+       { .id = CS4208_MBA6, .name = "mba6" },
        {}
 };
 
 static const struct snd_pci_quirk cs4208_fixup_tbl[] = {
        /* codec SSID */
-       SND_PCI_QUIRK(0x106b, 0x7100, "MacBookPro 6,1", CS4208_GPIO0),
-       SND_PCI_QUIRK(0x106b, 0x7200, "MacBookPro 6,2", CS4208_GPIO0),
+       SND_PCI_QUIRK(0x106b, 0x7100, "MacBookAir 6,1", CS4208_MBA6),
+       SND_PCI_QUIRK(0x106b, 0x7200, "MacBookAir 6,2", CS4208_MBA6),
        {} /* terminator */
 };
 
@@ -588,18 +627,35 @@ static void cs4208_fixup_gpio0(struct hda_codec *codec,
 }
 
 static const struct hda_fixup cs4208_fixups[] = {
+       [CS4208_MBA6] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = mba6_pincfgs,
+               .chained = true,
+               .chain_id = CS4208_GPIO0,
+       },
        [CS4208_GPIO0] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cs4208_fixup_gpio0,
        },
 };
 
+/* correct the 0dB offset of input pins */
+static void cs4208_fix_amp_caps(struct hda_codec *codec, hda_nid_t adc)
+{
+       unsigned int caps;
+
+       caps = query_amp_caps(codec, adc, HDA_INPUT);
+       caps &= ~(AC_AMPCAP_OFFSET);
+       caps |= 0x02;
+       snd_hda_override_amp_caps(codec, adc, HDA_INPUT, caps);
+}
+
 static int patch_cs4208(struct hda_codec *codec)
 {
        struct cs_spec *spec;
        int err;
 
-       spec = cs_alloc_spec(codec, 0); /* no specific w/a */
+       spec = cs_alloc_spec(codec, CS4208_VENDOR_NID);
        if (!spec)
                return -ENOMEM;
 
@@ -609,6 +665,12 @@ static int patch_cs4208(struct hda_codec *codec)
                           cs4208_fixups);
        snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PRE_PROBE);
 
+       snd_hda_override_wcaps(codec, 0x18,
+                              get_wcaps(codec, 0x18) | AC_WCAP_STEREO);
+       cs4208_fix_amp_caps(codec, 0x18);
+       cs4208_fix_amp_caps(codec, 0x1b);
+       cs4208_fix_amp_caps(codec, 0x1c);
+
        err = cs_parse_auto_config(codec);
        if (err < 0)
                goto error;
index 4edd2d0..ec68eae 100644 (file)
@@ -3231,6 +3231,7 @@ enum {
        CXT_FIXUP_INC_MIC_BOOST,
        CXT_FIXUP_HEADPHONE_MIC_PIN,
        CXT_FIXUP_HEADPHONE_MIC,
+       CXT_FIXUP_GPIO1,
 };
 
 static void cxt_fixup_stereo_dmic(struct hda_codec *codec,
@@ -3375,6 +3376,15 @@ static const struct hda_fixup cxt_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cxt_fixup_headphone_mic,
        },
+       [CXT_FIXUP_GPIO1] = {
+               .type = HDA_FIXUP_VERBS,
+               .v.verbs = (const struct hda_verb[]) {
+                       { 0x01, AC_VERB_SET_GPIO_MASK, 0x01 },
+                       { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x01 },
+                       { 0x01, AC_VERB_SET_GPIO_DATA, 0x01 },
+                       { }
+               },
+       },
 };
 
 static const struct snd_pci_quirk cxt5051_fixups[] = {
@@ -3384,6 +3394,7 @@ static const struct snd_pci_quirk cxt5051_fixups[] = {
 
 static const struct snd_pci_quirk cxt5066_fixups[] = {
        SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC),
+       SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_GPIO1),
        SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
        SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
        SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410),
index 3d8cd04..50173d4 100644 (file)
@@ -937,6 +937,14 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec,
        }
 
        /*
+        * always configure channel mapping, it may have been changed by the
+        * user in the meantime
+        */
+       hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca,
+                                  channels, per_pin->chmap,
+                                  per_pin->chmap_set);
+
+       /*
         * sizeof(ai) is used instead of sizeof(*hdmi_ai) or
         * sizeof(*dp_ai) to avoid partial match/update problems when
         * the user switches between HDMI/DP monitors.
@@ -947,20 +955,10 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec,
                            "pin=%d channels=%d\n",
                            pin_nid,
                            channels);
-               hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca,
-                                          channels, per_pin->chmap,
-                                          per_pin->chmap_set);
                hdmi_stop_infoframe_trans(codec, pin_nid);
                hdmi_fill_audio_infoframe(codec, pin_nid,
                                            ai.bytes, sizeof(ai));
                hdmi_start_infoframe_trans(codec, pin_nid);
-       } else {
-               /* For non-pcm audio switch, setup new channel mapping
-                * accordingly */
-               if (per_pin->non_pcm != non_pcm)
-                       hdmi_setup_channel_mapping(codec, pin_nid, non_pcm, ca,
-                                                  channels, per_pin->chmap,
-                                                  per_pin->chmap_set);
        }
 
        per_pin->non_pcm = non_pcm;
@@ -1149,32 +1147,43 @@ static int hdmi_choose_cvt(struct hda_codec *codec,
 }
 
 static void haswell_config_cvts(struct hda_codec *codec,
-                       int pin_id, int mux_id)
+                       hda_nid_t pin_nid, int mux_idx)
 {
        struct hdmi_spec *spec = codec->spec;
-       struct hdmi_spec_per_pin *per_pin;
-       int pin_idx, mux_idx;
-       int curr;
-       int err;
+       hda_nid_t nid, end_nid;
+       int cvt_idx, curr;
+       struct hdmi_spec_per_cvt *per_cvt;
 
-       for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) {
-               per_pin = get_pin(spec, pin_idx);
+       /* configure all pins, including "no physical connection" ones */
+       end_nid = codec->start_nid + codec->num_nodes;
+       for (nid = codec->start_nid; nid < end_nid; nid++) {
+               unsigned int wid_caps = get_wcaps(codec, nid);
+               unsigned int wid_type = get_wcaps_type(wid_caps);
+
+               if (wid_type != AC_WID_PIN)
+                       continue;
 
-               if (pin_idx == pin_id)
+               if (nid == pin_nid)
                        continue;
 
-               curr = snd_hda_codec_read(codec, per_pin->pin_nid, 0,
+               curr = snd_hda_codec_read(codec, nid, 0,
                                          AC_VERB_GET_CONNECT_SEL, 0);
+               if (curr != mux_idx)
+                       continue;
 
-               /* Choose another unused converter */
-               if (curr == mux_id) {
-                       err = hdmi_choose_cvt(codec, pin_idx, NULL, &mux_idx);
-                       if (err < 0)
-                               return;
-                       snd_printdd("HDMI: choose converter %d for pin %d\n", mux_idx, pin_idx);
-                       snd_hda_codec_write_cache(codec, per_pin->pin_nid, 0,
+               /* choose an unassigned converter. The conveters in the
+                * connection list are in the same order as in the codec.
+                */
+               for (cvt_idx = 0; cvt_idx < spec->num_cvts; cvt_idx++) {
+                       per_cvt = get_cvt(spec, cvt_idx);
+                       if (!per_cvt->assigned) {
+                               snd_printdd("choose cvt %d for pin nid %d\n",
+                                       cvt_idx, nid);
+                               snd_hda_codec_write_cache(codec, nid, 0,
                                            AC_VERB_SET_CONNECT_SEL,
-                                           mux_idx);
+                                           cvt_idx);
+                               break;
+                       }
                }
        }
 }
@@ -1216,7 +1225,7 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo,
 
        /* configure unused pins to choose other converters */
        if (is_haswell(codec))
-               haswell_config_cvts(codec, pin_idx, mux_idx);
+               haswell_config_cvts(codec, per_pin->pin_nid, mux_idx);
 
        snd_hda_spdif_ctls_assign(codec, pin_idx, per_cvt->cvt_nid);
 
index bc07d36..bf313be 100644 (file)
@@ -2819,6 +2819,15 @@ static void alc269_fixup_hweq(struct hda_codec *codec,
        alc_write_coef_idx(codec, 0x1e, coef | 0x80);
 }
 
+static void alc269_fixup_headset_mic(struct hda_codec *codec,
+                                      const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+
+       if (action == HDA_FIXUP_ACT_PRE_PROBE)
+               spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
+}
+
 static void alc271_fixup_dmic(struct hda_codec *codec,
                              const struct hda_fixup *fix, int action)
 {
@@ -3439,6 +3448,9 @@ static void alc283_fixup_chromebook(struct hda_codec *codec,
                /* Set to manual mode */
                val = alc_read_coef_idx(codec, 0x06);
                alc_write_coef_idx(codec, 0x06, val & ~0x000c);
+               /* Enable Line1 input control by verb */
+               val = alc_read_coef_idx(codec, 0x1a);
+               alc_write_coef_idx(codec, 0x1a, val | (1 << 4));
                break;
        }
 }
@@ -3493,6 +3505,15 @@ static void alc282_fixup_asus_tx300(struct hda_codec *codec,
        }
 }
 
+static void alc290_fixup_mono_speakers(struct hda_codec *codec,
+                                      const struct hda_fixup *fix, int action)
+{
+       if (action == HDA_FIXUP_ACT_PRE_PROBE)
+               /* Remove DAC node 0x03, as it seems to be
+                  giving mono output */
+               snd_hda_override_wcaps(codec, 0x03, 0);
+}
+
 enum {
        ALC269_FIXUP_SONY_VAIO,
        ALC275_FIXUP_SONY_VAIO_GPIO2,
@@ -3504,6 +3525,7 @@ enum {
        ALC271_FIXUP_DMIC,
        ALC269_FIXUP_PCM_44K,
        ALC269_FIXUP_STEREO_DMIC,
+       ALC269_FIXUP_HEADSET_MIC,
        ALC269_FIXUP_QUANTA_MUTE,
        ALC269_FIXUP_LIFEBOOK,
        ALC269_FIXUP_AMIC,
@@ -3516,9 +3538,11 @@ enum {
        ALC269_FIXUP_HP_GPIO_LED,
        ALC269_FIXUP_INV_DMIC,
        ALC269_FIXUP_LENOVO_DOCK,
+       ALC286_FIXUP_SONY_MIC_NO_PRESENCE,
        ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT,
        ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
        ALC269_FIXUP_DELL2_MIC_NO_PRESENCE,
+       ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
        ALC269_FIXUP_HEADSET_MODE,
        ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
        ALC269_FIXUP_ASUS_X101_FUNC,
@@ -3531,6 +3555,8 @@ enum {
        ALC269VB_FIXUP_ORDISSIMO_EVE2,
        ALC283_FIXUP_CHROME_BOOK,
        ALC282_FIXUP_ASUS_TX300,
+       ALC283_FIXUP_INT_MIC,
+       ALC290_FIXUP_MONO_SPEAKERS,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -3599,6 +3625,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc269_fixup_stereo_dmic,
        },
+       [ALC269_FIXUP_HEADSET_MIC] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc269_fixup_headset_mic,
+       },
        [ALC269_FIXUP_QUANTA_MUTE] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc269_fixup_quanta_mute,
@@ -3708,6 +3738,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
        },
+       [ALC269_FIXUP_DELL3_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1a, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+       },
        [ALC269_FIXUP_HEADSET_MODE] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc_fixup_headset_mode,
@@ -3716,6 +3755,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc_fixup_headset_mode_no_hp_mic,
        },
+       [ALC286_FIXUP_SONY_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MIC
+       },
        [ALC269_FIXUP_ASUS_X101_FUNC] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc269_fixup_x101_headset_mic,
@@ -3790,6 +3838,22 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc282_fixup_asus_tx300,
        },
+       [ALC283_FIXUP_INT_MIC] = {
+               .type = HDA_FIXUP_VERBS,
+               .v.verbs = (const struct hda_verb[]) {
+                       {0x20, AC_VERB_SET_COEF_INDEX, 0x1a},
+                       {0x20, AC_VERB_SET_PROC_COEF, 0x0011},
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST
+       },
+       [ALC290_FIXUP_MONO_SPEAKERS] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc290_fixup_mono_speakers,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -3831,6 +3895,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0608, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x0609, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x0613, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_MONO_SPEAKERS),
        SND_PCI_QUIRK(0x1028, 0x15cc, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x15cd, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -3853,6 +3918,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC),
        SND_PCI_QUIRK(0x1043, 0x83ce, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC),
        SND_PCI_QUIRK(0x1043, 0x8516, "ASUS X101CH", ALC269_FIXUP_ASUS_X101),
+       SND_PCI_QUIRK(0x104d, 0x90b6, "Sony VAIO Pro 13", ALC286_FIXUP_SONY_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x104d, 0x9073, "Sony VAIO", ALC275_FIXUP_SONY_VAIO_GPIO2),
        SND_PCI_QUIRK(0x104d, 0x907b, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
        SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
@@ -3874,7 +3940,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
-       SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+       SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC),
        SND_PCI_QUIRK(0x17aa, 0x5026, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
@@ -3938,6 +4004,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {.id = ALC269_FIXUP_STEREO_DMIC, .name = "alc269-dmic"},
        {.id = ALC271_FIXUP_DMIC, .name = "alc271-dmic"},
        {.id = ALC269_FIXUP_INV_DMIC, .name = "inv-dmic"},
+       {.id = ALC269_FIXUP_HEADSET_MIC, .name = "headset-mic"},
        {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"},
        {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"},
        {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"},
@@ -4555,6 +4622,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
+       SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_ASUS_MODE4),
        SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT),
        SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2),
        SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD),
index c02405c..5810a06 100644 (file)
@@ -88,6 +88,7 @@ static int bfin_i2s_hw_params(struct snd_pcm_substream *substream,
        case SNDRV_PCM_FORMAT_S8:
                param.spctl |= 0x70;
                sport->wdsize = 1;
+               break;
        case SNDRV_PCM_FORMAT_S16_LE:
                param.spctl |= 0xf0;
                sport->wdsize = 2;
index 8af0434..259d1ac 100644 (file)
@@ -349,6 +349,9 @@ static int snd_soc_put_volsw_2r_st(struct snd_kcontrol *kcontrol,
        val = ucontrol->value.integer.value[0];
        val2 = ucontrol->value.integer.value[1];
 
+       if (val >= ARRAY_SIZE(st_table) || val2 >= ARRAY_SIZE(st_table))
+               return -EINVAL;
+
        err = snd_soc_update_bits(codec, reg, 0x3f, st_table[val].m);
        if (err < 0)
                return err;
index b8ba0ad..80555d7 100644 (file)
@@ -1225,13 +1225,18 @@ static int anc_status_control_put(struct snd_kcontrol *kcontrol,
        struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(codec->dev);
        struct device *dev = codec->dev;
        bool apply_fir, apply_iir;
-       int req, status;
+       unsigned int req;
+       int status;
 
        dev_dbg(dev, "%s: Enter.\n", __func__);
 
        mutex_lock(&drvdata->anc_lock);
 
        req = ucontrol->value.integer.value[0];
+       if (req >= ARRAY_SIZE(enum_anc_state)) {
+               status = -EINVAL;
+               goto cleanup;
+       }
        if (req != ANC_APPLY_FIR_IIR && req != ANC_APPLY_FIR &&
                req != ANC_APPLY_IIR) {
                dev_err(dev, "%s: ERROR: Unsupported status to set '%s'!\n",
index 41cdd16..8dbcacd 100644 (file)
@@ -1863,7 +1863,7 @@ static int max98095_put_eq_enum(struct snd_kcontrol *kcontrol,
        struct max98095_pdata *pdata = max98095->pdata;
        int channel = max98095_get_eq_channel(kcontrol->id.name);
        struct max98095_cdata *cdata;
-       int sel = ucontrol->value.integer.value[0];
+       unsigned int sel = ucontrol->value.integer.value[0];
        struct max98095_eq_cfg *coef_set;
        int fs, best, best_val, i;
        int regmask, regsave;
@@ -2016,7 +2016,7 @@ static int max98095_put_bq_enum(struct snd_kcontrol *kcontrol,
        struct max98095_pdata *pdata = max98095->pdata;
        int channel = max98095_get_bq_channel(codec, kcontrol->id.name);
        struct max98095_cdata *cdata;
-       int sel = ucontrol->value.integer.value[0];
+       unsigned int sel = ucontrol->value.integer.value[0];
        struct max98095_biquad_cfg *coef_set;
        int fs, best, best_val, i;
        int regmask, regsave;
index 46c5b4f..ca1be1d 100644 (file)
@@ -62,7 +62,7 @@ static int imx_sgtl5000_probe(struct platform_device *pdev)
        struct device_node *ssi_np, *codec_np;
        struct platform_device *ssi_pdev;
        struct i2c_client *codec_dev;
-       struct imx_sgtl5000_data *data;
+       struct imx_sgtl5000_data *data = NULL;
        int int_port, ext_port;
        int ret;
 
@@ -128,7 +128,7 @@ static int imx_sgtl5000_probe(struct platform_device *pdev)
                goto fail;
        }
 
-       data->codec_clk = devm_clk_get(&codec_dev->dev, NULL);
+       data->codec_clk = clk_get(&codec_dev->dev, NULL);
        if (IS_ERR(data->codec_clk)) {
                ret = PTR_ERR(data->codec_clk);
                goto fail;
@@ -172,6 +172,8 @@ static int imx_sgtl5000_probe(struct platform_device *pdev)
        return 0;
 
 fail:
+       if (data && !IS_ERR(data->codec_clk))
+               clk_put(data->codec_clk);
        if (ssi_np)
                of_node_put(ssi_np);
        if (codec_np)
@@ -185,6 +187,7 @@ static int imx_sgtl5000_remove(struct platform_device *pdev)
        struct imx_sgtl5000_data *data = platform_get_drvdata(pdev);
 
        snd_soc_unregister_card(&data->card);
+       clk_put(data->codec_clk);
 
        return 0;
 }
index 4d05613..1a38be0 100644 (file)
@@ -1380,7 +1380,6 @@ static int soc_probe_link_dais(struct snd_soc_card *card, int num, int order)
                                return -ENODEV;
 
                        list_add(&cpu_dai->dapm.list, &card->dapm_list);
-                       snd_soc_dapm_new_dai_widgets(&cpu_dai->dapm, cpu_dai);
                }
 
                if (cpu_dai->driver->probe) {
index 63fb521..6234a51 100644 (file)
@@ -299,19 +299,6 @@ static void usX2Y_error_urb_status(struct usX2Ydev *usX2Y,
        usX2Y_clients_stop(usX2Y);
 }
 
-static void usX2Y_error_sequence(struct usX2Ydev *usX2Y,
-                                struct snd_usX2Y_substream *subs, struct urb *urb)
-{
-       snd_printk(KERN_ERR
-"Sequence Error!(hcd_frame=%i ep=%i%s;wait=%i,frame=%i).\n"
-"Most probably some urb of usb-frame %i is still missing.\n"
-"Cause could be too long delays in usb-hcd interrupt handling.\n",
-                  usb_get_current_frame_number(usX2Y->dev),
-                  subs->endpoint, usb_pipein(urb->pipe) ? "in" : "out",
-                  usX2Y->wait_iso_frame, urb->start_frame, usX2Y->wait_iso_frame);
-       usX2Y_clients_stop(usX2Y);
-}
-
 static void i_usX2Y_urb_complete(struct urb *urb)
 {
        struct snd_usX2Y_substream *subs = urb->context;
@@ -328,12 +315,9 @@ static void i_usX2Y_urb_complete(struct urb *urb)
                usX2Y_error_urb_status(usX2Y, subs, urb);
                return;
        }
-       if (likely((urb->start_frame & 0xFFFF) == (usX2Y->wait_iso_frame & 0xFFFF)))
-               subs->completed_urb = urb;
-       else {
-               usX2Y_error_sequence(usX2Y, subs, urb);
-               return;
-       }
+
+       subs->completed_urb = urb;
+
        {
                struct snd_usX2Y_substream *capsubs = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE],
                        *playbacksubs = usX2Y->subs[SNDRV_PCM_STREAM_PLAYBACK];
index f2a1acd..814d0e8 100644 (file)
@@ -244,13 +244,8 @@ static void i_usX2Y_usbpcm_urb_complete(struct urb *urb)
                usX2Y_error_urb_status(usX2Y, subs, urb);
                return;
        }
-       if (likely((urb->start_frame & 0xFFFF) == (usX2Y->wait_iso_frame & 0xFFFF)))
-               subs->completed_urb = urb;
-       else {
-               usX2Y_error_sequence(usX2Y, subs, urb);
-               return;
-       }
 
+       subs->completed_urb = urb;
        capsubs = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE];
        capsubs2 = usX2Y->subs[SNDRV_PCM_STREAM_CAPTURE + 2];
        playbacksubs = usX2Y->subs[SNDRV_PCM_STREAM_PLAYBACK];
index 099e7cd..7c43479 100644 (file)
@@ -5,7 +5,6 @@
 #include <stdbool.h>
 #include <sys/vfs.h>
 #include <sys/mount.h>
-#include <linux/magic.h>
 #include <linux/kernel.h>
 
 #include "debugfs.h"
index 3a0ff7f..64c043b 100644 (file)
@@ -770,6 +770,7 @@ check: $(OUTPUT)common-cmds.h
 install-bin: all
        $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
        $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
+       $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
        $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 ifndef NO_LIBPERL
        $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
index 9570c2b..b2519e4 100644 (file)
@@ -32,7 +32,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
                             struct perf_tsc_conversion *tc)
 {
-       bool cap_usr_time_zero;
+       bool cap_user_time_zero;
        u32 seq;
        int i = 0;
 
@@ -42,7 +42,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
                tc->time_mult = pc->time_mult;
                tc->time_shift = pc->time_shift;
                tc->time_zero = pc->time_zero;
-               cap_usr_time_zero = pc->cap_usr_time_zero;
+               cap_user_time_zero = pc->cap_user_time_zero;
                rmb();
                if (pc->lock == seq && !(seq & 1))
                        break;
@@ -52,7 +52,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
                }
        }
 
-       if (!cap_usr_time_zero)
+       if (!cap_user_time_zero)
                return -EOPNOTSUPP;
 
        return 0;
index 423875c..afe377b 100644 (file)
@@ -321,8 +321,6 @@ found:
        return perf_event__repipe(tool, event_sw, &sample_sw, machine);
 }
 
-extern volatile int session_done;
-
 static void sig_handler(int sig __maybe_unused)
 {
        session_done = 1;
index c2dff9c..9b5f077 100644 (file)
@@ -101,7 +101,7 @@ static int setup_cpunode_map(void)
 
        dir1 = opendir(PATH_SYS_NODE);
        if (!dir1)
-               return -1;
+               return 0;
 
        while ((dent1 = readdir(dir1)) != NULL) {
                if (dent1->d_type != DT_DIR ||
index 8e50d8d..72eae74 100644 (file)
@@ -401,8 +401,6 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
        return 0;
 }
 
-extern volatile int session_done;
-
 static void sig_handler(int sig __maybe_unused)
 {
        session_done = 1;
@@ -568,6 +566,9 @@ static int __cmd_report(struct perf_report *rep)
                }
        }
 
+       if (session_done())
+               return 0;
+
        if (nr_samples == 0) {
                ui__error("The %s file has no samples!\n", session->filename);
                return 0;
index 7f31a3d..9c333ff 100644 (file)
@@ -553,8 +553,6 @@ static struct perf_tool perf_script = {
        .ordering_requires_timestamps = true,
 };
 
-extern volatile int session_done;
-
 static void sig_handler(int sig __maybe_unused)
 {
        session_done = 1;
index f686d5f..5098f14 100644 (file)
@@ -457,6 +457,7 @@ static int __run_perf_stat(int argc, const char **argv)
                        perror("failed to prepare workload");
                        return -1;
                }
+               child_pid = evsel_list->workload.pid;
        }
 
        if (group)
index f5aa637..71aa3e3 100644 (file)
 #include <sys/mman.h>
 #include <linux/futex.h>
 
+/* For older distros: */
+#ifndef MAP_STACK
+# define MAP_STACK             0x20000
+#endif
+
+#ifndef MADV_HWPOISON
+# define MADV_HWPOISON         100
+#endif
+
+#ifndef MADV_MERGEABLE
+# define MADV_MERGEABLE                12
+#endif
+
+#ifndef MADV_UNMERGEABLE
+# define MADV_UNMERGEABLE      13
+#endif
+
 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
                                         unsigned long arg,
                                         u8 arg_idx __maybe_unused,
@@ -1038,6 +1055,7 @@ static int trace__replay(struct trace *trace)
 
        trace->tool.sample        = trace__process_sample;
        trace->tool.mmap          = perf_event__process_mmap;
+       trace->tool.mmap2         = perf_event__process_mmap2;
        trace->tool.comm          = perf_event__process_comm;
        trace->tool.exit          = perf_event__process_exit;
        trace->tool.fork          = perf_event__process_fork;
index 214e17e..5f6f9b3 100644 (file)
@@ -87,7 +87,7 @@ CFLAGS += -Wall
 CFLAGS += -Wextra
 CFLAGS += -std=gnu99
 
-EXTLIBS = -lelf -lpthread -lrt -lm
+EXTLIBS = -lelf -lpthread -lrt -lm -ldl
 
 ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y)
   CFLAGS += -fstack-protector-all
@@ -180,6 +180,9 @@ FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
 ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
   CFLAGS += -DLIBELF_MMAP
 endif
+ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM),y)
+  CFLAGS += -DHAVE_ELF_GETPHDRNUM
+endif
 
 # include ARCH specific config
 -include $(src-perf)/arch/$(ARCH)/Makefile
index 708fb8e..f793057 100644 (file)
@@ -61,6 +61,15 @@ int main(void)
 }
 endef
 
+define SOURCE_ELF_GETPHDRNUM
+#include <libelf.h>
+int main(void)
+{
+       size_t dst;
+       return elf_getphdrnum(0, &dst);
+}
+endef
+
 ifndef NO_SLANG
 define SOURCE_SLANG
 #include <slang.h>
@@ -210,6 +219,7 @@ define SOURCE_LIBAUDIT
 
 int main(void)
 {
+       printf(\"error message: %s\", audit_errno_to_name(0));
        return audit_open();
 }
 endef
index bfc5a27..7eae548 100644 (file)
@@ -809,7 +809,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
                    end = map__rip_2objdump(map, sym->end);
 
                offset = line_ip - start;
-               if (offset < 0 || (u64)line_ip > end)
+               if ((u64)line_ip < start || (u64)line_ip > end)
                        offset = -1;
                else
                        parsed_line = tmp2 + 1;
index 3e5f543..7defd77 100644 (file)
@@ -263,6 +263,21 @@ bool die_is_signed_type(Dwarf_Die *tp_die)
 }
 
 /**
+ * die_is_func_def - Ensure that this DIE is a subprogram and definition
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is a subprogram and NOT a declaration. This
+ * returns true if @dw_die is a function definition.
+ **/
+bool die_is_func_def(Dwarf_Die *dw_die)
+{
+       Dwarf_Attribute attr;
+
+       return (dwarf_tag(dw_die) == DW_TAG_subprogram &&
+               dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL);
+}
+
+/**
  * die_get_data_member_location - Get the data-member offset
  * @mb_die: a DIE of a member of a data structure
  * @offs: The offset of the member in the data structure
@@ -392,6 +407,10 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
 {
        struct __addr_die_search_param *ad = data;
 
+       /*
+        * Since a declaration entry doesn't has given pc, this always returns
+        * function definition entry.
+        */
        if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
            dwarf_haspc(fn_die, ad->addr)) {
                memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
@@ -407,7 +426,7 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
  * @die_mem: a buffer for result DIE
  *
  * Search a non-inlined function DIE which includes @addr. Stores the
- * DIE to @die_mem and returns it if found. Returns NULl if failed.
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
  */
 Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
                                    Dwarf_Die *die_mem)
@@ -435,15 +454,32 @@ static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data)
 }
 
 /**
+ * die_find_top_inlinefunc - Search the top inlined function at given address
+ * @sp_die: a subprogram DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search an inlined function DIE which includes @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ * Even if several inlined functions are expanded recursively, this
+ * doesn't trace it down, and returns the topmost one.
+ */
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+                                  Dwarf_Die *die_mem)
+{
+       return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
+}
+
+/**
  * die_find_inlinefunc - Search an inlined function at given address
- * @cu_die: a CU DIE which including @addr
+ * @sp_die: a subprogram DIE which including @addr
  * @addr: target address
  * @die_mem: a buffer for result DIE
  *
  * Search an inlined function DIE which includes @addr. Stores the
- * DIE to @die_mem and returns it if found. Returns NULl if failed.
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
  * If several inlined functions are expanded recursively, this trace
- * it and returns deepest one.
+ * it down and returns deepest one.
  */
 Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
                               Dwarf_Die *die_mem)
index 6ce1717..b4fe90c 100644 (file)
@@ -38,6 +38,9 @@ extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
 extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
                        int (*callback)(Dwarf_Die *, void *), void *data);
 
+/* Ensure that this DIE is a subprogram and definition (not declaration) */
+extern bool die_is_func_def(Dwarf_Die *dw_die);
+
 /* Compare diename and tname */
 extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
 
@@ -76,7 +79,11 @@ extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
 extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
                                    Dwarf_Die *die_mem);
 
-/* Search an inlined function including given address */
+/* Search the top inlined function including given address */
+extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+                                         Dwarf_Die *die_mem);
+
+/* Search the deepest inlined function including given address */
 extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
                                      Dwarf_Die *die_mem);
 
index 26441d0..c3e5a3b 100644 (file)
@@ -199,9 +199,11 @@ static int write_buildid(char *name, size_t name_len, u8 *build_id,
        return write_padded(fd, name, name_len + 1, len);
 }
 
-static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
-                               u16 misc, int fd)
+static int __dsos__write_buildid_table(struct list_head *head,
+                                      struct machine *machine,
+                                      pid_t pid, u16 misc, int fd)
 {
+       char nm[PATH_MAX];
        struct dso *pos;
 
        dsos__for_each_with_build_id(pos, head) {
@@ -215,6 +217,10 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
                if (is_vdso_map(pos->short_name)) {
                        name = (char *) VDSO__MAP_NAME;
                        name_len = sizeof(VDSO__MAP_NAME) + 1;
+               } else if (dso__is_kcore(pos)) {
+                       machine__mmap_name(machine, nm, sizeof(nm));
+                       name = nm;
+                       name_len = strlen(nm) + 1;
                } else {
                        name = pos->long_name;
                        name_len = pos->long_name_len + 1;
@@ -240,10 +246,10 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
                umisc = PERF_RECORD_MISC_GUEST_USER;
        }
 
-       err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid,
-                                         kmisc, fd);
+       err = __dsos__write_buildid_table(&machine->kernel_dsos, machine,
+                                         machine->pid, kmisc, fd);
        if (err == 0)
-               err = __dsos__write_buildid_table(&machine->user_dsos,
+               err = __dsos__write_buildid_table(&machine->user_dsos, machine,
                                                  machine->pid, umisc, fd);
        return err;
 }
@@ -375,23 +381,31 @@ out_free:
        return err;
 }
 
-static int dso__cache_build_id(struct dso *dso, const char *debugdir)
+static int dso__cache_build_id(struct dso *dso, struct machine *machine,
+                              const char *debugdir)
 {
        bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
        bool is_vdso = is_vdso_map(dso->short_name);
+       char *name = dso->long_name;
+       char nm[PATH_MAX];
 
-       return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id),
-                                    dso->long_name, debugdir,
-                                    is_kallsyms, is_vdso);
+       if (dso__is_kcore(dso)) {
+               is_kallsyms = true;
+               machine__mmap_name(machine, nm, sizeof(nm));
+               name = nm;
+       }
+       return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
+                                    debugdir, is_kallsyms, is_vdso);
 }
 
-static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
+static int __dsos__cache_build_ids(struct list_head *head,
+                                  struct machine *machine, const char *debugdir)
 {
        struct dso *pos;
        int err = 0;
 
        dsos__for_each_with_build_id(pos, head)
-               if (dso__cache_build_id(pos, debugdir))
+               if (dso__cache_build_id(pos, machine, debugdir))
                        err = -1;
 
        return err;
@@ -399,8 +413,9 @@ static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
 
 static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
 {
-       int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir);
-       ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir);
+       int ret = __dsos__cache_build_ids(&machine->kernel_dsos, machine,
+                                         debugdir);
+       ret |= __dsos__cache_build_ids(&machine->user_dsos, machine, debugdir);
        return ret;
 }
 
@@ -2753,6 +2768,18 @@ int perf_session__read_header(struct perf_session *session)
        if (perf_file_header__read(&f_header, header, fd) < 0)
                return -EINVAL;
 
+       /*
+        * Sanity check that perf.data was written cleanly; data size is
+        * initialized to 0 and updated only if the on_exit function is run.
+        * If data size is still 0 then the file contains only partial
+        * information.  Just warn user and process it as much as it can.
+        */
+       if (f_header.data.size == 0) {
+               pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n"
+                          "Was the 'perf record' command properly terminated?\n",
+                          session->filename);
+       }
+
        nr_attrs = f_header.attrs.size / f_header.attr_size;
        lseek(fd, f_header.attrs.offset, SEEK_SET);
 
index 46a0d35..9ff6cf3 100644 (file)
@@ -611,6 +611,8 @@ void hists__collapse_resort(struct hists *hists)
        next = rb_first(root);
 
        while (next) {
+               if (session_done())
+                       break;
                n = rb_entry(next, struct hist_entry, rb_node_in);
                next = rb_next(&n->rb_node_in);
 
index 933d14f..6188d28 100644 (file)
@@ -792,7 +792,7 @@ static int machine__create_modules(struct machine *machine)
                modules = path;
        }
 
-       if (symbol__restricted_filename(path, "/proc/modules"))
+       if (symbol__restricted_filename(modules, "/proc/modules"))
                return -1;
 
        file = fopen(modules, "r");
index be03293..c09e0a9 100644 (file)
@@ -118,7 +118,6 @@ static const Dwfl_Callbacks offline_callbacks = {
 static int debuginfo__init_offline_dwarf(struct debuginfo *self,
                                         const char *path)
 {
-       Dwfl_Module *mod;
        int fd;
 
        fd = open(path, O_RDONLY);
@@ -129,11 +128,11 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *self,
        if (!self->dwfl)
                goto error;
 
-       mod = dwfl_report_offline(self->dwfl, "", "", fd);
-       if (!mod)
+       self->mod = dwfl_report_offline(self->dwfl, "", "", fd);
+       if (!self->mod)
                goto error;
 
-       self->dbg = dwfl_module_getdwarf(mod, &self->bias);
+       self->dbg = dwfl_module_getdwarf(self->mod, &self->bias);
        if (!self->dbg)
                goto error;
 
@@ -676,37 +675,42 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
 }
 
 /* Convert subprogram DIE to trace point */
-static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
-                                 bool retprobe, struct probe_trace_point *tp)
+static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
+                                 Dwarf_Addr paddr, bool retprobe,
+                                 struct probe_trace_point *tp)
 {
        Dwarf_Addr eaddr, highaddr;
-       const char *name;
-
-       /* Copy the name of probe point */
-       name = dwarf_diename(sp_die);
-       if (name) {
-               if (dwarf_entrypc(sp_die, &eaddr) != 0) {
-                       pr_warning("Failed to get entry address of %s\n",
-                                  dwarf_diename(sp_die));
-                       return -ENOENT;
-               }
-               if (dwarf_highpc(sp_die, &highaddr) != 0) {
-                       pr_warning("Failed to get end address of %s\n",
-                                  dwarf_diename(sp_die));
-                       return -ENOENT;
-               }
-               if (paddr > highaddr) {
-                       pr_warning("Offset specified is greater than size of %s\n",
-                                  dwarf_diename(sp_die));
-                       return -EINVAL;
-               }
-               tp->symbol = strdup(name);
-               if (tp->symbol == NULL)
-                       return -ENOMEM;
-               tp->offset = (unsigned long)(paddr - eaddr);
-       } else
-               /* This function has no name. */
-               tp->offset = (unsigned long)paddr;
+       GElf_Sym sym;
+       const char *symbol;
+
+       /* Verify the address is correct */
+       if (dwarf_entrypc(sp_die, &eaddr) != 0) {
+               pr_warning("Failed to get entry address of %s\n",
+                          dwarf_diename(sp_die));
+               return -ENOENT;
+       }
+       if (dwarf_highpc(sp_die, &highaddr) != 0) {
+               pr_warning("Failed to get end address of %s\n",
+                          dwarf_diename(sp_die));
+               return -ENOENT;
+       }
+       if (paddr > highaddr) {
+               pr_warning("Offset specified is greater than size of %s\n",
+                          dwarf_diename(sp_die));
+               return -EINVAL;
+       }
+
+       /* Get an appropriate symbol from symtab */
+       symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+       if (!symbol) {
+               pr_warning("Failed to find symbol at 0x%lx\n",
+                          (unsigned long)paddr);
+               return -ENOENT;
+       }
+       tp->offset = (unsigned long)(paddr - sym.st_value);
+       tp->symbol = strdup(symbol);
+       if (!tp->symbol)
+               return -ENOMEM;
 
        /* Return probe must be on the head of a subprogram */
        if (retprobe) {
@@ -734,7 +738,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
        }
 
        /* If not a real subprogram, find a real one */
-       if (dwarf_tag(sc_die) != DW_TAG_subprogram) {
+       if (!die_is_func_def(sc_die)) {
                if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
                        pr_warning("Failed to find probe point in any "
                                   "functions.\n");
@@ -980,12 +984,10 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
        struct dwarf_callback_param *param = data;
        struct probe_finder *pf = param->data;
        struct perf_probe_point *pp = &pf->pev->point;
-       Dwarf_Attribute attr;
 
        /* Check tag and diename */
-       if (dwarf_tag(sp_die) != DW_TAG_subprogram ||
-           !die_compare_name(sp_die, pp->function) ||
-           dwarf_attr(sp_die, DW_AT_declaration, &attr))
+       if (!die_is_func_def(sp_die) ||
+           !die_compare_name(sp_die, pp->function))
                return DWARF_CB_OK;
 
        /* Check declared file */
@@ -1151,7 +1153,7 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
        tev = &tf->tevs[tf->ntevs++];
 
        /* Trace point should be converted from subprogram DIE */
-       ret = convert_to_trace_point(&pf->sp_die, pf->addr,
+       ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr,
                                     pf->pev->point.retprobe, &tev->point);
        if (ret < 0)
                return ret;
@@ -1183,7 +1185,7 @@ int debuginfo__find_trace_events(struct debuginfo *self,
 {
        struct trace_event_finder tf = {
                        .pf = {.pev = pev, .callback = add_probe_trace_event},
-                       .max_tevs = max_tevs};
+                       .mod = self->mod, .max_tevs = max_tevs};
        int ret;
 
        /* Allocate result tevs array */
@@ -1252,7 +1254,7 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
        vl = &af->vls[af->nvls++];
 
        /* Trace point should be converted from subprogram DIE */
-       ret = convert_to_trace_point(&pf->sp_die, pf->addr,
+       ret = convert_to_trace_point(&pf->sp_die, af->mod, pf->addr,
                                     pf->pev->point.retprobe, &vl->point);
        if (ret < 0)
                return ret;
@@ -1291,6 +1293,7 @@ int debuginfo__find_available_vars_at(struct debuginfo *self,
 {
        struct available_var_finder af = {
                        .pf = {.pev = pev, .callback = add_available_vars},
+                       .mod = self->mod,
                        .max_vls = max_vls, .externs = externs};
        int ret;
 
@@ -1324,8 +1327,8 @@ int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr,
                                struct perf_probe_point *ppt)
 {
        Dwarf_Die cudie, spdie, indie;
-       Dwarf_Addr _addr, baseaddr;
-       const char *fname = NULL, *func = NULL, *tmp;
+       Dwarf_Addr _addr = 0, baseaddr = 0;
+       const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp;
        int baseline = 0, lineno = 0, ret = 0;
 
        /* Adjust address with bias */
@@ -1346,27 +1349,36 @@ int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr,
        /* Find a corresponding function (name, baseline and baseaddr) */
        if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) {
                /* Get function entry information */
-               tmp = dwarf_diename(&spdie);
-               if (!tmp ||
+               func = basefunc = dwarf_diename(&spdie);
+               if (!func ||
                    dwarf_entrypc(&spdie, &baseaddr) != 0 ||
-                   dwarf_decl_line(&spdie, &baseline) != 0)
+                   dwarf_decl_line(&spdie, &baseline) != 0) {
+                       lineno = 0;
                        goto post;
-               func = tmp;
+               }
 
-               if (addr == (unsigned long)baseaddr)
+               if (addr == (unsigned long)baseaddr) {
                        /* Function entry - Relative line number is 0 */
                        lineno = baseline;
-               else if (die_find_inlinefunc(&spdie, (Dwarf_Addr)addr,
-                                            &indie)) {
+                       fname = dwarf_decl_file(&spdie);
+                       goto post;
+               }
+
+               /* Track down the inline functions step by step */
+               while (die_find_top_inlinefunc(&spdie, (Dwarf_Addr)addr,
+                                               &indie)) {
+                       /* There is an inline function */
                        if (dwarf_entrypc(&indie, &_addr) == 0 &&
-                           _addr == addr)
+                           _addr == addr) {
                                /*
                                 * addr is at an inline function entry.
                                 * In this case, lineno should be the call-site
-                                * line number.
+                                * line number. (overwrite lineinfo)
                                 */
                                lineno = die_get_call_lineno(&indie);
-                       else {
+                               fname = die_get_call_file(&indie);
+                               break;
+                       } else {
                                /*
                                 * addr is in an inline function body.
                                 * Since lineno points one of the lines
@@ -1374,19 +1386,27 @@ int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr,
                                 * be the entry line of the inline function.
                                 */
                                tmp = dwarf_diename(&indie);
-                               if (tmp &&
-                                   dwarf_decl_line(&spdie, &baseline) == 0)
-                                       func = tmp;
+                               if (!tmp ||
+                                   dwarf_decl_line(&indie, &baseline) != 0)
+                                       break;
+                               func = tmp;
+                               spdie = indie;
                        }
                }
+               /* Verify the lineno and baseline are in a same file */
+               tmp = dwarf_decl_file(&spdie);
+               if (!tmp || strcmp(tmp, fname) != 0)
+                       lineno = 0;
        }
 
 post:
        /* Make a relative line number or an offset */
        if (lineno)
                ppt->line = lineno - baseline;
-       else if (func)
+       else if (basefunc) {
                ppt->offset = addr - (unsigned long)baseaddr;
+               func = basefunc;
+       }
 
        /* Duplicate strings */
        if (func) {
@@ -1474,7 +1494,7 @@ static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
        return 0;
 }
 
-/* Search function from function name */
+/* Search function definition from function name */
 static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
 {
        struct dwarf_callback_param *param = data;
@@ -1485,7 +1505,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
        if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die)))
                return DWARF_CB_OK;
 
-       if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
+       if (die_is_func_def(sp_die) &&
            die_compare_name(sp_die, lr->function)) {
                lf->fname = dwarf_decl_file(sp_die);
                dwarf_decl_line(sp_die, &lr->offset);
index 17e94d0..3b7d630 100644 (file)
@@ -23,6 +23,7 @@ static inline int is_c_varname(const char *name)
 /* debug information structure */
 struct debuginfo {
        Dwarf           *dbg;
+       Dwfl_Module     *mod;
        Dwfl            *dwfl;
        Dwarf_Addr      bias;
 };
@@ -77,6 +78,7 @@ struct probe_finder {
 
 struct trace_event_finder {
        struct probe_finder     pf;
+       Dwfl_Module             *mod;           /* For solving symbols */
        struct probe_trace_event *tevs;         /* Found trace events */
        int                     ntevs;          /* Number of trace events */
        int                     max_tevs;       /* Max number of trace events */
@@ -84,6 +86,7 @@ struct trace_event_finder {
 
 struct available_var_finder {
        struct probe_finder     pf;
+       Dwfl_Module             *mod;           /* For solving symbols */
        struct variable_list    *vls;           /* Found variable lists */
        int                     nvls;           /* Number of variable lists */
        int                     max_vls;        /* Max no. of variable lists */
index 51f5edf..568b750 100644 (file)
@@ -256,6 +256,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
                tool->sample = process_event_sample_stub;
        if (tool->mmap == NULL)
                tool->mmap = process_event_stub;
+       if (tool->mmap2 == NULL)
+               tool->mmap2 = process_event_stub;
        if (tool->comm == NULL)
                tool->comm = process_event_stub;
        if (tool->fork == NULL)
@@ -531,6 +533,9 @@ static int flush_sample_queue(struct perf_session *s,
                return 0;
 
        list_for_each_entry_safe(iter, tmp, head, list) {
+               if (session_done())
+                       return 0;
+
                if (iter->timestamp > limit)
                        break;
 
@@ -1160,7 +1165,6 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
        }
 }
 
-#define session_done() (*(volatile int *)(&session_done))
 volatile int session_done;
 
 static int __perf_session__process_pipe_events(struct perf_session *self,
@@ -1308,7 +1312,7 @@ int __perf_session__process_events(struct perf_session *session,
        file_offset = page_offset;
        head = data_offset - page_offset;
 
-       if (data_offset + data_size < file_size)
+       if (data_size && (data_offset + data_size < file_size))
                file_size = data_offset + data_size;
 
        progress_next = file_size / 16;
@@ -1372,10 +1376,13 @@ more:
                                    "Processing events...");
        }
 
+       err = 0;
+       if (session_done())
+               goto out_err;
+
        if (file_pos < file_size)
                goto more;
 
-       err = 0;
        /* do the final flush for ordered samples */
        session->ordered_samples.next_flush = ULLONG_MAX;
        err = flush_sample_queue(session, tool);
index 3aa75fb..04bf737 100644 (file)
@@ -124,4 +124,8 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session,
 
 #define perf_session__set_tracepoints_handlers(session, array) \
        __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array))
+
+extern volatile int session_done;
+
+#define session_done() (*(volatile int *)(&session_done))
 #endif /* __PERF_SESSION_H */
index a7b9ab5..a9c829b 100644 (file)
@@ -8,6 +8,22 @@
 #include "symbol.h"
 #include "debug.h"
 
+#ifndef HAVE_ELF_GETPHDRNUM
+static int elf_getphdrnum(Elf *elf, size_t *dst)
+{
+       GElf_Ehdr gehdr;
+       GElf_Ehdr *ehdr;
+
+       ehdr = gelf_getehdr(elf, &gehdr);
+       if (!ehdr)
+               return -1;
+
+       *dst = ehdr->e_phnum;
+
+       return 0;
+}
+#endif
+
 #ifndef NT_GNU_BUILD_ID
 #define NT_GNU_BUILD_ID 3
 #endif
index fe7a27d..e9e1c03 100644 (file)
@@ -186,7 +186,7 @@ void parse_proc_kallsyms(struct pevent *pevent,
        char *next = NULL;
        char *addr_str;
        char *mod;
-       char *fmt;
+       char *fmt = NULL;
 
        line = strtok_r(file, "\n", &next);
        while (line) {
index 779262f..fbe1a48 100644 (file)
@@ -27,3 +27,6 @@ config HAVE_KVM_MSI
 
 config HAVE_KVM_CPU_RELAX_INTERCEPT
        bool
+
+config KVM_VFIO
+       bool
index 8a39dda..8631d9c 100644 (file)
@@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void async_pf_execute(struct work_struct *work)
 {
-       struct page *page = NULL;
        struct kvm_async_pf *apf =
                container_of(work, struct kvm_async_pf, work);
        struct mm_struct *mm = apf->mm;
@@ -68,14 +67,12 @@ static void async_pf_execute(struct work_struct *work)
 
        use_mm(mm);
        down_read(&mm->mmap_sem);
-       get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+       get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
        up_read(&mm->mmap_sem);
        unuse_mm(mm);
 
        spin_lock(&vcpu->async_pf.lock);
        list_add_tail(&apf->link, &vcpu->async_pf.done);
-       apf->page = page;
-       apf->done = true;
        spin_unlock(&vcpu->async_pf.lock);
 
        /*
@@ -83,7 +80,7 @@ static void async_pf_execute(struct work_struct *work)
         * this point
         */
 
-       trace_kvm_async_pf_completed(addr, page, gva);
+       trace_kvm_async_pf_completed(addr, gva);
 
        if (waitqueue_active(&vcpu->wq))
                wake_up_interruptible(&vcpu->wq);
@@ -99,9 +96,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                struct kvm_async_pf *work =
                        list_entry(vcpu->async_pf.queue.next,
                                   typeof(*work), queue);
-               cancel_work_sync(&work->work);
                list_del(&work->queue);
-               if (!work->done) { /* work was canceled */
+               if (cancel_work_sync(&work->work)) {
                        mmdrop(work->mm);
                        kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
                        kmem_cache_free(async_pf_cache, work);
@@ -114,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                        list_entry(vcpu->async_pf.done.next,
                                   typeof(*work), link);
                list_del(&work->link);
-               if (!is_error_page(work->page))
-                       kvm_release_page_clean(work->page);
                kmem_cache_free(async_pf_cache, work);
        }
        spin_unlock(&vcpu->async_pf.lock);
@@ -135,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
                list_del(&work->link);
                spin_unlock(&vcpu->async_pf.lock);
 
-               if (work->page)
-                       kvm_arch_async_page_ready(vcpu, work);
+               kvm_arch_async_page_ready(vcpu, work);
                kvm_arch_async_page_present(vcpu, work);
 
                list_del(&work->queue);
                vcpu->async_pf.queued--;
-               if (!is_error_page(work->page))
-                       kvm_release_page_clean(work->page);
                kmem_cache_free(async_pf_cache, work);
        }
 }
@@ -165,8 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
        if (!work)
                return 0;
 
-       work->page = NULL;
-       work->done = false;
+       work->wakeup_all = false;
        work->vcpu = vcpu;
        work->gva = gva;
        work->addr = gfn_to_hva(vcpu->kvm, gfn);
@@ -206,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
        if (!work)
                return -ENOMEM;
 
-       work->page = KVM_ERR_PTR_BAD_PAGE;
+       work->wakeup_all = true;
        INIT_LIST_HEAD(&work->queue); /* for list_del to work */
 
        spin_lock(&vcpu->async_pf.lock);
index 72a130b..0df7d4b 100644 (file)
@@ -79,7 +79,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
        flags = IOMMU_READ;
        if (!(slot->flags & KVM_MEM_READONLY))
                flags |= IOMMU_WRITE;
-       if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+       if (!kvm->arch.iommu_noncoherent)
                flags |= IOMMU_CACHE;
 
 
@@ -103,6 +103,10 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
                while ((gfn << PAGE_SHIFT) & (page_size - 1))
                        page_size >>= 1;
 
+               /* Make sure hva is aligned to the page size we want to map */
+               while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
+                       page_size >>= 1;
+
                /*
                 * Pin all pages we are about to map in memory. This is
                 * important because we unmap and unpin in 4kb steps later.
@@ -140,6 +144,9 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
 
+       if (kvm->arch.iommu_noncoherent)
+               kvm_arch_register_noncoherent_dma(kvm);
+
        idx = srcu_read_lock(&kvm->srcu);
        slots = kvm_memslots(kvm);
 
@@ -158,7 +165,8 @@ int kvm_assign_device(struct kvm *kvm,
 {
        struct pci_dev *pdev = NULL;
        struct iommu_domain *domain = kvm->arch.iommu_domain;
-       int r, last_flags;
+       int r;
+       bool noncoherent;
 
        /* check if iommu exists and in use */
        if (!domain)
@@ -174,15 +182,13 @@ int kvm_assign_device(struct kvm *kvm,
                return r;
        }
 
-       last_flags = kvm->arch.iommu_flags;
-       if (iommu_domain_has_cap(kvm->arch.iommu_domain,
-                                IOMMU_CAP_CACHE_COHERENCY))
-               kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+       noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain,
+                                           IOMMU_CAP_CACHE_COHERENCY);
 
        /* Check if need to update IOMMU page table for guest memory */
-       if ((last_flags ^ kvm->arch.iommu_flags) ==
-                       KVM_IOMMU_CACHE_COHERENCY) {
+       if (noncoherent != kvm->arch.iommu_noncoherent) {
                kvm_iommu_unmap_memslots(kvm);
+               kvm->arch.iommu_noncoherent = noncoherent;
                r = kvm_iommu_map_memslots(kvm);
                if (r)
                        goto out_unmap;
@@ -190,11 +196,7 @@ int kvm_assign_device(struct kvm *kvm,
 
        pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
 
-       printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
-               assigned_dev->host_segnr,
-               assigned_dev->host_busnr,
-               PCI_SLOT(assigned_dev->host_devfn),
-               PCI_FUNC(assigned_dev->host_devfn));
+       dev_info(&pdev->dev, "kvm assign device\n");
 
        return 0;
 out_unmap:
@@ -220,11 +222,7 @@ int kvm_deassign_device(struct kvm *kvm,
 
        pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
 
-       printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
-               assigned_dev->host_segnr,
-               assigned_dev->host_busnr,
-               PCI_SLOT(assigned_dev->host_devfn),
-               PCI_FUNC(assigned_dev->host_devfn));
+       dev_info(&pdev->dev, "kvm deassign device\n");
 
        return 0;
 }
@@ -336,6 +334,9 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 
        srcu_read_unlock(&kvm->srcu, idx);
 
+       if (kvm->arch.iommu_noncoherent)
+               kvm_arch_unregister_noncoherent_dma(kvm);
+
        return 0;
 }
 
@@ -350,6 +351,7 @@ int kvm_iommu_unmap_guest(struct kvm *kvm)
        mutex_lock(&kvm->slots_lock);
        kvm_iommu_unmap_memslots(kvm);
        kvm->arch.iommu_domain = NULL;
+       kvm->arch.iommu_noncoherent = false;
        mutex_unlock(&kvm->slots_lock);
 
        iommu_domain_free(domain);
index 979bff4..10015d6 100644 (file)
@@ -70,7 +70,8 @@ MODULE_LICENSE("GPL");
  *             kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
+static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
@@ -186,6 +187,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
                ++kvm->stat.remote_tlb_flush;
        cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
+EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
@@ -490,9 +492,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
        if (r)
                goto out_err;
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
 
        return kvm;
 
@@ -540,13 +542,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 /*
  * Free any memory in @free but not in @dont.
  */
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
                                  struct kvm_memory_slot *dont)
 {
        if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
                kvm_destroy_dirty_bitmap(free);
 
-       kvm_arch_free_memslot(free, dont);
+       kvm_arch_free_memslot(kvm, free, dont);
 
        free->npages = 0;
 }
@@ -557,7 +559,7 @@ void kvm_free_physmem(struct kvm *kvm)
        struct kvm_memory_slot *memslot;
 
        kvm_for_each_memslot(memslot, slots)
-               kvm_free_physmem_slot(memslot, NULL);
+               kvm_free_physmem_slot(kvm, memslot, NULL);
 
        kfree(kvm->memslots);
 }
@@ -581,9 +583,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
        struct mm_struct *mm = kvm->mm;
 
        kvm_arch_sync_events(kvm);
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_del(&kvm->vm_list);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        kvm_free_irq_routing(kvm);
        for (i = 0; i < KVM_NR_BUSES; i++)
                kvm_io_bus_destroy(kvm->buses[i]);
@@ -821,7 +823,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
        if (change == KVM_MR_CREATE) {
                new.userspace_addr = mem->userspace_addr;
 
-               if (kvm_arch_create_memslot(&new, npages))
+               if (kvm_arch_create_memslot(kvm, &new, npages))
                        goto out_free;
        }
 
@@ -872,6 +874,19 @@ int __kvm_set_memory_region(struct kvm *kvm,
                        goto out_free;
        }
 
+       /* actual memory is freed via old in kvm_free_physmem_slot below */
+       if (change == KVM_MR_DELETE) {
+               new.dirty_bitmap = NULL;
+               memset(&new.arch, 0, sizeof(new.arch));
+       }
+
+       old_memslots = install_new_memslots(kvm, slots, &new);
+
+       kvm_arch_commit_memory_region(kvm, mem, &old, change);
+
+       kvm_free_physmem_slot(kvm, &old, &new);
+       kfree(old_memslots);
+
        /*
         * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
         * un-mapped and re-mapped if their base changes.  Since base change
@@ -883,29 +898,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
         */
        if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
                r = kvm_iommu_map_pages(kvm, &new);
-               if (r)
-                       goto out_slots;
-       }
-
-       /* actual memory is freed via old in kvm_free_physmem_slot below */
-       if (change == KVM_MR_DELETE) {
-               new.dirty_bitmap = NULL;
-               memset(&new.arch, 0, sizeof(new.arch));
+               return r;
        }
 
-       old_memslots = install_new_memslots(kvm, slots, &new);
-
-       kvm_arch_commit_memory_region(kvm, mem, &old, change);
-
-       kvm_free_physmem_slot(&old, &new);
-       kfree(old_memslots);
-
        return 0;
 
 out_slots:
        kfree(slots);
 out_free:
-       kvm_free_physmem_slot(&new, &old);
+       kvm_free_physmem_slot(kvm, &new, &old);
 out:
        return r;
 }
@@ -964,6 +965,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 out:
        return r;
 }
+EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
 
 bool kvm_largepages_enabled(void)
 {
@@ -1064,10 +1066,12 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 {
        struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
-       if (writable)
+       unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
+
+       if (!kvm_is_error_hva(hva) && writable)
                *writable = !memslot_is_readonly(slot);
 
-       return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+       return hva;
 }
 
 static int kvm_read_hva(void *data, void __user *hva, int len)
@@ -1652,6 +1656,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
        memslot = gfn_to_memslot(kvm, gfn);
        mark_page_dirty_in_slot(kvm, memslot, gfn);
 }
+EXPORT_SYMBOL_GPL(mark_page_dirty);
 
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
@@ -1677,6 +1682,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 
        finish_wait(&vcpu->wq, &wait);
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
 #ifndef CONFIG_S390
 /*
@@ -2269,6 +2275,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
                ops = &kvm_xics_ops;
                break;
 #endif
+#ifdef CONFIG_KVM_VFIO
+       case KVM_DEV_TYPE_VFIO:
+               ops = &kvm_vfio_ops;
+               break;
+#endif
        default:
                return -ENODEV;
        }
@@ -2517,44 +2528,12 @@ out:
 }
 #endif
 
-static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-       struct page *page[1];
-       unsigned long addr;
-       int npages;
-       gfn_t gfn = vmf->pgoff;
-       struct kvm *kvm = vma->vm_file->private_data;
-
-       addr = gfn_to_hva(kvm, gfn);
-       if (kvm_is_error_hva(addr))
-               return VM_FAULT_SIGBUS;
-
-       npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
-                               NULL);
-       if (unlikely(npages != 1))
-               return VM_FAULT_SIGBUS;
-
-       vmf->page = page[0];
-       return 0;
-}
-
-static const struct vm_operations_struct kvm_vm_vm_ops = {
-       .fault = kvm_vm_fault,
-};
-
-static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
-{
-       vma->vm_ops = &kvm_vm_vm_ops;
-       return 0;
-}
-
 static struct file_operations kvm_vm_fops = {
        .release        = kvm_vm_release,
        .unlocked_ioctl = kvm_vm_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = kvm_vm_compat_ioctl,
 #endif
-       .mmap           = kvm_vm_mmap,
        .llseek         = noop_llseek,
 };
 
@@ -2681,11 +2660,12 @@ static void hardware_enable_nolock(void *junk)
        }
 }
 
-static void hardware_enable(void *junk)
+static void hardware_enable(void)
 {
-       raw_spin_lock(&kvm_lock);
-       hardware_enable_nolock(junk);
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
+       if (kvm_usage_count)
+               hardware_enable_nolock(NULL);
+       raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_nolock(void *junk)
@@ -2698,11 +2678,12 @@ static void hardware_disable_nolock(void *junk)
        kvm_arch_hardware_disable(NULL);
 }
 
-static void hardware_disable(void *junk)
+static void hardware_disable(void)
 {
-       raw_spin_lock(&kvm_lock);
-       hardware_disable_nolock(junk);
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
+       if (kvm_usage_count)
+               hardware_disable_nolock(NULL);
+       raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_all_nolock(void)
@@ -2716,16 +2697,16 @@ static void hardware_disable_all_nolock(void)
 
 static void hardware_disable_all(void)
 {
-       raw_spin_lock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
        hardware_disable_all_nolock();
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_unlock(&kvm_count_lock);
 }
 
 static int hardware_enable_all(void)
 {
        int r = 0;
 
-       raw_spin_lock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
 
        kvm_usage_count++;
        if (kvm_usage_count == 1) {
@@ -2738,7 +2719,7 @@ static int hardware_enable_all(void)
                }
        }
 
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_unlock(&kvm_count_lock);
 
        return r;
 }
@@ -2748,20 +2729,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 {
        int cpu = (long)v;
 
-       if (!kvm_usage_count)
-               return NOTIFY_OK;
-
        val &= ~CPU_TASKS_FROZEN;
        switch (val) {
        case CPU_DYING:
                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
                       cpu);
-               hardware_disable(NULL);
+               hardware_disable();
                break;
        case CPU_STARTING:
                printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
                       cpu);
-               hardware_enable(NULL);
+               hardware_enable();
                break;
        }
        return NOTIFY_OK;
@@ -3054,10 +3032,10 @@ static int vm_stat_get(void *_offset, u64 *val)
        struct kvm *kvm;
 
        *val = 0;
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                *val += *(u32 *)((void *)kvm + offset);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return 0;
 }
 
@@ -3071,12 +3049,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
        int i;
 
        *val = 0;
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                kvm_for_each_vcpu(i, vcpu, kvm)
                        *val += *(u32 *)((void *)vcpu + offset);
 
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return 0;
 }
 
@@ -3131,7 +3109,7 @@ static int kvm_suspend(void)
 static void kvm_resume(void)
 {
        if (kvm_usage_count) {
-               WARN_ON(raw_spin_is_locked(&kvm_lock));
+               WARN_ON(raw_spin_is_locked(&kvm_count_lock));
                hardware_enable_nolock(NULL);
        }
 }
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
new file mode 100644 (file)
index 0000000..ca4260e
--- /dev/null
@@ -0,0 +1,264 @@
+/*
+ * VFIO-KVM bridge pseudo device
+ *
+ * Copyright (C) 2013 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+struct kvm_vfio_group {
+       struct list_head node;
+       struct vfio_group *vfio_group;
+};
+
+struct kvm_vfio {
+       struct list_head group_list;
+       struct mutex lock;
+       bool noncoherent;
+};
+
+static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
+{
+       struct vfio_group *vfio_group;
+       struct vfio_group *(*fn)(struct file *);
+
+       fn = symbol_get(vfio_group_get_external_user);
+       if (!fn)
+               return ERR_PTR(-EINVAL);
+
+       vfio_group = fn(filep);
+
+       symbol_put(vfio_group_get_external_user);
+
+       return vfio_group;
+}
+
+static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
+{
+       void (*fn)(struct vfio_group *);
+
+       fn = symbol_get(vfio_group_put_external_user);
+       if (!fn)
+               return;
+
+       fn(vfio_group);
+
+       symbol_put(vfio_group_put_external_user);
+}
+
+/*
+ * Groups can use the same or different IOMMU domains.  If the same then
+ * adding a new group may change the coherency of groups we've previously
+ * been told about.  We don't want to care about any of that so we retest
+ * each group and bail as soon as we find one that's noncoherent.  This
+ * means we only ever [un]register_noncoherent_dma once for the whole device.
+ */
+static void kvm_vfio_update_coherency(struct kvm_device *dev)
+{
+       struct kvm_vfio *kv = dev->private;
+       bool noncoherent = false;
+       struct kvm_vfio_group *kvg;
+
+       mutex_lock(&kv->lock);
+
+       list_for_each_entry(kvg, &kv->group_list, node) {
+               /*
+                * TODO: We need an interface to check the coherency of
+                * the IOMMU domain this group is using.  For now, assume
+                * it's always noncoherent.
+                */
+               noncoherent = true;
+               break;
+       }
+
+       if (noncoherent != kv->noncoherent) {
+               kv->noncoherent = noncoherent;
+
+               if (kv->noncoherent)
+                       kvm_arch_register_noncoherent_dma(dev->kvm);
+               else
+                       kvm_arch_unregister_noncoherent_dma(dev->kvm);
+       }
+
+       mutex_unlock(&kv->lock);
+}
+
+static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
+{
+       struct kvm_vfio *kv = dev->private;
+       struct vfio_group *vfio_group;
+       struct kvm_vfio_group *kvg;
+       void __user *argp = (void __user *)arg;
+       struct fd f;
+       int32_t fd;
+       int ret;
+
+       switch (attr) {
+       case KVM_DEV_VFIO_GROUP_ADD:
+               if (get_user(fd, (int32_t __user *)argp))
+                       return -EFAULT;
+
+               f = fdget(fd);
+               if (!f.file)
+                       return -EBADF;
+
+               vfio_group = kvm_vfio_group_get_external_user(f.file);
+               fdput(f);
+
+               if (IS_ERR(vfio_group))
+                       return PTR_ERR(vfio_group);
+
+               mutex_lock(&kv->lock);
+
+               list_for_each_entry(kvg, &kv->group_list, node) {
+                       if (kvg->vfio_group == vfio_group) {
+                               mutex_unlock(&kv->lock);
+                               kvm_vfio_group_put_external_user(vfio_group);
+                               return -EEXIST;
+                       }
+               }
+
+               kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
+               if (!kvg) {
+                       mutex_unlock(&kv->lock);
+                       kvm_vfio_group_put_external_user(vfio_group);
+                       return -ENOMEM;
+               }
+
+               list_add_tail(&kvg->node, &kv->group_list);
+               kvg->vfio_group = vfio_group;
+
+               mutex_unlock(&kv->lock);
+
+               kvm_vfio_update_coherency(dev);
+
+               return 0;
+
+       case KVM_DEV_VFIO_GROUP_DEL:
+               if (get_user(fd, (int32_t __user *)argp))
+                       return -EFAULT;
+
+               f = fdget(fd);
+               if (!f.file)
+                       return -EBADF;
+
+               vfio_group = kvm_vfio_group_get_external_user(f.file);
+               fdput(f);
+
+               if (IS_ERR(vfio_group))
+                       return PTR_ERR(vfio_group);
+
+               ret = -ENOENT;
+
+               mutex_lock(&kv->lock);
+
+               list_for_each_entry(kvg, &kv->group_list, node) {
+                       if (kvg->vfio_group != vfio_group)
+                               continue;
+
+                       list_del(&kvg->node);
+                       kvm_vfio_group_put_external_user(kvg->vfio_group);
+                       kfree(kvg);
+                       ret = 0;
+                       break;
+               }
+
+               mutex_unlock(&kv->lock);
+
+               kvm_vfio_group_put_external_user(vfio_group);
+
+               kvm_vfio_update_coherency(dev);
+
+               return ret;
+       }
+
+       return -ENXIO;
+}
+
+static int kvm_vfio_set_attr(struct kvm_device *dev,
+                            struct kvm_device_attr *attr)
+{
+       switch (attr->group) {
+       case KVM_DEV_VFIO_GROUP:
+               return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+       }
+
+       return -ENXIO;
+}
+
+static int kvm_vfio_has_attr(struct kvm_device *dev,
+                            struct kvm_device_attr *attr)
+{
+       switch (attr->group) {
+       case KVM_DEV_VFIO_GROUP:
+               switch (attr->attr) {
+               case KVM_DEV_VFIO_GROUP_ADD:
+               case KVM_DEV_VFIO_GROUP_DEL:
+                       return 0;
+               }
+
+               break;
+       }
+
+       return -ENXIO;
+}
+
+static void kvm_vfio_destroy(struct kvm_device *dev)
+{
+       struct kvm_vfio *kv = dev->private;
+       struct kvm_vfio_group *kvg, *tmp;
+
+       list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+               kvm_vfio_group_put_external_user(kvg->vfio_group);
+               list_del(&kvg->node);
+               kfree(kvg);
+       }
+
+       kvm_vfio_update_coherency(dev);
+
+       kfree(kv);
+       kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
+}
+
+static int kvm_vfio_create(struct kvm_device *dev, u32 type)
+{
+       struct kvm_device *tmp;
+       struct kvm_vfio *kv;
+
+       /* Only one VFIO "device" per VM */
+       list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
+               if (tmp->ops == &kvm_vfio_ops)
+                       return -EBUSY;
+
+       kv = kzalloc(sizeof(*kv), GFP_KERNEL);
+       if (!kv)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&kv->group_list);
+       mutex_init(&kv->lock);
+
+       dev->private = kv;
+
+       return 0;
+}
+
+struct kvm_device_ops kvm_vfio_ops = {
+       .name = "kvm-vfio",
+       .create = kvm_vfio_create,
+       .destroy = kvm_vfio_destroy,
+       .set_attr = kvm_vfio_set_attr,
+       .has_attr = kvm_vfio_has_attr,
+};