Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author     Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 3 Jul 2017 18:34:53 +0000 (11:34 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 3 Jul 2017 18:34:53 +0000 (11:34 -0700)
Pull RCU updates from Ingo Molnar:
 "The sole purpose of these changes is to shrink and simplify the RCU
  code base, which has suffered from creeping bloat over the past couple
  of years. The end result is a net removal of ~2700 lines of code:

     79 files changed, 1496 insertions(+), 4211 deletions(-)

  Plus there's a marked reduction in Kconfig space complexity as
  well; here's the number of matches for 'grep RCU' in the .config:

                               before       after

     x86-defconfig                 17          15
     x86-allmodconfig              33          20"
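
A quick way to re-derive that metric (a sketch; "grep -c" counts matching
lines, which appears to be what the table above reports):

     $ make defconfig    && grep -c RCU .config    # x86-defconfig
     $ make allmodconfig && grep -c RCU .config    # x86-allmodconfig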

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (86 commits)
  rcu: Remove RCU CPU stall warnings from Tiny RCU
  rcu: Remove event tracing from Tiny RCU
  rcu: Move RCU debug Kconfig options to kernel/rcu
  rcu: Move RCU non-debug Kconfig options to kernel/rcu
  rcu: Eliminate NOCBs CPU-state Kconfig options
  rcu: Remove debugfs tracing
  srcu: Remove Classic SRCU
  srcu: Fix rcutorture-statistics typo
  rcu: Remove SPARSE_RCU_POINTER Kconfig option
  rcu: Remove the now-obsolete PROVE_RCU_REPEATEDLY Kconfig option
  rcu: Remove typecheck() from RCU locking wrapper functions
  rcu: Remove #ifdef moving rcu_end_inkernel_boot from rcupdate.h
  rcu: Remove nohz_full full-system-idle state machine
  rcu: Remove the RCU_KTHREAD_PRIO Kconfig option
  rcu: Remove *_SLOW_* Kconfig options
  srcu: Use rnp->lock wrappers to replace explicit memory barriers
  rcu: Move rnp->lock wrappers for SRCU use
  rcu: Convert rnp->lock wrappers to macros for SRCU use
  rcu: Refactor #includes from include/linux/rcupdate.h
  bcm47xx: Fix build regression
  ...

958 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/block/biodoc.txt
Documentation/devicetree/bindings/clock/sunxi-ccu.txt
Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
Documentation/devicetree/bindings/mfd/stm32-timers.txt
Documentation/devicetree/bindings/net/dsa/b53.txt
Documentation/devicetree/bindings/net/smsc911x.txt
Documentation/networking/scaling.txt
MAINTAINERS
Makefile
arch/arc/include/asm/processor.h
arch/arc/mm/mmap.c
arch/arm/Kconfig
arch/arm/boot/compressed/efi-header.S
arch/arm/boot/dts/am335x-sl50.dts
arch/arm/boot/dts/sunxi-h3-h5.dtsi
arch/arm/kernel/setup.c
arch/arm/mm/mmap.c
arch/arm/mm/mmu.c
arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi
arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi [deleted symlink]
arch/arm64/kernel/vdso.c
arch/arm64/kernel/vdso/gettimeofday.S
arch/arm64/net/bpf_jit_comp.c
arch/blackfin/include/asm/processor.h
arch/c6x/include/asm/processor.h
arch/cris/arch-v10/kernel/process.c
arch/cris/arch-v32/kernel/process.c
arch/cris/include/asm/processor.h
arch/frv/include/asm/processor.h
arch/frv/kernel/process.c
arch/frv/mm/elf-fdpic.c
arch/h8300/include/asm/processor.h
arch/h8300/kernel/process.c
arch/hexagon/include/asm/processor.h
arch/hexagon/kernel/process.c
arch/ia64/include/asm/processor.h
arch/m32r/include/asm/processor.h
arch/m32r/kernel/process.c
arch/m68k/include/asm/processor.h
arch/m68k/kernel/process.c
arch/microblaze/include/asm/processor.h
arch/microblaze/kernel/process.c
arch/mips/boot/Makefile
arch/mips/include/asm/highmem.h
arch/mips/include/asm/kprobes.h
arch/mips/include/asm/pgtable-32.h
arch/mips/kernel/branch.c
arch/mips/kernel/entry.S
arch/mips/kernel/ftrace.c
arch/mips/kernel/head.S
arch/mips/kernel/perf_event_mipsxx.c
arch/mips/kernel/pm-cps.c
arch/mips/kernel/traps.c
arch/mips/kvm/tlb.c
arch/mips/math-emu/dp_maddf.c
arch/mips/math-emu/sp_maddf.c
arch/mips/mm/dma-default.c
arch/mips/mm/mmap.c
arch/mips/mm/pgtable-32.c
arch/mn10300/include/asm/processor.h
arch/mn10300/kernel/process.c
arch/nios2/include/asm/processor.h
arch/openrisc/include/asm/processor.h
arch/openrisc/kernel/process.c
arch/parisc/include/asm/processor.h
arch/parisc/kernel/process.c
arch/parisc/kernel/sys_parisc.c
arch/powerpc/include/asm/bug.h
arch/powerpc/include/asm/kprobes.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/uaccess.h
arch/powerpc/include/asm/xive.h
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/kprobes.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/trace/ftrace_64_mprofile.S
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_interrupts.S
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_xive_template.c
arch/powerpc/mm/hugetlbpage-radix.c
arch/powerpc/mm/mmap.c
arch/powerpc/mm/slice.c
arch/powerpc/perf/perf_regs.c
arch/powerpc/platforms/powernv/npu-dma.c
arch/powerpc/sysdev/xive/common.c
arch/s390/configs/default_defconfig
arch/s390/configs/gcov_defconfig
arch/s390/configs/performance_defconfig
arch/s390/configs/zfcpdump_defconfig
arch/s390/defconfig
arch/s390/include/asm/eadm.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/sysinfo.h
arch/s390/kernel/entry.S
arch/s390/kernel/ipl.c
arch/s390/kernel/process.c
arch/s390/kernel/sysinfo.c
arch/s390/kvm/gaccess.c
arch/s390/mm/mmap.c
arch/score/include/asm/processor.h
arch/score/kernel/process.c
arch/sh/mm/mmap.c
arch/sparc/include/asm/processor_32.h
arch/sparc/include/asm/processor_64.h
arch/sparc/kernel/process_32.c
arch/sparc/kernel/process_64.c
arch/sparc/kernel/sys_sparc_64.c
arch/sparc/mm/hugetlbpage.c
arch/tile/include/asm/processor.h
arch/tile/mm/hugetlbpage.c
arch/um/drivers/ubd_kern.c
arch/um/include/asm/processor-generic.h
arch/um/kernel/um_arch.c
arch/x86/boot/compressed/kaslr.c
arch/x86/boot/compressed/misc.c
arch/x86/boot/compressed/misc.h
arch/x86/crypto/Makefile
arch/x86/crypto/sha1-mb/Makefile
arch/x86/crypto/sha256-mb/Makefile
arch/x86/events/intel/core.c
arch/x86/events/intel/uncore.c
arch/x86/include/asm/extable.h
arch/x86/include/asm/kvm_emulate.h
arch/x86/include/asm/mshyperv.h
arch/x86/include/asm/processor.h
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/Makefile
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
arch/x86/kernel/kprobes/opt.c
arch/x86/kernel/process.c
arch/x86/kernel/reboot.c
arch/x86/kernel/sys_x86_64.c
arch/x86/kernel/tboot.c
arch/x86/kernel/traps.c
arch/x86/kvm/emulate.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/msr-reg.S
arch/x86/mm/extable.c
arch/x86/mm/hugetlbpage.c
arch/x86/mm/init.c
arch/x86/mm/init_64.c
arch/x86/net/Makefile
arch/x86/platform/efi/Makefile
arch/x86/power/Makefile
arch/x86/xen/Makefile
arch/xtensa/include/asm/irq.h
arch/xtensa/include/asm/processor.h
arch/xtensa/kernel/irq.c
arch/xtensa/kernel/setup.c
arch/xtensa/kernel/syscall.c
arch/xtensa/kernel/vmlinux.lds.S
arch/xtensa/platforms/iss/simdisk.c
arch/xtensa/platforms/xtfpga/include/platform/hardware.h
arch/xtensa/platforms/xtfpga/setup.c
block/badblocks.c
block/bfq-iosched.c
block/bio-integrity.c
block/bio.c
block/blk-core.c
block/blk-exec.c
block/blk-flush.c
block/blk-integrity.c
block/blk-map.c
block/blk-merge.c
block/blk-mq-cpumap.c
block/blk-mq-debugfs.c
block/blk-mq-sched.c
block/blk-mq-sched.h
block/blk-mq.c
block/blk-mq.h
block/blk-settings.c
block/blk-sysfs.c
block/blk-tag.c
block/blk-timeout.c
block/blk.h
block/bounce.c
block/bsg-lib.c
block/bsg.c
block/cfq-iosched.c
block/elevator.c
block/genhd.c
block/ioprio.c
block/kyber-iosched.c
block/partitions/ldm.c
block/partitions/ldm.h
block/scsi_ioctl.c
block/t10-pi.c
drivers/acpi/acpi_extlog.c
drivers/acpi/acpica/tbutils.c
drivers/acpi/acpica/utresrc.c
drivers/acpi/apei/ghes.c
drivers/acpi/bus.c
drivers/acpi/nfit/core.c
drivers/acpi/nfit/nfit.h
drivers/acpi/scan.c
drivers/acpi/utils.c
drivers/block/DAC960.c
drivers/block/amiflop.c
drivers/block/aoe/aoeblk.c
drivers/block/aoe/aoecmd.c
drivers/block/aoe/aoedev.c
drivers/block/ataflop.c
drivers/block/brd.c
drivers/block/cciss.c
drivers/block/drbd/drbd_actlog.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_req.h
drivers/block/drbd/drbd_worker.c
drivers/block/floppy.c
drivers/block/loop.c
drivers/block/loop.h
drivers/block/mtip32xx/mtip32xx.c
drivers/block/mtip32xx/mtip32xx.h
drivers/block/nbd.c
drivers/block/null_blk.c
drivers/block/paride/pcd.c
drivers/block/paride/pd.c
drivers/block/paride/pf.c
drivers/block/pktcdvd.c
drivers/block/ps3disk.c
drivers/block/ps3vram.c
drivers/block/rbd.c
drivers/block/rsxx/dev.c
drivers/block/rsxx/dma.c
drivers/block/rsxx/rsxx_priv.h
drivers/block/skd_main.c
drivers/block/sunvdc.c
drivers/block/swim.c
drivers/block/swim3.c
drivers/block/sx8.c
drivers/block/umem.c
drivers/block/virtio_blk.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/block/xsysace.c
drivers/block/z2ram.c
drivers/cdrom/cdrom.c
drivers/cdrom/gdrom.c
drivers/char/random.c
drivers/char/tpm/tpm_crb.c
drivers/char/tpm/tpm_ppi.c
drivers/clk/meson/Kconfig
drivers/clk/sunxi-ng/Kconfig
drivers/clk/sunxi-ng/ccu-sun50i-a64.h
drivers/clk/sunxi-ng/ccu-sun5i.c
drivers/clk/sunxi-ng/ccu-sun6i-a31.c
drivers/clk/sunxi-ng/ccu-sun8i-h3.h
drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
drivers/clocksource/arm_arch_timer.c
drivers/clocksource/cadence_ttc_timer.c
drivers/clocksource/timer-sun5i.c
drivers/cpufreq/cpufreq_conservative.c
drivers/cpuidle/dt_idle_states.c
drivers/devfreq/event/exynos-nocp.c
drivers/devfreq/event/exynos-ppmu.c
drivers/edac/altera_edac.c
drivers/edac/i5000_edac.c
drivers/edac/i5400_edac.c
drivers/edac/ie31200_edac.c
drivers/edac/mce_amd.c
drivers/edac/mv64x60_edac.c
drivers/edac/pnd2_edac.c
drivers/edac/sb_edac.c
drivers/edac/thunderx_edac.c
drivers/firmware/dmi-id.c
drivers/firmware/dmi_scan.c
drivers/gpio/gpio-mvebu.c
drivers/gpio/gpiolib-acpi.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
drivers/gpu/drm/bridge/synopsys/Kconfig
drivers/gpu/drm/drm_connector.c
drivers/gpu/drm/etnaviv/etnaviv_gem.h
drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_guc_submission.c
drivers/gpu/drm/i915/i915_pvinfo.h
drivers/gpu/drm/i915/i915_vgpu.c
drivers/gpu/drm/i915/i915_vma.c
drivers/gpu/drm/i915/intel_acpi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp_aux_backlight.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/mgag200/mgag200_mode.c
drivers/gpu/drm/mxsfb/mxsfb_crtc.c
drivers/gpu/drm/nouveau/nouveau_acpi.c
drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/radeon_combios.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_uvd.c
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/tegra/drm.c
drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
drivers/gpu/host1x/dev.c
drivers/hid/hid-core.c
drivers/hid/hid-ids.h
drivers/hid/hid-magicmouse.c
drivers/hid/i2c-hid/i2c-hid.c
drivers/hid/usbhid/hid-quirks.c
drivers/hsi/clients/ssi_protocol.c
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-ismt.c
drivers/i2c/busses/i2c-rcar.c
drivers/ide/ide-atapi.c
drivers/ide/ide-cd.c
drivers/ide/ide-cd_ioctl.c
drivers/ide/ide-devsets.c
drivers/ide/ide-disk.c
drivers/ide/ide-dma.c
drivers/ide/ide-eh.c
drivers/ide/ide-floppy.c
drivers/ide/ide-io.c
drivers/ide/ide-ioctls.c
drivers/ide/ide-park.c
drivers/ide/ide-pm.c
drivers/ide/ide-probe.c
drivers/ide/ide-tape.c
drivers/ide/ide-taskfile.c
drivers/ide/siimage.c
drivers/iio/adc/meson_saradc.c
drivers/iio/adc/mxs-lradc-adc.c
drivers/iio/buffer/industrialio-buffer-dma.c
drivers/iio/buffer/industrialio-buffer-dmaengine.c
drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
drivers/infiniband/core/addr.c
drivers/infiniband/hw/bnxt_re/bnxt_re.h
drivers/infiniband/hw/bnxt_re/ib_verbs.c
drivers/infiniband/hw/bnxt_re/ib_verbs.h
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/bnxt_re/qplib_fp.c
drivers/infiniband/hw/bnxt_re/qplib_fp.h
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
drivers/infiniband/hw/bnxt_re/qplib_res.h
drivers/infiniband/hw/bnxt_re/qplib_sp.c
drivers/infiniband/hw/bnxt_re/qplib_sp.h
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/qedr/qedr.h
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/sw/rxe/rxe.h
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_vlan.c
drivers/input/misc/soc_button_array.c
drivers/input/rmi4/rmi_f54.c
drivers/input/serio/i8042-x86ia64io.h
drivers/iommu/amd_iommu.c
drivers/iommu/dmar.c
drivers/irqchip/irq-mips-gic.c
drivers/irqchip/irq-xtensa-mx.c
drivers/irqchip/irq-xtensa-pic.c
drivers/leds/leds-bcm6328.c
drivers/leds/trigger/ledtrig-heartbeat.c
drivers/lightnvm/core.c
drivers/lightnvm/pblk-cache.c
drivers/lightnvm/pblk-core.c
drivers/lightnvm/pblk-gc.c
drivers/lightnvm/pblk-init.c
drivers/lightnvm/pblk-map.c
drivers/lightnvm/pblk-rb.c
drivers/lightnvm/pblk-read.c
drivers/lightnvm/pblk-recovery.c
drivers/lightnvm/pblk-rl.c
drivers/lightnvm/pblk-sysfs.c
drivers/lightnvm/pblk-write.c
drivers/lightnvm/pblk.h
drivers/lightnvm/rrpc.c
drivers/md/bcache/bcache.h
drivers/md/bcache/btree.c
drivers/md/bcache/debug.c
drivers/md/bcache/io.c
drivers/md/bcache/journal.c
drivers/md/bcache/movinggc.c
drivers/md/bcache/request.c
drivers/md/bcache/request.h
drivers/md/bcache/super.c
drivers/md/bcache/writeback.c
drivers/md/dm-bio-prison-v1.c
drivers/md/dm-bio-prison-v1.h
drivers/md/dm-bufio.c
drivers/md/dm-cache-target.c
drivers/md/dm-crypt.c
drivers/md/dm-flakey.c
drivers/md/dm-integrity.c
drivers/md/dm-io.c
drivers/md/dm-log-writes.c
drivers/md/dm-mpath.c
drivers/md/dm-raid.c
drivers/md/dm-raid1.c
drivers/md/dm-rq.c
drivers/md/dm-rq.h
drivers/md/dm-snap.c
drivers/md/dm-stripe.c
drivers/md/dm-target.c
drivers/md/dm-thin.c
drivers/md/dm-verity-target.c
drivers/md/dm-zero.c
drivers/md/dm.c
drivers/md/md.c
drivers/md/multipath.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5-cache.c
drivers/md/raid5-ppl.c
drivers/md/raid5.c
drivers/media/cec/Kconfig
drivers/media/cec/cec-api.c
drivers/media/i2c/tc358743.c
drivers/media/rc/sir_ir.c
drivers/media/usb/rainshadow-cec/rainshadow-cec.c
drivers/media/v4l2-core/videobuf2-core.c
drivers/memstick/core/ms_block.c
drivers/memstick/core/mspro_block.c
drivers/mfd/arizona-core.c
drivers/misc/cxl/context.c
drivers/misc/cxl/cxl.h
drivers/misc/cxl/fault.c
drivers/misc/cxl/main.c
drivers/misc/cxl/native.c
drivers/misc/cxl/pci.c
drivers/mmc/core/block.c
drivers/mmc/core/queue.c
drivers/mmc/host/meson-gx-mmc.c
drivers/mmc/host/sdhci-pci-core.c
drivers/mtd/mtd_blkdevs.c
drivers/mtd/ubi/block.c
drivers/net/arcnet/arcnet.c
drivers/net/arcnet/capmode.c
drivers/net/arcnet/com20020-pci.c
drivers/net/arcnet/com20020.c
drivers/net/bonding/bond_3ad.c
drivers/net/bonding/bond_main.c
drivers/net/caif/caif_hsi.c
drivers/net/caif/caif_serial.c
drivers/net/caif/caif_spi.c
drivers/net/caif/caif_virtio.c
drivers/net/can/dev.c
drivers/net/can/peak_canfd/peak_canfd.c
drivers/net/can/slcan.c
drivers/net/can/usb/gs_usb.c
drivers/net/can/usb/peak_usb/pcan_usb_core.c
drivers/net/can/vcan.c
drivers/net/can/vxcan.c
drivers/net/dummy.c
drivers/net/ethernet/amazon/ena/ena_com.c
drivers/net/ethernet/amazon/ena/ena_ethtool.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/amazon/ena/ena_netdev.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/fman/Kconfig
drivers/net/ethernet/freescale/fman/mac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
drivers/net/ethernet/ibm/emac/core.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/marvell/mvpp2.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/qlogic/qed/qed_debug.c
drivers/net/ethernet/rocker/rocker_ofdpa.c
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/ef10_sriov.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
drivers/net/ethernet/ti/cpsw-common.c
drivers/net/geneve.c
drivers/net/gtp.c
drivers/net/hamradio/6pack.c
drivers/net/hamradio/bpqether.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ifb.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/loopback.c
drivers/net/macsec.c
drivers/net/macvlan.c
drivers/net/netconsole.c
drivers/net/nlmon.c
drivers/net/phy/Kconfig
drivers/net/phy/dp83640.c
drivers/net/phy/micrel.c
drivers/net/phy/phy.c
drivers/net/slip/slip.c
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/usb/ax88179_178a.c
drivers/net/usb/cdc-phonet.c
drivers/net/usb/qmi_wwan.c
drivers/net/usb/r8152.c
drivers/net/veth.c
drivers/net/virtio_net.c
drivers/net/vrf.c
drivers/net/vsockmon.c
drivers/net/vxlan.c
drivers/net/wan/dlci.c
drivers/net/wan/hdlc_fr.c
drivers/net/wan/lapbether.c
drivers/net/wireless/ath/ath6kl/main.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
drivers/net/wireless/intersil/hostap/hostap_main.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/marvell/mwifiex/main.c
drivers/net/xen-netback/common.h
drivers/net/xen-netback/interface.c
drivers/net/xen-netback/netback.c
drivers/ntb/hw/intel/ntb_hw_intel.c
drivers/ntb/ntb_transport.c
drivers/ntb/test/ntb_perf.c
drivers/nvdimm/blk.c
drivers/nvdimm/btt.c
drivers/nvdimm/btt_devs.c
drivers/nvdimm/pmem.c
drivers/nvme/host/Kconfig
drivers/nvme/host/Makefile
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/lightnvm.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/scsi.c [deleted file]
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/configfs.c
drivers/nvme/target/core.c
drivers/nvme/target/discovery.c
drivers/nvme/target/fc.c
drivers/nvme/target/fcloop.c
drivers/nvme/target/io-cmd.c
drivers/nvme/target/loop.c
drivers/nvme/target/nvmet.h
drivers/nvme/target/rdma.c
drivers/pci/access.c
drivers/pci/endpoint/functions/Kconfig
drivers/pci/pci-acpi.c
drivers/pci/pci-label.c
drivers/pinctrl/pinctrl-amd.c
drivers/pinctrl/pinctrl-rockchip.c
drivers/pinctrl/stm32/pinctrl-stm32.c
drivers/platform/x86/intel_telemetry_debugfs.c
drivers/s390/block/dasd.c
drivers/s390/block/dcssblk.c
drivers/s390/block/scm_blk.c
drivers/s390/block/scm_blk.h
drivers/s390/block/xpram.c
drivers/s390/cio/eadm_sch.c
drivers/s390/cio/scm.c
drivers/s390/cio/vfio_ccw_ops.c
drivers/s390/crypto/ap_bus.c
drivers/s390/crypto/ap_card.c
drivers/s390/crypto/ap_queue.c
drivers/s390/net/netiucv.c
drivers/sbus/char/jsflash.c
drivers/scsi/osd/osd_initiator.c
drivers/scsi/osst.c
drivers/scsi/qedi/qedi_fw.c
drivers/scsi/qedi/qedi_main.c
drivers/scsi/scsi_debug.c
drivers/scsi/scsi_error.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_transport_sas.c
drivers/scsi/sg.c
drivers/scsi/st.c
drivers/staging/iio/cdc/ad7152.c
drivers/staging/rtl8188eu/os_dep/mon.c
drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
drivers/staging/rtl8723bs/os_dep/os_intfs.c
drivers/staging/rtl8723bs/os_dep/osdep_service.c
drivers/target/iscsi/iscsi_target.c
drivers/target/target_core_iblock.c
drivers/target/target_core_internal.h
drivers/target/target_core_pscsi.c
drivers/target/target_core_tmr.c
drivers/target/target_core_transport.c
drivers/thermal/int340x_thermal/int3400_thermal.c
drivers/usb/dwc3/dwc3-pci.c
drivers/usb/gadget/composite.c
drivers/usb/gadget/function/f_phonet.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/gadget/udc/dummy_hcd.c
drivers/usb/gadget/udc/net2280.c
drivers/usb/host/xhci-mem.c
drivers/usb/host/xhci-pci.c
drivers/usb/misc/ucsi.c
drivers/usb/typec/typec_wcove.c
drivers/video/fbdev/core/fbmon.c
drivers/video/fbdev/smscufx.c
drivers/video/fbdev/udlfb.c
drivers/video/fbdev/via/viafbdev.c
drivers/virtio/virtio_balloon.c
drivers/xen/tmem.c
fs/afs/cmservice.c
fs/afs/internal.h
fs/afs/main.c
fs/aio.c
fs/autofs4/dev-ioctl.c
fs/block_dev.c
fs/btrfs/btrfs_inode.h
fs/btrfs/check-integrity.c
fs/btrfs/compression.c
fs/btrfs/compression.h
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/hash.c
fs/btrfs/inode.c
fs/btrfs/raid56.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c
fs/buffer.c
fs/ceph/acl.c
fs/ceph/export.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/cifs/file.c
fs/cifs/misc.c
fs/cifs/smb1ops.c
fs/cifs/smb2ops.c
fs/cifs/xattr.c
fs/configfs/item.c
fs/configfs/symlink.c
fs/crypto/bio.c
fs/dax.c
fs/dcache.c
fs/direct-io.c
fs/exec.c
fs/ext4/file.c
fs/ext4/page-io.c
fs/ext4/readpage.c
fs/ext4/super.c
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/segment.c
fs/f2fs/super.c
fs/fcntl.c
fs/gfs2/incore.h
fs/gfs2/lops.c
fs/gfs2/meta_io.c
fs/gfs2/ops_fstype.c
fs/gfs2/sys.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/iomap.c
fs/jfs/jfs_logmgr.c
fs/jfs/jfs_metapage.c
fs/mpage.c
fs/namespace.c
fs/nfs/blocklayout/blocklayout.c
fs/nfs/callback_xdr.c
fs/nfs/dir.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfsd/blocklayout.c
fs/nfsd/export.c
fs/nilfs2/segbuf.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/dlmglue.c
fs/ocfs2/super.c
fs/ocfs2/xattr.c
fs/open.c
fs/overlayfs/copy_up.c
fs/overlayfs/namei.c
fs/overlayfs/overlayfs.h
fs/proc/task_mmu.c
fs/read_write.c
fs/ufs/balloc.c
fs/ufs/inode.c
fs/ufs/super.c
fs/ufs/ufs_fs.h
fs/ufs/util.c
fs/ufs/util.h
fs/userfaultfd.c
fs/xfs/Makefile
fs/xfs/uuid.c [deleted file]
fs/xfs/uuid.h [deleted file]
fs/xfs/xfs_aops.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_file.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_linux.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_super.c
include/acpi/acpi_bus.h
include/acpi/actbl.h
include/dt-bindings/clock/sun50i-a64-ccu.h
include/dt-bindings/clock/sun8i-h3-ccu.h
include/linux/acpi.h
include/linux/bio.h
include/linux/blk-mq.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/cleancache.h
include/linux/configfs.h
include/linux/device-mapper.h
include/linux/dmi.h
include/linux/elevator.h
include/linux/fs.h
include/linux/genhd.h
include/linux/hashtable.h
include/linux/ide.h
include/linux/iomap.h
include/linux/mm.h
include/linux/moduleparam.h
include/linux/netdevice.h
include/linux/nvme-fc.h
include/linux/nvme.h
include/linux/pci-acpi.h
include/linux/scatterlist.h
include/linux/slub_def.h
include/linux/timekeeper_internal.h
include/linux/uuid.h
include/media/cec-notifier.h
include/media/cec.h
include/net/wext.h
include/net/xfrm.h
include/scsi/osd_initiator.h
include/scsi/scsi_cmnd.h
include/scsi/scsi_request.h
include/uapi/linux/a.out.h
include/uapi/linux/aio_abi.h
include/uapi/linux/dm-ioctl.h
include/uapi/linux/ethtool.h
include/uapi/linux/fcntl.h
include/uapi/linux/fs.h
include/uapi/linux/loop.h
include/uapi/linux/nbd.h
include/uapi/linux/openvswitch.h
include/uapi/linux/uuid.h
kernel/bpf/verifier.c
kernel/events/ring_buffer.c
kernel/irq/manage.c
kernel/kexec_core.c
kernel/livepatch/patch.c
kernel/livepatch/transition.c
kernel/power/swap.c
kernel/sched/core.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/fair.c
kernel/signal.c
kernel/sysctl_binary.c
kernel/time/alarmtimer.c
kernel/time/tick-broadcast.c
kernel/time/tick-internal.h
kernel/time/timekeeping.c
kernel/trace/blktrace.c
kernel/trace/ftrace.c
kernel/trace/trace.c
kernel/trace/trace_functions.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_stack.c
lib/cmdline.c
lib/libcrc32c.c
lib/scatterlist.c
lib/test_uuid.c
lib/uuid.c
lib/vsprintf.c
mm/cleancache.c
mm/filemap.c
mm/gup.c
mm/huge_memory.c
mm/khugepaged.c
mm/memory-failure.c
mm/memory.c
mm/mmap.c
mm/page_io.c
mm/shmem.c
mm/slub.c
mm/swap_cgroup.c
mm/vmalloc.c
mm/vmpressure.c
net/8021q/vlan.c
net/8021q/vlan_dev.c
net/batman-adv/distributed-arp-table.c
net/batman-adv/routing.c
net/batman-adv/soft-interface.c
net/bluetooth/6lowpan.c
net/bridge/br_device.c
net/caif/caif_socket.c
net/caif/cfpkt_skbuff.c
net/caif/chnl_net.c
net/can/af_can.c
net/core/dev.c
net/core/dev_ioctl.c
net/core/dst.c
net/core/fib_rules.c
net/core/rtnetlink.c
net/decnet/dn_route.c
net/decnet/netfilter/dn_rtmsg.c
net/hsr/hsr_device.c
net/hsr/hsr_forward.c
net/hsr/hsr_framereg.c
net/hsr/hsr_framereg.h
net/ieee802154/6lowpan/core.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/ip_output.c
net/ipv4/ip_tunnel.c
net/ipv4/ipmr.c
net/ipv4/tcp.c
net/ipv6/addrconf.c
net/ipv6/datagram.c
net/ipv6/esp6_offload.c
net/ipv6/fib6_rules.c
net/ipv6/icmp.c
net/ipv6/ila/ila_xlat.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ip6_vti.c
net/ipv6/ip6mr.c
net/ipv6/proc.c
net/ipv6/route.c
net/ipv6/sit.c
net/ipv6/udp.c
net/ipv6/xfrm6_input.c
net/irda/irlan/irlan_eth.c
net/key/af_key.c
net/l2tp/l2tp_eth.c
net/mac80211/cfg.c
net/mac80211/ieee80211_i.h
net/mac80211/iface.c
net/mac80211/mlme.c
net/mac80211/rx.c
net/mac80211/wpa.c
net/mac802154/iface.c
net/openvswitch/vport-internal_dev.c
net/phonet/pep-gprs.c
net/rxrpc/key.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/sch_api.c
net/sctp/endpointola.c
net/sctp/sctp_diag.c
net/sctp/socket.c
net/tipc/msg.c
net/unix/af_unix.c
net/wireless/wext-core.c
net/xfrm/Makefile
net/xfrm/xfrm_device.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_user.c
scripts/Makefile.headersinst
scripts/genksyms/genksyms.h
scripts/kconfig/Makefile
scripts/kconfig/nconf.c
scripts/kconfig/nconf.gui.c
scripts/tags.sh
security/integrity/evm/evm_crypto.c
security/integrity/ima/ima_policy.c
security/selinux/hooks.c
sound/core/pcm_lib.c
sound/firewire/amdtp-stream.c
sound/firewire/amdtp-stream.h
sound/pci/hda/hda_codec.h
sound/pci/hda/hda_controller.c
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_intel.c
sound/soc/intel/skylake/skl-nhlt.c
tools/objtool/Build
tools/objtool/Documentation/stack-validation.txt
tools/objtool/Makefile
tools/objtool/arch.h
tools/objtool/arch/x86/decode.c
tools/objtool/builtin-check.c
tools/objtool/cfi.h [new file with mode: 0644]
tools/objtool/check.c [new file with mode: 0644]
tools/objtool/check.h [new file with mode: 0644]
tools/objtool/elf.c
tools/objtool/elf.h
tools/objtool/special.c
tools/objtool/warn.h
tools/perf/Makefile.config
tools/perf/Makefile.perf
tools/perf/arch/Build
tools/perf/pmu-events/Build
tools/perf/tests/Build
tools/perf/tests/task-exit.c
tools/perf/util/evsel.c
tools/perf/util/header.c
tools/perf/util/machine.c
tools/perf/util/probe-event.c
tools/perf/util/unwind-libdw.c
tools/testing/nvdimm/test/iomap.c
tools/testing/nvdimm/test/nfit.c
tools/testing/nvdimm/test/nfit_test.h
tools/testing/selftests/bpf/bpf_endian.h
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/ntb/ntb_test.sh

index 05f26c8..f59aad5 100644 (file)
                        expediting.  Set to zero to disable automatic
                        expediting.
 
+       stack_guard_gap=        [MM]
+                       override the default stack gap protection. The value
+                       is in page units and it defines how many pages prior
+                       to (for stacks growing down) resp. after (for stacks
+                       growing up) the main stack are reserved for no other
+                       mapping. Default value is 256 pages.
+
        stacktrace      [FTRACE]
                        Enabled the stack tracer on boot up.
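
The repeated vma->vm_start -> vm_start_gap(vma) conversions in the arch
arch_get_unmapped_area() hunks below rely on a helper this series adds to
include/linux/mm.h.  A sketch of its assumed shape, on the premise that it
implements exactly the semantics documented above (stack_guard_gap is an
assumed global holding the gap in bytes, 256 pages by default; VM_GROWSUP
stacks would be covered by a symmetric vm_end_gap() helper):

    static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
    {
            unsigned long vm_start = vma->vm_start;

            /* For a downward-growing stack, report the start of the
             * mapping as if the guard gap were part of it, so callers
             * refuse to place a new mapping inside the gap. */
            if (vma->vm_flags & VM_GROWSDOWN) {
                    vm_start -= stack_guard_gap;
                    if (vm_start > vma->vm_start)   /* underflow: clamp */
                            vm_start = 0;
            }
            return vm_start;
    }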
 
index 01ddeaf..9490f28 100644 (file)
@@ -632,7 +632,7 @@ to i/o submission, if the bio fields are likely to be accessed after the
 i/o is issued (since the bio may otherwise get freed in case i/o completion
 happens in the meantime).
 
-The bio_clone() routine may be used to duplicate a bio, where the clone
+The bio_clone_fast() routine may be used to duplicate a bio, where the clone
 shares the bio_vec_list with the original bio (i.e. both point to the
 same bio_vec_list). This would typically be used for splitting i/o requests
 in lvm or md.
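
A minimal usage sketch (assuming the 4.12-era signature from block/bio.c,
with bs a caller-provided struct bio_set):

    /* The clone shares bi_io_vec with the original, as described above,
     * so it stays valid only while the original's bio_vec list does. */
    struct bio *clone = bio_clone_fast(bio, GFP_NOIO, bs);

    if (clone)
            submit_bio(clone);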
index e9c5a1d..f465647 100644 (file)
@@ -22,7 +22,8 @@ Required properties :
 - #clock-cells : must contain 1
 - #reset-cells : must contain 1
 
-For the PRCM CCUs on H3/A64, one more clock is needed:
+For the PRCM CCUs on H3/A64, two more clocks are needed:
+- "pll-periph": the SoC's peripheral PLL from the main CCU
 - "iosc": the SoC's internal frequency oscillator
 
 Example for generic CCU:
@@ -39,8 +40,8 @@ Example for PRCM CCU:
 r_ccu: clock@01f01400 {
        compatible = "allwinner,sun50i-a64-r-ccu";
        reg = <0x01f01400 0x100>;
-       clocks = <&osc24M>, <&osc32k>, <&iosc>;
-       clock-names = "hosc", "losc", "iosc";
+       clocks = <&osc24M>, <&osc32k>, <&iosc>, <&ccu CLK_PLL_PERIPH0>;
+       clock-names = "hosc", "losc", "iosc", "pll-periph";
        #clock-cells = <1>;
        #reset-cells = <1>;
 };
index 42c3bb2..01e331a 100644 (file)
@@ -41,9 +41,9 @@ Required properties:
 Optional properties:
 
 In order to use the GPIO lines in PWM mode, some additional optional
-properties are required. Only Armada 370 and XP support these properties.
+properties are required.
 
-- compatible: Must contain "marvell,armada-370-xp-gpio"
+- compatible: Must contain "marvell,armada-370-gpio"
 
 - reg: an additional register set is needed, for the GPIO Blink
   Counter on/off registers.
@@ -71,7 +71,7 @@ Example:
                };
 
                gpio1: gpio@18140 {
-                       compatible = "marvell,armada-370-xp-gpio";
+                       compatible = "marvell,armada-370-gpio";
                        reg = <0x18140 0x40>, <0x181c8 0x08>;
                        reg-names = "gpio", "pwm";
                        ngpios = <17>;
index bbd083f..1db6e00 100644 (file)
@@ -31,7 +31,7 @@ Example:
                compatible = "st,stm32-timers";
                reg = <0x40010000 0x400>;
                clocks = <&rcc 0 160>;
-               clock-names = "clk_int";
+               clock-names = "int";
 
                pwm {
                        compatible = "st,stm32-pwm";
index d6c6e41..8ec2ca2 100644 (file)
@@ -34,7 +34,7 @@ Required properties:
       "brcm,bcm6328-switch"
       "brcm,bcm6368-switch" and the mandatory "brcm,bcm63xx-switch"
 
-See Documentation/devicetree/bindings/dsa/dsa.txt for a list of additional
+See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
 required and optional properties.
 
 Examples:
index 16c3a95..acfafc8 100644 (file)
@@ -27,6 +27,7 @@ Optional properties:
   of the device. On many systems this is wired high so the device goes
   out of reset at power-on, but if it is under program control, this
   optional GPIO can wake up in response to it.
+- vdd33a-supply, vddvario-supply : 3.3V analog and IO logic power supplies
 
 Examples:
 
index 59f4db2..f55639d 100644 (file)
@@ -122,7 +122,7 @@ associated flow of the packet. The hash is either provided by hardware
 or will be computed in the stack. Capable hardware can pass the hash in
 the receive descriptor for the packet; this would usually be the same
 hash used for RSS (e.g. computed Toeplitz hash). The hash is saved in
-skb->rx_hash and can be used elsewhere in the stack as a hash of the
+skb->hash and can be used elsewhere in the stack as a hash of the
 packet’s flow.
 
 Each receive hardware queue has an associated list of CPUs to which
index 09b5ab6..503f80a 100644 (file)
@@ -2964,7 +2964,7 @@ F:        sound/pci/oxygen/
 
 C6X ARCHITECTURE
 M:     Mark Salter <msalter@redhat.com>
-M:     Aurelien Jacquiot <a-jacquiot@ti.com>
+M:     Aurelien Jacquiot <jacquiot.aurelien@gmail.com>
 L:     linux-c6x-dev@linux-c6x.org
 W:     http://www.linux-c6x.org/wiki/index.php/Main_Page
 S:     Maintained
@@ -13462,6 +13462,17 @@ W:     http://en.wikipedia.org/wiki/Util-linux
 T:     git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
 S:     Maintained
 
+UUID HELPERS
+M:     Christoph Hellwig <hch@lst.de>
+R:     Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+L:     linux-kernel@vger.kernel.org
+T:     git git://git.infradead.org/users/hch/uuid.git
+F:     lib/uuid.c
+F:     lib/test_uuid.c
+F:     include/linux/uuid.h
+F:     include/uapi/linux/uuid.h
+S:     Maintained
+
 UVESAFB DRIVER
 M:     Michal Januszewski <spock@gentoo.org>
 L:     linux-fbdev@vger.kernel.org
index 83f6d99..283c623 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION =
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -1437,7 +1437,7 @@ help:
        @echo  '  make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
        @echo  '  make V=2   [targets] 2 => give reason for rebuild of target'
        @echo  '  make O=dir [targets] Locate all output files in "dir", including .config'
-       @echo  '  make C=1   [targets] Check all c source with $$CHECK (sparse by default)'
+       @echo  '  make C=1   [targets] Check re-compiled c source with $$CHECK (sparse by default)'
        @echo  '  make C=2   [targets] Force check of all c source with $$CHECK'
        @echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
        @echo  '  make W=n   [targets] Enable extra gcc checks, n=1,2,3 where'
index 6e1242d..4104a08 100644 (file)
@@ -86,8 +86,6 @@ struct task_struct;
 #define TSK_K_BLINK(tsk)       TSK_K_REG(tsk, 4)
 #define TSK_K_FP(tsk)          TSK_K_REG(tsk, 0)
 
-#define thread_saved_pc(tsk)   TSK_K_BLINK(tsk)
-
 extern void start_thread(struct pt_regs * regs, unsigned long pc,
                         unsigned long usp);
 
index 3e25e8d..2e13683 100644 (file)
@@ -65,7 +65,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
index 4c1a35f..c0fcab6 100644 (file)
@@ -1416,6 +1416,7 @@ choice
        config VMSPLIT_3G
                bool "3G/1G user/kernel split"
        config VMSPLIT_3G_OPT
+               depends on !ARM_LPAE
                bool "3G/1G user/kernel split (for full 1G low memory)"
        config VMSPLIT_2G
                bool "2G/2G user/kernel split"
index 3f7d1b7..a17ca8d 100644 (file)
@@ -17,7 +17,8 @@
                @ there.
                .inst   'M' | ('Z' << 8) | (0x1310 << 16)   @ tstne r0, #0x4d000
 #else
-               W(mov)  r0, r0
+ AR_CLASS(     mov     r0, r0          )
+  M_CLASS(     nop.w                   )
 #endif
                .endm
 
index c5d2589..fc864a8 100644 (file)
 
        mmc1_pins: pinmux_mmc1_pins {
                pinctrl-single,pins = <
-                       AM33XX_IOPAD(0x960, PIN_INPUT | MUX_MODE7)              /* spi0_cs1.gpio0_6 */
+                       AM33XX_IOPAD(0x96c, PIN_INPUT | MUX_MODE7)              /* uart0_rtsn.gpio1_9 */
                >;
        };
 
                        AM33XX_IOPAD(0x834, PIN_INPUT_PULLUP | MUX_MODE7)       /* nKbdReset - gpmc_ad13.gpio1_13 */
                        AM33XX_IOPAD(0x838, PIN_INPUT_PULLUP | MUX_MODE7)       /* nDispReset - gpmc_ad14.gpio1_14 */
                        AM33XX_IOPAD(0x844, PIN_INPUT_PULLUP | MUX_MODE7)       /* USB1_enPower - gpmc_a1.gpio1_17 */
-                       /* AVR Programming - SPI Bus (bit bang) - Screen and Keyboard */
-                       AM33XX_IOPAD(0x954, PIN_INPUT_PULLUP | MUX_MODE7)       /* Kbd/Disp/BattMOSI spi0_d0.gpio0_3 */
-                       AM33XX_IOPAD(0x958, PIN_INPUT_PULLUP | MUX_MODE7)       /* Kbd/Disp/BattMISO spi0_d1.gpio0_4 */
-                       AM33XX_IOPAD(0x950, PIN_INPUT_PULLUP | MUX_MODE7)       /* Kbd/Disp/BattSCLK spi0_clk.gpio0_2 */
                        /* PDI Bus - Battery system */
                        AM33XX_IOPAD(0x840, PIN_INPUT_PULLUP | MUX_MODE7)       /* nBattReset  gpmc_a0.gpio1_16 */
                        AM33XX_IOPAD(0x83c, PIN_INPUT_PULLUP | MUX_MODE7)       /* BattPDIData gpmc_ad15.gpio1_15 */
        pinctrl-names = "default";
        pinctrl-0 = <&mmc1_pins>;
        bus-width = <4>;
-       cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>;
+       cd-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
        vmmc-supply = <&vmmcsd_fixed>;
 };
 
index 1aeeacb..d4f600d 100644 (file)
                };
 
                r_ccu: clock@1f01400 {
-                       compatible = "allwinner,sun50i-a64-r-ccu";
+                       compatible = "allwinner,sun8i-h3-r-ccu";
                        reg = <0x01f01400 0x100>;
-                       clocks = <&osc24M>, <&osc32k>, <&iosc>;
-                       clock-names = "hosc", "losc", "iosc";
+                       clocks = <&osc24M>, <&osc32k>, <&iosc>,
+                                <&ccu 9>;
+                       clock-names = "hosc", "losc", "iosc", "pll-periph";
                        #clock-cells = <1>;
                        #reset-cells = <1>;
                };
index 32e1a95..4e80bf7 100644 (file)
@@ -315,7 +315,7 @@ static void __init cacheid_init(void)
        if (arch >= CPU_ARCH_ARMv6) {
                unsigned int cachetype = read_cpuid_cachetype();
 
-               if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+               if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) {
                        cacheid = 0;
                } else if ((cachetype & (7 << 29)) == 4 << 29) {
                        /* ARMv7 register format */
index 2239fde..f0701d8 100644 (file)
@@ -90,7 +90,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
@@ -141,7 +141,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                        addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                               (!vma || addr + len <= vma->vm_start))
+                               (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
index 31af3cb..e46a6a4 100644 (file)
@@ -1218,15 +1218,15 @@ void __init adjust_lowmem_bounds(void)
 
        high_memory = __va(arm_lowmem_limit - 1) + 1;
 
+       if (!memblock_limit)
+               memblock_limit = arm_lowmem_limit;
+
        /*
         * Round the memblock limit down to a pmd size.  This
         * helps to ensure that we will allocate memory from the
         * last full pmd, which should be mapped.
         */
-       if (memblock_limit)
-               memblock_limit = round_down(memblock_limit, PMD_SIZE);
-       if (!memblock_limit)
-               memblock_limit = arm_lowmem_limit;
+       memblock_limit = round_down(memblock_limit, PMD_SIZE);
 
        if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
                if (memblock_end_of_DRAM() > arm_lowmem_limit) {
index c7f669f..166c9ef 100644 (file)
                r_ccu: clock@1f01400 {
                        compatible = "allwinner,sun50i-a64-r-ccu";
                        reg = <0x01f01400 0x100>;
-                       clocks = <&osc24M>, <&osc32k>, <&iosc>;
-                       clock-names = "hosc", "losc", "iosc";
+                       clocks = <&osc24M>, <&osc32k>, <&iosc>,
+                                <&ccu 11>;
+                       clock-names = "hosc", "losc", "iosc", "pll-periph";
                        #clock-cells = <1>;
                        #reset-cells = <1>;
                };
index 4d314a2..732e2e0 100644 (file)
@@ -40,7 +40,7 @@
  *     OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include "sunxi-h3-h5.dtsi"
+#include <arm/sunxi-h3-h5.dtsi>
 
 / {
        cpus {
diff --git a/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi b/arch/arm64/boot/dts/allwinner/sunxi-h3-h5.dtsi
deleted file mode 120000 (symlink)
index 036f01d..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../../../../arm/boot/dts/sunxi-h3-h5.dtsi
\ No newline at end of file
index 41b6e31..d0cb007 100644 (file)
@@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk)
                /* tkr_mono.cycle_last == tkr_raw.cycle_last */
                vdso_data->cs_cycle_last        = tk->tkr_mono.cycle_last;
                vdso_data->raw_time_sec         = tk->raw_time.tv_sec;
-               vdso_data->raw_time_nsec        = tk->raw_time.tv_nsec;
+               vdso_data->raw_time_nsec        = (tk->raw_time.tv_nsec <<
+                                                  tk->tkr_raw.shift) +
+                                                 tk->tkr_raw.xtime_nsec;
                vdso_data->xtime_clock_sec      = tk->xtime_sec;
                vdso_data->xtime_clock_nsec     = tk->tkr_mono.xtime_nsec;
-               /* tkr_raw.xtime_nsec == 0 */
                vdso_data->cs_mono_mult         = tk->tkr_mono.mult;
                vdso_data->cs_raw_mult          = tk->tkr_raw.mult;
                /* tkr_mono.shift == tkr_raw.shift */
index e00b467..76320e9 100644 (file)
@@ -256,7 +256,6 @@ monotonic_raw:
        seqcnt_check fail=monotonic_raw
 
        /* All computations are done with left-shifted nsecs. */
-       lsl     x14, x14, x12
        get_nsec_per_sec res=x9
        lsl     x9, x9, x12
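
Taken together, the two hunks above keep the CLOCK_MONOTONIC_RAW math
consistent: the kernel now publishes raw_time_nsec already left-shifted,
with tkr_raw.xtime_nsec folded in, so the vDSO no longer shifts it again.
The computation is effectively (a sketch of the resulting arithmetic):

    raw_ns = ((cycle_now - cs_cycle_last) * cs_raw_mult
              + raw_time_nsec) >> tkr_raw.shift     /* plus raw_time_sec */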
 
index 71f9305..c870d6f 100644 (file)
@@ -36,6 +36,7 @@ int bpf_jit_enable __read_mostly;
 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
+#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
 
 /* Map BPF registers to A64 registers */
 static const int bpf2a64[] = {
@@ -57,6 +58,7 @@ static const int bpf2a64[] = {
        /* temporary registers for internal BPF JIT */
        [TMP_REG_1] = A64_R(10),
        [TMP_REG_2] = A64_R(11),
+       [TMP_REG_3] = A64_R(12),
        /* tail_call_cnt */
        [TCALL_CNT] = A64_R(26),
        /* temporary register for blinding constants */
@@ -319,6 +321,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
        const u8 src = bpf2a64[insn->src_reg];
        const u8 tmp = bpf2a64[TMP_REG_1];
        const u8 tmp2 = bpf2a64[TMP_REG_2];
+       const u8 tmp3 = bpf2a64[TMP_REG_3];
        const s16 off = insn->off;
        const s32 imm = insn->imm;
        const int i = insn - ctx->prog->insnsi;
@@ -689,10 +692,10 @@ emit_cond_jmp:
                emit(A64_PRFM(tmp, PST, L1, STRM), ctx);
                emit(A64_LDXR(isdw, tmp2, tmp), ctx);
                emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
-               emit(A64_STXR(isdw, tmp2, tmp, tmp2), ctx);
+               emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
                jmp_offset = -3;
                check_imm19(jmp_offset);
-               emit(A64_CBNZ(0, tmp2, jmp_offset), ctx);
+               emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
                break;
 
        /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
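
The bug being fixed: the store-exclusive's status register was the same
register as the data being stored (tmp2), which corrupts the retry loop
(and is CONSTRAINED UNPREDICTABLE per the ARM ARM).  With the dedicated
tmp3 status register, the emitted XADD sequence is roughly (a sketch using
the bpf2a64[] mapping above: tmp = x10, tmp2 = x11, tmp3 = x12; "src"
stands for the BPF source register):

    1:  ldxr    x11, [x10]          // load-exclusive the old value
        add     x11, x11, src       // add the operand
        stxr    w12, x11, [x10]     // w12 = status; must not alias x11
        cbnz    w12, 1b             // retry if the exclusive store failed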
index 85d4af9..dbdbb8a 100644 (file)
@@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk)   (tsk->thread.pc)
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define        KSTK_EIP(tsk)                                                   \
index b9eb3da..7c87b5b 100644 (file)
@@ -96,11 +96,6 @@ static inline void release_thread(struct task_struct *dead_task)
 #define release_segments(mm)           do { } while (0)
 
 /*
- * saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
-/*
  * saved kernel SP and DP of a blocked thread.
  */
 #ifdef _BIG_ENDIAN
index e299d30..a2cdb15 100644 (file)
@@ -69,14 +69,6 @@ void hard_reset_now (void)
        while(1) /* waiting for RETRIBUTION! */ ;
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-       return task_pt_regs(t)->irp;
-}
-
 /* setup the child's kernel stack with a pt_regs and switch_stack on it.
  * it will be un-nested during _resume and _ret_from_sys_call when the
  * new thread is scheduled.
index c530a8f..fe87b38 100644 (file)
@@ -85,14 +85,6 @@ hard_reset_now(void)
 }
 
 /*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-       return task_pt_regs(t)->erp;
-}
-
-/*
  * Setup the child's kernel stack with a pt_regs and call switch_stack() on it.
  * It will be unnested during _resume and _ret_from_sys_call when the new thread
  * is scheduled.
index 15b815d..bc2729e 100644 (file)
@@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_ESP(tsk)   ((tsk) == current ? rdusp() : (tsk)->thread.usp)
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 /* Free all resources held by a thread. */
 static inline void release_thread(struct task_struct *dead_task)
 {
index ddaeb9c..e4d08d7 100644 (file)
@@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ...
 #define release_segments(mm)           do { } while (0)
 #define forget_segments()              do { } while (0)
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define        KSTK_EIP(tsk)   ((tsk)->thread.frame0->pc)
index 5a4c92a..a957b37 100644 (file)
@@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p)
        return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       /* Check whether the thread is blocked in resume() */
-       if (in_sched_functions(tsk->thread.pc))
-               return ((unsigned long *)tsk->thread.fp)[2];
-       else
-               return tsk->thread.pc;
-}
-
 int elf_check_arch(const struct elf32_hdr *hdr)
 {
        unsigned long hsr0 = __get_HSR(0);
index da82c25..46aa289 100644 (file)
@@ -75,7 +75,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
                addr = PAGE_ALIGN(addr);
                vma = find_vma(current->mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        goto success;
        }
 
index 65132d7..afa5314 100644 (file)
@@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk);
 unsigned long get_wchan(struct task_struct *p);
 
 #define        KSTK_EIP(tsk)   \
index 0f5db5b..d1ddcab 100644 (file)
@@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags,
        return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return ((struct pt_regs *)tsk->thread.esp0)->pc;
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
        unsigned long fp, pc;
index 45a8254..ce67940 100644 (file)
@@ -33,9 +33,6 @@
 /*  task_struct, defined elsewhere, is the "process descriptor" */
 struct task_struct;
 
-/*  this is defined in arch/process.c  */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
 
 /*
index de715ba..656050c 100644 (file)
@@ -61,14 +61,6 @@ void arch_cpu_idle(void)
 }
 
 /*
- *  Return saved PC of a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return 0;
-}
-
-/*
  * Copy architecture-specific thread state
  */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
index 26a63d6..ab982f0 100644 (file)
@@ -602,23 +602,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
 }
 
 /*
- * Return saved PC of a blocked thread.
- * Note that the only way T can block is through a call to schedule() -> switch_to().
- */
-static inline unsigned long
-thread_saved_pc (struct task_struct *t)
-{
-       struct unw_frame_info info;
-       unsigned long ip;
-
-       unw_init_from_blocked_task(&info, t);
-       if (unw_unwind(&info) < 0)
-               return 0;
-       unw_get_ip(&info, &ip);
-       return ip;
-}
-
-/*
  * Get the current instruction/program counter value.
  */
 #define current_text_addr() \
index 5767367..657874e 100644 (file)
@@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *);
 extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
 extern void release_segments(struct mm_struct * mm);
 
-extern unsigned long thread_saved_pc(struct task_struct *);
-
 /* Copy and release all segment info associated with a VM */
 #define copy_segments(p, mm)  do { } while (0)
 #define release_segments(mm)  do { } while (0)
index d8ffcfe..8cd7e03 100644 (file)
 
 #include <linux/err.h>
 
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return tsk->thread.lr;
-}
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
index 77239e8..94c3603 100644 (file)
@@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define        KSTK_EIP(tsk)   \
index e475c94..7df92f8 100644 (file)
 asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
-
-/*
- * Return saved PC from a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
-       /* Check whether the thread is blocked in resume() */
-       if (in_sched_functions(sw->retpc))
-               return ((unsigned long *)sw->a6)[1];
-       else
-               return sw->retpc;
-}
-
 void arch_cpu_idle(void)
 {
 #if defined(MACH_ATARI_ONLY)
index 37ef196..330d556 100644 (file)
@@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 # define KSTK_EIP(tsk) (0)
@@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/* Return saved (kernel) PC of a blocked thread.  */
-#  define thread_saved_pc(tsk) \
-       ((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
-
 unsigned long get_wchan(struct task_struct *p);
 
 /* The size allocated for kernel stacks. This _must_ be a power of two! */
index e92a817..6527ec2 100644 (file)
@@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
        return 0;
 }
 
-#ifndef CONFIG_MMU
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       struct cpu_context *ctx =
-               &(((struct thread_info *)(tsk->stack))->cpu_context);
-
-       /* Check whether the thread is blocked in resume() */
-       if (in_sched_functions(ctx->r15))
-               return (unsigned long)ctx->r15;
-       else
-               return ctx->r14;
-}
-#endif
-
 unsigned long get_wchan(struct task_struct *p)
 {
 /* TBD (used by procfs) */
index 2728a9a..145b5ce 100644 (file)
@@ -128,19 +128,19 @@ quiet_cmd_cpp_its_S = ITS     $@
                        -DADDR_BITS=$(ADDR_BITS) \
                        -DADDR_CELLS=$(itb_addr_cells)
 
-$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
        $(call if_changed_dep,cpp_its_S,none,vmlinux.bin)
 
-$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
        $(call if_changed_dep,cpp_its_S,gzip,vmlinux.bin.gz)
 
-$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX)  FORCE
        $(call if_changed_dep,cpp_its_S,bzip2,vmlinux.bin.bz2)
 
-$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
        $(call if_changed_dep,cpp_its_S,lzma,vmlinux.bin.lzma)
 
-$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
        $(call if_changed_dep,cpp_its_S,lzo,vmlinux.bin.lzo)
 
 quiet_cmd_itb-image = ITB     $@
index d34536e..279b6d1 100644 (file)
@@ -35,7 +35,12 @@ extern pte_t *pkmap_page_table;
  * easily, subsequent pte tables have to be allocated in one physical
  * chunk of RAM.
  */
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+#define LAST_PKMAP 512
+#else
 #define LAST_PKMAP 1024
+#endif
+
 #define LAST_PKMAP_MASK (LAST_PKMAP-1)
 #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT)
 #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
index 291846d..ad1a999 100644 (file)
@@ -43,7 +43,8 @@ typedef union mips_instruction kprobe_opcode_t;
 
 #define flush_insn_slot(p)                                             \
 do {                                                                   \
-       flush_icache_range((unsigned long)p->addr,                      \
+       if (p->addr)                                                    \
+               flush_icache_range((unsigned long)p->addr,              \
                           (unsigned long)p->addr +                     \
                           (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)));  \
 } while (0)
index 6f94bed..74afe8c 100644 (file)
 #define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
+#ifdef CONFIG_HIGHMEM
+#include <asm/highmem.h>
+#endif
+
 extern int temp_tlb_entry;
 
 /*
@@ -62,7 +66,8 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1,
 
 #define VMALLOC_START    MAP_BASE
 
-#define PKMAP_BASE             (0xfe000000UL)
+#define PKMAP_END      ((FIXADDR_START) & ~((LAST_PKMAP << PAGE_SHIFT)-1))
+#define PKMAP_BASE     (PKMAP_END - PAGE_SIZE * LAST_PKMAP)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END   (PKMAP_BASE-2*PAGE_SIZE)
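
Worked example of the new pkmap layout (assumptions: 4 KiB pages and an
illustrative FIXADDR_START; the real value is configuration-dependent).
With CONFIG_PHYS_ADDR_T_64BIT, LAST_PKMAP = 512, so the pkmap window is
512 * 4 KiB = 2 MiB and PKMAP_END aligns down to a 2 MiB boundary just
below the fixmap:

    #define PAGE_SHIFT     12
    #define PAGE_SIZE      (1UL << PAGE_SHIFT)
    #define LAST_PKMAP     512                 /* CONFIG_PHYS_ADDR_T_64BIT */
    #define FIXADDR_START  0xfefe0000UL        /* hypothetical */

    /* PKMAP_END  = 0xfefe0000 & ~0x1fffff = 0xfee00000  (2 MiB aligned) */
    /* PKMAP_BASE = 0xfee00000 -  0x200000 = 0xfec00000  (512 pages)     */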
index b11facd..f702a45 100644 (file)
@@ -804,8 +804,10 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
                        break;
                }
                /* Compact branch: BNEZC || JIALC */
-               if (insn.i_format.rs)
+               if (!insn.i_format.rs) {
+                       /* JIALC: set $31/ra */
                        regs->regs[31] = epc + 4;
+               }
                regs->cp0_epc += 8;
                break;
 #endif
index 8d83fc2..38a3029 100644 (file)
@@ -11,6 +11,7 @@
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/compiler.h>
+#include <asm/irqflags.h>
 #include <asm/regdef.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
@@ -119,6 +120,7 @@ work_pending:
        andi    t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS
        beqz    t0, work_notifysig
 work_resched:
+       TRACE_IRQS_OFF
        jal     schedule
 
        local_irq_disable               # make sure need_resched and
@@ -155,6 +157,7 @@ syscall_exit_work:
        beqz    t0, work_pending        # trace bit set?
        local_irq_enable                # could let syscall_trace_leave()
                                        # call schedule() instead
+       TRACE_IRQS_ON
        move    a0, sp
        jal     syscall_trace_leave
        b       resume_userspace
index 30a3b75..9d9b8fb 100644 (file)
@@ -38,20 +38,6 @@ void arch_ftrace_update_code(int command)
 
 #endif
 
-/*
- * Check if the address is in kernel space
- *
- * Clone core_kernel_text() from kernel/extable.c, but doesn't call
- * init_kernel_text() for Ftrace doesn't trace functions in init sections.
- */
-static inline int in_kernel_space(unsigned long ip)
-{
-       if (ip >= (unsigned long)_stext &&
-           ip <= (unsigned long)_etext)
-               return 1;
-       return 0;
-}
-
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 #define JAL 0x0c000000         /* jump & link: ip --> ra, jump to target */
@@ -198,7 +184,7 @@ int ftrace_make_nop(struct module *mod,
         * If ip is in kernel space, no long call, otherwise, long call is
         * needed.
         */
-       new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
+       new = core_kernel_text(ip) ? INSN_NOP : INSN_B_1F;
 #ifdef CONFIG_64BIT
        return ftrace_modify_code(ip, new);
 #else
@@ -218,12 +204,12 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
        unsigned int new;
        unsigned long ip = rec->ip;
 
-       new = in_kernel_space(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
+       new = core_kernel_text(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
 
 #ifdef CONFIG_64BIT
        return ftrace_modify_code(ip, new);
 #else
-       return ftrace_modify_code_2r(ip, new, in_kernel_space(ip) ?
+       return ftrace_modify_code_2r(ip, new, core_kernel_text(ip) ?
                                                INSN_NOP : insn_la_mcount[1]);
 #endif
 }
@@ -289,7 +275,7 @@ unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long
         * instruction "lui v1, hi_16bit_of_mcount" (offset is 24), but for
         * kernel, move after the instruction "move ra, at" (offset is 16)
         */
-       ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
+       ip = self_ra - (core_kernel_text(self_ra) ? 16 : 24);
 
        /*
         * search the text until finding the non-store instruction or "s{d,w}
@@ -394,7 +380,7 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
         * entries configured through the tracing/set_graph_function interface.
         */
 
-       insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
+       insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
        trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
 
        /* Only trace if the calling function expects to */
index cf05220..d1bb506 100644 (file)
@@ -106,8 +106,8 @@ NESTED(kernel_entry, 16, sp)                        # kernel entry point
        beq             t0, t1, dtb_found
 #endif
        li              t1, -2
-       beq             a0, t1, dtb_found
        move            t2, a1
+       beq             a0, t1, dtb_found
 
        li              t2, 0
 dtb_found:
index 313a88b..f3e301f 100644 (file)
@@ -1597,7 +1597,6 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
                break;
        case CPU_P5600:
        case CPU_P6600:
-       case CPU_I6400:
                /* 8-bit event numbers */
                raw_id = config & 0x1ff;
                base_id = raw_id & 0xff;
@@ -1610,6 +1609,11 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
                raw_event.range = P;
 #endif
                break;
+       case CPU_I6400:
+               /* 8-bit event numbers */
+               base_id = config & 0xff;
+               raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;
+               break;
        case CPU_1004K:
                if (IS_BOTH_COUNTERS_1004K_EVENT(base_id))
                        raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;
index 5f928c3..d994160 100644 (file)
@@ -56,7 +56,6 @@ DECLARE_BITMAP(state_support, CPS_PM_STATE_COUNT);
  * state. Actually per-core rather than per-CPU.
  */
 static DEFINE_PER_CPU_ALIGNED(u32*, ready_count);
-static DEFINE_PER_CPU_ALIGNED(void*, ready_count_alloc);
 
 /* Indicates online CPUs coupled with the current CPU */
 static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled);
@@ -642,7 +641,6 @@ static int cps_pm_online_cpu(unsigned int cpu)
 {
        enum cps_pm_state state;
        unsigned core = cpu_data[cpu].core;
-       unsigned dlinesz = cpu_data[cpu].dcache.linesz;
        void *entry_fn, *core_rc;
 
        for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) {
@@ -662,16 +660,11 @@ static int cps_pm_online_cpu(unsigned int cpu)
        }
 
        if (!per_cpu(ready_count, core)) {
-               core_rc = kmalloc(dlinesz * 2, GFP_KERNEL);
+               core_rc = kmalloc(sizeof(u32), GFP_KERNEL);
                if (!core_rc) {
                        pr_err("Failed to allocate core %u ready_count\n", core);
                        return -ENOMEM;
                }
-               per_cpu(ready_count_alloc, core) = core_rc;
-
-               /* Ensure ready_count is aligned to a cacheline boundary */
-               core_rc += dlinesz - 1;
-               core_rc = (void *)((unsigned long)core_rc & ~(dlinesz - 1));
                per_cpu(ready_count, core) = core_rc;
        }
 
index 9681b58..38dfa27 100644 (file)
@@ -201,6 +201,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)
 {
        struct pt_regs regs;
        mm_segment_t old_fs = get_fs();
+
+       regs.cp0_status = KSU_KERNEL;
        if (sp) {
                regs.regs[29] = (unsigned long)sp;
                regs.regs[31] = 0;
index 7c6336d..7cd9216 100644 (file)
@@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
 int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
                          bool user, bool kernel)
 {
-       int idx_user, idx_kernel;
+       /*
+        * Initialize idx_user and idx_kernel to work around a bogus
+        * maybe-initialized warning when using GCC 6.
+        */
+       int idx_user = 0, idx_kernel = 0;
        unsigned long flags, old_entryhi;
 
        local_irq_save(flags);
index 4a2d03c..caa62f2 100644 (file)
@@ -54,7 +54,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
                return ieee754dp_nanxcpt(z);
        case IEEE754_CLASS_DNORM:
                DPDNORMZ;
-       /* QNAN is handled separately below */
+       /* QNAN and ZERO cases are handled separately below */
        }
 
        switch (CLPAIR(xc, yc)) {
@@ -210,6 +210,9 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
        }
        assert(rm & (DP_HIDDEN_BIT << 3));
 
+       if (zc == IEEE754_CLASS_ZERO)
+               return ieee754dp_format(rs, re, rm);
+
        /* And now the addition */
        assert(zm & DP_HIDDEN_BIT);
 
index a8cd8b4..c91d5e5 100644 (file)
@@ -54,7 +54,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
                return ieee754sp_nanxcpt(z);
        case IEEE754_CLASS_DNORM:
                SPDNORMZ;
-       /* QNAN is handled separately below */
+       /* QNAN and ZERO cases are handled separately below */
        }
 
        switch (CLPAIR(xc, yc)) {
@@ -203,6 +203,9 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
        }
        assert(rm & (SP_HIDDEN_BIT << 3));
 
+       if (zc == IEEE754_CLASS_ZERO)
+               return ieee754sp_format(rs, re, rm);
+
        /* And now the addition */
 
        assert(zm & SP_HIDDEN_BIT);
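Sanity check for the new ZERO handling in both the dp and sp variants: with a zero addend, maddf must reduce to the rounded product. The C99 fma() has the same single-rounding semantics, so a quick user-space check (link with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
	double x = 3.0, y = 1.0 / 3.0;

	/* With a zero addend, the fused result is just the rounded product. */
	printf("fma(x, y, 0.0) = %.17g\n", fma(x, y, 0.0));
	printf("x * y          = %.17g\n", x * y);
	return 0;
}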
index fe8df14..e08598c 100644 (file)
@@ -68,12 +68,25 @@ static inline struct page *dma_addr_to_page(struct device *dev,
  * systems and only the R10000 and R12000 are used in such systems, the
  * SGI IP28 Indigo² and the SGI IP32 aka O2, respectively.
  */
-static inline int cpu_needs_post_dma_flush(struct device *dev)
+static inline bool cpu_needs_post_dma_flush(struct device *dev)
 {
-       return !plat_device_is_coherent(dev) &&
-              (boot_cpu_type() == CPU_R10000 ||
-               boot_cpu_type() == CPU_R12000 ||
-               boot_cpu_type() == CPU_BMIPS5000);
+       if (plat_device_is_coherent(dev))
+               return false;
+
+       switch (boot_cpu_type()) {
+       case CPU_R10000:
+       case CPU_R12000:
+       case CPU_BMIPS5000:
+               return true;
+
+       default:
+               /*
+                * Presence of MAARs suggests that the CPU supports
+                * speculatively prefetching data, and therefore requires
+                * the post-DMA flush/invalidate.
+                */
+               return cpu_has_maar;
+       }
 }
 
 static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp)
index 64dd8bd..28adeab 100644 (file)
@@ -93,7 +93,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
 
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
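This hunk, and the parisc and powerpc hunks further down, all switch the free-gap checks to vm_start_gap()/vm_end_gap() as part of the stack-guard-gap rework. A standalone sketch of the vm_start_gap() helper, modeled on the include/linux/mm.h version (flag values and addresses here are illustrative):

#include <stdio.h>

#define PAGE_SHIFT	12
#define VM_GROWSDOWN	0x1UL			/* illustrative flag value */

struct vm_area_struct {				/* trimmed to fields used here */
	unsigned long vm_start, vm_end, vm_flags;
};

static unsigned long stack_guard_gap = 256UL << PAGE_SHIFT;

static unsigned long vm_start_gap(struct vm_area_struct *vma)
{
	unsigned long vm_start = vma->vm_start;

	if (vma->vm_flags & VM_GROWSDOWN) {
		vm_start -= stack_guard_gap;
		if (vm_start > vma->vm_start)	/* clamp on underflow */
			vm_start = 0;
	}
	return vm_start;
}

int main(void)
{
	struct vm_area_struct stack = {
		.vm_start = 0x7ffff000UL,
		.vm_end   = 0x80000000UL,
		.vm_flags = VM_GROWSDOWN,
	};

	/* Requests that would end inside the guard gap now fail the check. */
	printf("raw start %#lx, effective start %#lx\n",
	       stack.vm_start, vm_start_gap(&stack));
	return 0;
}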
index adc6911..b19a3c5 100644 (file)
@@ -51,15 +51,15 @@ void __init pagetable_init(void)
        /*
         * Fixed mappings:
         */
-       vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-       fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base);
+       vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
+       fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base);
 
 #ifdef CONFIG_HIGHMEM
        /*
         * Permanent kmaps:
         */
        vaddr = PKMAP_BASE;
-       fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+       fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
 
        pgd = swapper_pg_dir + __pgd_offset(vaddr);
        pud = pud_offset(pgd, vaddr);
index 18e17ab..3ae4791 100644 (file)
@@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs,
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define task_pt_regs(task) ((task)->thread.uregs)
index c9fa426..89e8027 100644 (file)
 #include "internal.h"
 
 /*
- * return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return ((unsigned long *) tsk->thread.sp)[3];
-}
-
-/*
  * power off function, if any
  */
 void (*pm_power_off)(void);
index 3bbbc3d..4944e2e 100644 (file)
@@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-/* Return saved PC of a blocked thread. */
-#define thread_saved_pc(tsk)   ((tsk)->thread.kregs->ea)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define task_pt_regs(p) \
index a908e6c..396d8f3 100644 (file)
@@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
 void release_thread(struct task_struct *);
 unsigned long get_wchan(struct task_struct *p);
 
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 #define init_stack      (init_thread_union.stack)
 
 #define cpu_relax()     barrier()
index 106859a..f9b7700 100644 (file)
@@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs)
        show_registers(regs);
 }
 
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-       return (unsigned long)user_regs(t->stack)->pc;
-}
-
 void release_thread(struct task_struct *dead_task)
 {
 }
index a3661ee..4c6694b 100644 (file)
@@ -163,12 +163,7 @@ struct thread_struct {
        .flags          = 0 \
        }
 
-/*
- * Return saved PC of a blocked thread.  This is used by ps mostly.
- */
-
 struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *t);
 void show_trace(struct task_struct *task, unsigned long *stack);
 
 /*
index 4516a5b..b64d7d2 100644 (file)
@@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
        return 0;
 }
 
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-       return t->thread.regs.kpc;
-}
-
 unsigned long
 get_wchan(struct task_struct *p)
 {
index e528863..378a754 100644 (file)
@@ -90,7 +90,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
 {
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma, *prev;
        unsigned long task_size = TASK_SIZE;
        int do_color_align, last_mmap;
        struct vm_unmapped_area_info info;
@@ -117,9 +117,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
                else
                        addr = PAGE_ALIGN(addr);
 
-               vma = find_vma(mm, addr);
+               vma = find_vma_prev(mm, addr, &prev);
                if (task_size - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)) &&
+                   (!prev || addr >= vm_end_gap(prev)))
                        goto found_addr;
        }
 
@@ -143,7 +144,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                          const unsigned long len, const unsigned long pgoff,
                          const unsigned long flags)
 {
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma, *prev;
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;
        int do_color_align, last_mmap;
@@ -177,9 +178,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                        addr = COLOR_ALIGN(addr, last_mmap, pgoff);
                else
                        addr = PAGE_ALIGN(addr);
-               vma = find_vma(mm, addr);
+
+               vma = find_vma_prev(mm, addr, &prev);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)) &&
+                   (!prev || addr >= vm_end_gap(prev)))
                        goto found_addr;
        }
 
index f2c562a..0151af6 100644 (file)
                "1:     "PPC_TLNEI"     %4,0\n"                 \
                _EMIT_BUG_ENTRY                                 \
                : : "i" (__FILE__), "i" (__LINE__),             \
-                 "i" (BUGFLAG_TAINT(TAINT_WARN)),              \
+                 "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
                  "i" (sizeof(struct bug_entry)),               \
                  "r" (__ret_warn_on));                         \
        }                                                       \
index a83821f..8814a72 100644 (file)
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_handler(struct pt_regs *regs);
 extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
                           struct kprobe_ctlblk *kcb);
index bb99b65..1189d04 100644 (file)
@@ -378,12 +378,6 @@ struct thread_struct {
 }
 #endif
 
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk)    \
-        ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
-
 #define task_pt_regs(tsk)      ((struct pt_regs *)(tsk)->thread.regs)
 
 unsigned long get_wchan(struct task_struct *p);
index 5c0d8a8..41e88d3 100644 (file)
@@ -267,13 +267,7 @@ do {                                                               \
 extern unsigned long __copy_tofrom_user(void __user *to,
                const void __user *from, unsigned long size);
 
-#ifndef __powerpc64__
-
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#else /* __powerpc64__ */
-
+#ifdef __powerpc64__
 static inline unsigned long
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
index c8a822a..c23ff43 100644 (file)
@@ -94,11 +94,13 @@ struct xive_q {
  * store at 0 and some ESBs support doing a trigger via a
  * separate trigger page.
  */
-#define XIVE_ESB_GET           0x800
-#define XIVE_ESB_SET_PQ_00     0xc00
-#define XIVE_ESB_SET_PQ_01     0xd00
-#define XIVE_ESB_SET_PQ_10     0xe00
-#define XIVE_ESB_SET_PQ_11     0xf00
+#define XIVE_ESB_STORE_EOI     0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI      0x000 /* Load */
+#define XIVE_ESB_GET           0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00     0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01     0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10     0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11     0xf00 /* Load */
 
 #define XIVE_ESB_VAL_P         0x2
 #define XIVE_ESB_VAL_Q         0x1
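The low bits of the ESB MMIO offset select the operation as well as the page, so an EOI is either a store at XIVE_ESB_STORE_EOI or a load at XIVE_ESB_LOAD_EOI, which is how the xive_do_source_eoi() hunks below use these defines. A hedged, user-space sketch of that dispatch (the flag value and the fake MMIO page are illustrative):

#include <stdint.h>
#include <stdio.h>

#define XIVE_ESB_STORE_EOI	0x400
#define XIVE_ESB_LOAD_EOI	0x000
#define XIVE_IRQ_FLAG_STORE_EOI	0x01	/* illustrative flag value */

struct xive_irq_data { uint64_t flags; volatile uint64_t *eoi_mmio; };

/* Returns the byte offset used, so the choice is visible below. */
static unsigned int example_source_eoi(struct xive_irq_data *xd)
{
	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) {
		xd->eoi_mmio[XIVE_ESB_STORE_EOI / 8] = 0;	/* store form */
		return XIVE_ESB_STORE_EOI;
	}
	(void)xd->eoi_mmio[XIVE_ESB_LOAD_EOI / 8];		/* load form */
	return XIVE_ESB_LOAD_EOI;
}

int main(void)
{
	static uint64_t fake_esb_page[0x1000 / 8];	/* stand-in for MMIO */
	struct xive_irq_data xd = {
		.flags    = XIVE_IRQ_FLAG_STORE_EOI,
		.eoi_mmio = fake_esb_page,
	};

	printf("EOI via offset %#x\n", example_source_eoi(&xd));
	return 0;
}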
index ae418b8..b886795 100644 (file)
@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
        .balign IFETCH_ALIGN_BYTES
 do_hash_page:
 #ifdef CONFIG_PPC_STD_MMU_64
-       andis.  r0,r4,0xa410            /* weird error? */
+       andis.  r0,r4,0xa450            /* weird error? */
        bne-    handle_page_fault       /* if not, try to insert a HPTE */
-       andis.  r0,r4,DSISR_DABRMATCH@h
-       bne-    handle_dabr_fault
        CURRENT_THREAD_INFO(r11, r1)
        lwz     r0,TI_PREEMPT(r11)      /* If we're in an "NMI" */
        andis.  r0,r0,NMI_MASK@h        /* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@ do_hash_page:
 
        /* Error */
        blt-    13f
+
+       /* Reload DSISR into r4 for the DABR check below */
+       ld      r4,_DSISR(r1)
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 /* Here we have a page fault that hash_page can't handle. */
 handle_page_fault:
-11:    ld      r4,_DAR(r1)
+11:    andis.  r0,r4,DSISR_DABRMATCH@h
+       bne-    handle_dabr_fault
+       ld      r4,_DAR(r1)
        ld      r5,_DSISR(r1)
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      do_page_fault
index fc43435..01addfb 100644 (file)
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
 
+int is_current_kprobe_addr(unsigned long addr)
+{
+       struct kprobe *p = kprobe_running();
+       return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
        return  (addr >= (unsigned long)__kprobes_text_start &&
@@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
        regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
 #endif
 
+       /*
+        * jprobes use jprobe_return() which skips the normal return
+        * path of the function, and this messes up the accounting of the
+        * function graph tracer.
+        *
+        * Pause function graph tracing while performing the jprobe function.
+        */
+       pause_graph_tracing();
+
        return 1;
 }
 NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
         * saved regs...
         */
        memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+       /* It's OK to start function graph tracing again */
+       unpause_graph_tracing();
        preempt_enable_no_resched();
        return 1;
 }
index a8c1f99..4640f6d 100644 (file)
@@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void)
 #endif
 
 /*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process-context use.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault, causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+       ti->task = NULL;
+       ti->cpu = cpu;
+       ti->preempt_count = 0;
+       ti->local_flags = 0;
+       ti->flags = 0;
+       klp_init_thread_info(ti);
+}
+
+/*
  * Stack space used when we detect a bad kernel stack pointer, and
  * early in SMP boots before relocation is enabled. Exclusive emergency
  * stack for machine checks.
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
         * Since we use these as temporary stacks during secondary CPU
         * bringup, we need to get at them in real mode. This means they
         * must also be within the RMO region.
+        *
+        * The IRQ stacks allocated elsewhere in this file are zeroed and
+        * initialized in kernel/irq.c. These are initialized here in order
+        * to have emergency stacks available as early as possible.
         */
        limit = min(safe_stack_limit(), ppc64_rma_size);
 
        for_each_possible_cpu(i) {
                struct thread_info *ti;
                ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-               klp_init_thread_info(ti);
+               memset(ti, 0, THREAD_SIZE);
+               emerg_stack_init_thread_info(ti, i);
                paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
                /* emergency stack for NMI exception handling. */
                ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-               klp_init_thread_info(ti);
+               memset(ti, 0, THREAD_SIZE);
+               emerg_stack_init_thread_info(ti, i);
                paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
                /* emergency stack for machine check exception handling. */
                ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-               klp_init_thread_info(ti);
+               memset(ti, 0, THREAD_SIZE);
+               emerg_stack_init_thread_info(ti, i);
                paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
 #endif
        }
index 7c933a9..c98e90b 100644 (file)
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
        stdu    r1,-SWITCH_FRAME_SIZE(r1)
 
        /* Save all gprs to pt_regs */
-       SAVE_8GPRS(0,r1)
-       SAVE_8GPRS(8,r1)
-       SAVE_8GPRS(16,r1)
-       SAVE_8GPRS(24,r1)
+       SAVE_GPR(0, r1)
+       SAVE_10GPRS(2, r1)
+       SAVE_10GPRS(12, r1)
+       SAVE_10GPRS(22, r1)
+
+       /* Save previous stack pointer (r1) */
+       addi    r8, r1, SWITCH_FRAME_SIZE
+       std     r8, GPR1(r1)
 
        /* Load special regs for save below */
        mfmsr   r8
@@ -95,18 +99,44 @@ ftrace_call:
        bl      ftrace_stub
        nop
 
-       /* Load ctr with the possibly modified NIP */
-       ld      r3, _NIP(r1)
-       mtctr   r3
+       /* Load the possibly modified NIP */
+       ld      r15, _NIP(r1)
+
 #ifdef CONFIG_LIVEPATCH
-       cmpd    r14,r3          /* has NIP been altered? */
+       cmpd    r14, r15        /* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+       /* NIP has not been altered, skip over further checks */
+       beq     1f
+
+       /* Check if there is an active kprobe on us */
+       subi    r3, r14, 4
+       bl      is_current_kprobe_addr
+       nop
+
+       /*
+        * If r3 == 1, then this is a kprobe/jprobe.
+        * Otherwise, this is a livepatched function.
+        *
+        * The conditional branch for livepatch_handler below will use the
+        * result of this comparison. For a kprobe/jprobe, we just need to branch
+        * to the new NIP, not call livepatch_handler. The branch below is bne,
+        * so we want CR0[EQ] to be true if this is a kprobe/jprobe, which means
+        * we want CR0[EQ] = (r3 == 1).
+        */
+       cmpdi   r3, 1
+1:
 #endif
 
+       /* Load CTR with the possibly modified NIP */
+       mtctr   r15
+
        /* Restore gprs */
-       REST_8GPRS(0,r1)
-       REST_8GPRS(8,r1)
-       REST_8GPRS(16,r1)
-       REST_8GPRS(24,r1)
+       REST_GPR(0,r1)
+       REST_10GPRS(2,r1)
+       REST_10GPRS(12,r1)
+       REST_10GPRS(22,r1)
 
        /* Restore possibly modified LR */
        ld      r0, _LINK(r1)
@@ -119,7 +149,10 @@ ftrace_call:
        addi r1, r1, SWITCH_FRAME_SIZE
 
 #ifdef CONFIG_LIVEPATCH
-        /* Based on the cmpd above, if the NIP was altered handle livepatch */
+        /*
+        * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+        * not on a kprobe/jprobe, then handle livepatch.
+        */
        bne-    livepatch_handler
 #endif
 
index 42b7a4f..8d1a365 100644 (file)
@@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
                r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
                break;
        case KVM_REG_PPC_TB_OFFSET:
+               /*
+                * POWER9 DD1 has an erratum where writing TBU40 causes
+                * the timebase to lose ticks.  So we don't let the
+                * timebase offset be changed on P9 DD1.  (It is
+                * initialized to zero.)
+                */
+               if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+                       break;
                /* round up to multiple of 2^24 */
                vcpu->arch.vcore->tb_offset =
                        ALIGN(set_reg_val(id, *val), 1UL << 24);
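TBU40 can only set the upper 40 bits of the 64-bit timebase, so a usable offset must be a multiple of 2^24, hence the ALIGN above. A quick check of the rounding, using the usual power-of-two ALIGN definition:

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))	/* a: power of two */

int main(void)
{
	unsigned long off = 0x123456789UL;

	/* Low 24 bits are cleared; the value rounds up, never down. */
	printf("%#lx -> %#lx\n", off, ALIGN(off, 1UL << 24));
	return 0;
}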
@@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
        int r;
        int srcu_idx;
+       unsigned long ebb_regs[3] = {}; /* shut up GCC */
+       unsigned long user_tar = 0;
+       unsigned int user_vrsave;
 
        if (!vcpu->arch.sane) {
                run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                return -EINVAL;
        }
 
+       /*
+        * Don't allow entry with a suspended transaction, because
+        * the guest entry/exit code will lose it.
+        * If the guest has TM enabled, save away their TM-related SPRs
+        * (they will get restored by the TM unavailable interrupt).
+        */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+           (current->thread.regs->msr & MSR_TM)) {
+               if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+                       run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+                       run->fail_entry.hardware_entry_failure_reason = 0;
+                       return -EINVAL;
+               }
+               current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+               current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+               current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+               current->thread.regs->msr &= ~MSR_TM;
+       }
+#endif
+
        kvmppc_core_prepare_to_enter(vcpu);
 
        /* No need to go into the guest when all we'll do is come back out */
@@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
        flush_all_to_thread(current);
 
+       /* Save userspace EBB and other register values */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+               ebb_regs[0] = mfspr(SPRN_EBBHR);
+               ebb_regs[1] = mfspr(SPRN_EBBRR);
+               ebb_regs[2] = mfspr(SPRN_BESCR);
+               user_tar = mfspr(SPRN_TAR);
+       }
+       user_vrsave = mfspr(SPRN_VRSAVE);
+
        vcpu->arch.wqp = &vcpu->arch.vcore->wq;
        vcpu->arch.pgdir = current->mm->pgd;
        vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
                }
        } while (is_kvmppc_resume_guest(r));
 
+       /* Restore userspace EBB and other register values */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+               mtspr(SPRN_EBBHR, ebb_regs[0]);
+               mtspr(SPRN_EBBRR, ebb_regs[1]);
+               mtspr(SPRN_BESCR, ebb_regs[2]);
+               mtspr(SPRN_TAR, user_tar);
+               mtspr(SPRN_FSCR, current->thread.fscr);
+       }
+       mtspr(SPRN_VRSAVE, user_vrsave);
+
  out:
        vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
        atomic_dec(&vcpu->kvm->arch.vcpus_running);
index 0fdc4a2..404deb5 100644 (file)
@@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         * Put whatever is in the decrementer into the
         * hypervisor decrementer.
         */
+BEGIN_FTR_SECTION
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       ld      r6, VCORE_KVM(r5)
+       ld      r9, KVM_HOST_LPCR(r6)
+       andis.  r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
        mfspr   r8,SPRN_DEC
        mftb    r7
-       mtspr   SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+       /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+       bne     32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
        extsw   r8,r8
+32:    mtspr   SPRN_HDEC,r8
        add     r8,r8,r7
        std     r8,HSTATE_DECEXP(r13)
 
index bdb3f76..4888dd4 100644 (file)
 #include <asm/opal.h>
 #include <asm/xive-regs.h>
 
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg)                       \
+BEGIN_FTR_SECTION;                             \
+       extsw   reg, reg;                       \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE   1
 #define NAPPING_NOVCPU 2
 
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS                    144
+#define STACK_SLOT_TRAP                (SFS-4)
+#define STACK_SLOT_TID         (SFS-16)
+#define STACK_SLOT_PSSCR       (SFS-24)
+#define STACK_SLOT_PID         (SFS-32)
+#define STACK_SLOT_IAMR                (SFS-40)
+#define STACK_SLOT_CIABR       (SFS-48)
+#define STACK_SLOT_DAWR                (SFS-56)
+#define STACK_SLOT_DAWRX       (SFS-64)
+
 /*
  * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
@@ -214,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 kvmppc_primary_no_guest:
        /* We handle this much like a ceded vcpu */
        /* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+       /* HDEC may be larger than DEC for arch >= v3.00, but since the */
+       /* HDEC value came from DEC in the first place, it will fit */
        mfspr   r3, SPRN_HDEC
        mtspr   SPRN_DEC, r3
        /*
@@ -295,8 +314,9 @@ kvm_novcpu_wakeup:
 
        /* See if our timeslice has expired (HDEC is negative) */
        mfspr   r0, SPRN_HDEC
+       EXTEND_HDEC(r0)
        li      r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-       cmpwi   r0, 0
+       cmpdi   r0, 0
        blt     kvm_novcpu_exit
 
        /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -319,10 +339,10 @@ kvm_novcpu_exit:
        bl      kvmhv_accumulate_time
 #endif
 13:    mr      r3, r12
-       stw     r12, 112-4(r1)
+       stw     r12, STACK_SLOT_TRAP(r1)
        bl      kvmhv_commence_exit
        nop
-       lwz     r12, 112-4(r1)
+       lwz     r12, STACK_SLOT_TRAP(r1)
        b       kvmhv_switch_to_host
 
 /*
@@ -390,8 +410,8 @@ kvm_secondary_got_guest:
        lbz     r4, HSTATE_PTID(r13)
        cmpwi   r4, 0
        bne     63f
-       lis     r6, 0x7fff
-       ori     r6, r6, 0xffff
+       LOAD_REG_ADDR(r6, decrementer_max)
+       ld      r6, 0(r6)
        mtspr   SPRN_HDEC, r6
        /* and set per-LPAR registers, if doing dynamic micro-threading */
        ld      r6, HSTATE_SPLIT_MODE(r13)
@@ -545,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  *                                                                            *
  *****************************************************************************/
 
-/* Stack frame offsets */
-#define STACK_SLOT_TID         (112-16)
-#define STACK_SLOT_PSSCR       (112-24)
-#define STACK_SLOT_PID         (112-32)
-
 .global kvmppc_hv_entry
 kvmppc_hv_entry:
 
@@ -565,7 +580,7 @@ kvmppc_hv_entry:
         */
        mflr    r0
        std     r0, PPC_LR_STKOFF(r1)
-       stdu    r1, -112(r1)
+       stdu    r1, -SFS(r1)
 
        /* Save R1 in the PACA */
        std     r1, HSTATE_HOST_R1(r13)
@@ -749,10 +764,20 @@ BEGIN_FTR_SECTION
        mfspr   r5, SPRN_TIDR
        mfspr   r6, SPRN_PSSCR
        mfspr   r7, SPRN_PID
+       mfspr   r8, SPRN_IAMR
        std     r5, STACK_SLOT_TID(r1)
        std     r6, STACK_SLOT_PSSCR(r1)
        std     r7, STACK_SLOT_PID(r1)
+       std     r8, STACK_SLOT_IAMR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+       mfspr   r5, SPRN_CIABR
+       mfspr   r6, SPRN_DAWR
+       mfspr   r7, SPRN_DAWRX
+       std     r5, STACK_SLOT_CIABR(r1)
+       std     r6, STACK_SLOT_DAWR(r1)
+       std     r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 BEGIN_FTR_SECTION
        /* Set partition DABR */
@@ -968,7 +993,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 
        /* Check if HDEC expires soon */
        mfspr   r3, SPRN_HDEC
-       cmpwi   r3, 512         /* 1 microsecond */
+       EXTEND_HDEC(r3)
+       cmpdi   r3, 512         /* 1 microsecond */
        blt     hdec_soon
 
 #ifdef CONFIG_KVM_XICS
@@ -1505,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
         * set by the guest could disrupt the host.
         */
        li      r0, 0
-       mtspr   SPRN_IAMR, r0
-       mtspr   SPRN_CIABR, r0
-       mtspr   SPRN_DAWRX, r0
+       mtspr   SPRN_PSPB, r0
        mtspr   SPRN_WORT, r0
 BEGIN_FTR_SECTION
+       mtspr   SPRN_IAMR, r0
        mtspr   SPRN_TCSCR, r0
        /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
        li      r0, 1
@@ -1525,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
        std     r6,VCPU_UAMOR(r9)
        li      r6,0
        mtspr   SPRN_AMR,r6
+       mtspr   SPRN_UAMOR, r6
 
        /* Switch DSCR back to host value */
        mfspr   r8, SPRN_DSCR
@@ -1670,12 +1696,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        /* Restore host values of some registers */
 BEGIN_FTR_SECTION
+       ld      r5, STACK_SLOT_CIABR(r1)
+       ld      r6, STACK_SLOT_DAWR(r1)
+       ld      r7, STACK_SLOT_DAWRX(r1)
+       mtspr   SPRN_CIABR, r5
+       mtspr   SPRN_DAWR, r6
+       mtspr   SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
        ld      r5, STACK_SLOT_TID(r1)
        ld      r6, STACK_SLOT_PSSCR(r1)
        ld      r7, STACK_SLOT_PID(r1)
+       ld      r8, STACK_SLOT_IAMR(r1)
        mtspr   SPRN_TIDR, r5
        mtspr   SPRN_PSSCR, r6
        mtspr   SPRN_PID, r7
+       mtspr   SPRN_IAMR, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
        PPC_INVALIDATE_ERAT
@@ -1819,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
        li      r0, KVM_GUEST_MODE_NONE
        stb     r0, HSTATE_IN_GUEST(r13)
 
-       ld      r0, 112+PPC_LR_STKOFF(r1)
-       addi    r1, r1, 112
+       ld      r0, SFS+PPC_LR_STKOFF(r1)
+       addi    r1, r1, SFS
        mtlr    r0
        blr
 
@@ -2366,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
        mfspr   r3, SPRN_DEC
        mfspr   r4, SPRN_HDEC
        mftb    r5
-       cmpw    r3, r4
+       extsw   r3, r3
+       EXTEND_HDEC(r4)
+       cmpd    r3, r4
        ble     67f
        mtspr   SPRN_DEC, r4
 67:
        /* save expiry time of guest decrementer */
-       extsw   r3, r3
        add     r3, r3, r5
        ld      r4, HSTATE_KVM_VCPU(r13)
        ld      r5, HSTATE_KVM_VCORE(r13)
index 023a311..4636ca6 100644 (file)
@@ -69,7 +69,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
 {
        /* If the XIVE supports the new "store EOI" facility, use it */
        if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
-               __x_writeq(0, __x_eoi_page(xd));
+               __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
        else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
                opal_int_eoi(hw_irq);
        } else {
@@ -89,7 +89,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
                 * properly.
                 */
                if (xd->flags & XIVE_IRQ_FLAG_LSI)
-                       __x_readq(__x_eoi_page(xd));
+                       __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
                else {
                        eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
 
index 6575b9a..a12e863 100644 (file)
@@ -68,7 +68,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (mm->task_size - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
        /*
index 9dbd2a7..0ee6be4 100644 (file)
@@ -112,7 +112,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
@@ -157,7 +157,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
-                               (!vma || addr + len <= vma->vm_start))
+                               (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
index 966b9fc..45f6740 100644 (file)
@@ -99,7 +99,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
        if ((mm->task_size - len) < addr)
                return 0;
        vma = find_vma(mm, addr);
-       return (!vma || (addr + len) <= vma->vm_start);
+       return (!vma || (addr + len) <= vm_start_gap(vma));
 }
 
 static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
index cbd82fd..09ceea6 100644 (file)
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
                        struct pt_regs *regs_user_copy)
 {
        regs_user->regs = task_pt_regs(current);
-       regs_user->abi  = perf_reg_abi(current);
+       regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+                        PERF_SAMPLE_REGS_ABI_NONE;
 }
index 78fa939..b5d960d 100644 (file)
@@ -75,7 +75,8 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
        if (WARN_ON(!gpdev))
                return NULL;
 
-       if (WARN_ON(!gpdev->dev.of_node))
+       /* Not all PCI devices have device-tree nodes */
+       if (!gpdev->dev.of_node)
                return NULL;
 
        /* Get associated PCI device */
@@ -448,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
        return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
        unsigned long launch;
 
@@ -464,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
        /* PID */
        launch |= pid << PPC_BITLSHIFT(38);
 
+       /* No flush */
+       launch |= !flush << PPC_BITLSHIFT(39);
+
        /* Invalidating the entire process doesn't use a va */
        return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-                       unsigned long pid)
+                       unsigned long pid, bool flush)
 {
        unsigned long launch;
 
@@ -485,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
        /* PID */
        launch |= pid << PPC_BITLSHIFT(38);
 
+       /* No flush */
+       launch |= !flush << PPC_BITLSHIFT(39);
+
        return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+       struct npu *npu;
+       int reg;
+};
+
+static void mmio_invalidate_wait(
+       struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+       struct npu *npu;
+       int i, reg;
+
+       /* Wait for all invalidations to complete */
+       for (i = 0; i <= max_npu2_index; i++) {
+               if (mmio_atsd_reg[i].reg < 0)
+                       continue;
+
+               /* Wait for completion */
+               npu = mmio_atsd_reg[i].npu;
+               reg = mmio_atsd_reg[i].reg;
+               while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+                       cpu_relax();
+
+               put_mmio_atsd_reg(npu, reg);
+
+               /*
+                * The GPU requires two flush ATSDs to ensure all entries have
+                * been flushed. We use PID 0 as it will never be used for a
+                * process on the GPU.
+                */
+               if (flush)
+                       mmio_invalidate_pid(npu, 0, true);
+       }
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-                       unsigned long address)
+                       unsigned long address, bool flush)
 {
-       int i, j, reg;
+       int i, j;
        struct npu *npu;
        struct pnv_phb *nphb;
        struct pci_dev *npdev;
-       struct {
-               struct npu *npu;
-               int reg;
-       } mmio_atsd_reg[NV_MAX_NPUS];
+       struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
        unsigned long pid = npu_context->mm->context.id;
 
        /*
@@ -524,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 
                        if (va)
                                mmio_atsd_reg[i].reg =
-                                       mmio_invalidate_va(npu, address, pid);
+                                       mmio_invalidate_va(npu, address, pid,
+                                                       flush);
                        else
                                mmio_atsd_reg[i].reg =
-                                       mmio_invalidate_pid(npu, pid);
+                                       mmio_invalidate_pid(npu, pid, flush);
 
                        /*
                         * The NPU hardware forwards the shootdown to all GPUs
@@ -543,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
         */
        flush_tlb_mm(npu_context->mm);
 
-       /* Wait for all invalidations to complete */
-       for (i = 0; i <= max_npu2_index; i++) {
-               if (mmio_atsd_reg[i].reg < 0)
-                       continue;
-
-               /* Wait for completion */
-               npu = mmio_atsd_reg[i].npu;
-               reg = mmio_atsd_reg[i].reg;
-               while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-                       cpu_relax();
-               put_mmio_atsd_reg(npu, reg);
-       }
+       mmio_invalidate_wait(mmio_atsd_reg, flush);
+       if (flush)
+               /* Wait for the flush to complete */
+               mmio_invalidate_wait(mmio_atsd_reg, false);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -570,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
         * There should be no more translation requests for this PID, but we
         * need to ensure any entries for it are removed from the TLB.
         */
-       mmio_invalidate(npu_context, 0, 0);
+       mmio_invalidate(npu_context, 0, 0, true);
 }
 
 static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -580,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
 {
        struct npu_context *npu_context = mn_to_npu_context(mn);
 
-       mmio_invalidate(npu_context, 1, address);
+       mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -589,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
 {
        struct npu_context *npu_context = mn_to_npu_context(mn);
 
-       mmio_invalidate(npu_context, 1, address);
+       mmio_invalidate(npu_context, 1, address, true);
 }
 
 static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -599,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
        struct npu_context *npu_context = mn_to_npu_context(mn);
        unsigned long address;
 
-       for (address = start; address <= end; address += PAGE_SIZE)
-               mmio_invalidate(npu_context, 1, address);
+       for (address = start; address < end; address += PAGE_SIZE)
+               mmio_invalidate(npu_context, 1, address, false);
+
+       /* Do the flush only on the final address == end */
+       mmio_invalidate(npu_context, 1, address, true);
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
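The loop-bound change above matters because [start, end) is half-open: the old "address <= end" issued a spurious invalidate for the page at end, and gave no way to defer the flush. The new loop invalidates only in-range pages with flush=false, then issues a single flushing call at address == end. A small standalone trace of the calls (4 KiB pages assumed):

#include <stdio.h>

#define PAGE_SIZE 0x1000UL	/* assumed page size */

int main(void)
{
	unsigned long start = 0x1000, end = 0x3000, addr;

	/* Old bound: also touched the page at 'end', outside [start, end). */
	for (addr = start; addr <= end; addr += PAGE_SIZE)
		printf("old: invalidate %#lx\n", addr);

	/* New bound: in-range pages without flush, one flushing call at end. */
	for (addr = start; addr < end; addr += PAGE_SIZE)
		printf("new: invalidate %#lx (flush=false)\n", addr);
	printf("new: invalidate %#lx (flush=true)\n", addr);	/* addr == end */
	return 0;
}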
@@ -650,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                /* No nvlink associated with this GPU device */
                return ERR_PTR(-ENODEV);
 
-       if (!mm) {
-               /* kernel thread contexts are not supported */
+       if (!mm || mm->context.id == 0) {
+               /*
+                * Kernel thread contexts are not supported and context id 0 is
+                * reserved on the GPU.
+                */
                return ERR_PTR(-EINVAL);
        }
 
index 9138250..8f5e303 100644 (file)
@@ -297,7 +297,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
 {
        /* If the XIVE supports the new "store EOI" facility, use it */
        if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
-               out_be64(xd->eoi_mmio, 0);
+               out_be64(xd->eoi_mmio + XIVE_ESB_STORE_EOI, 0);
        else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
                /*
                 * The FW told us to call it. This happens for some
index a5039fa..2820722 100644 (file)
@@ -30,6 +30,7 @@ CONFIG_USER_NS=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -44,7 +45,10 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -90,6 +94,8 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -359,6 +365,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
@@ -367,16 +374,19 @@ CONFIG_DEVTMPFS=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
 CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RBD=m
 CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -442,6 +452,8 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -452,7 +464,6 @@ CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -471,6 +482,7 @@ CONFIG_DIAG288_WATCHDOG=m
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -487,12 +499,18 @@ CONFIG_XFS_POSIX_ACL=y
 CONFIG_XFS_RT=y
 CONFIG_XFS_DEBUG=y
 CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
 CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_BTRFS_DEBUG=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QUOTA_DEBUG=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
 CONFIG_AUTOFS4_FS=m
@@ -558,6 +576,7 @@ CONFIG_HEADERS_CHECK=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_RODATA_TEST=y
 CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_SELFTEST=y
 CONFIG_DEBUG_OBJECTS_FREE=y
@@ -580,7 +599,6 @@ CONFIG_DETECT_HUNG_TASK=y
 CONFIG_WQ_WATCHDOG=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_DEBUG_TIMEKEEPING=y
-CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_RT_MUTEXES=y
 CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
 CONFIG_PROVE_LOCKING=y
@@ -595,6 +613,7 @@ CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=300
 CONFIG_NOTIFIER_ERROR_INJECTION=m
 CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
 CONFIG_FAULT_INJECTION=y
 CONFIG_FAILSLAB=y
 CONFIG_FAIL_PAGE_ALLOC=y
@@ -616,13 +635,12 @@ CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
 CONFIG_LKDTM=m
 CONFIG_TEST_LIST_SORT=y
+CONFIG_TEST_SORT=y
 CONFIG_KPROBES_SANITY_TEST=y
 CONFIG_RBTREE_TEST=y
 CONFIG_INTERVAL_TREE_TEST=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_TEST_STRING_HELPERS=y
-CONFIG_TEST_KSTRTOX=y
 CONFIG_DMA_API_DEBUG=y
 CONFIG_TEST_BPF=m
 CONFIG_BUG_ON_DATA_CORRUPTION=y
@@ -630,6 +648,7 @@ CONFIG_S390_PTDUMP=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -640,7 +659,9 @@ CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_USER=m
+CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
@@ -648,6 +669,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
@@ -657,8 +679,10 @@ CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
@@ -674,6 +698,7 @@ CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
@@ -685,6 +710,7 @@ CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_ASYMMETRIC_KEY_TYPE=y
@@ -692,6 +718,7 @@ CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m
 CONFIG_CRC7=m
 CONFIG_CRC8=m
+CONFIG_RANDOM32_SELFTEST=y
 CONFIG_CORDIC=m
 CONFIG_CMM=m
 CONFIG_APPLDATA_BASE=y
index 83970b5..3c6b781 100644 (file)
@@ -31,6 +31,7 @@ CONFIG_USER_NS=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -46,7 +47,10 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -88,6 +92,8 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -356,6 +362,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
@@ -364,16 +371,18 @@ CONFIG_DEVTMPFS=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -439,6 +448,8 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -449,7 +460,6 @@ CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -468,6 +478,7 @@ CONFIG_DIAG288_WATCHDOG=m
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -483,11 +494,15 @@ CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_XFS_RT=y
 CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
 CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
@@ -553,7 +568,6 @@ CONFIG_UNUSED_SYMBOLS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
@@ -576,6 +590,7 @@ CONFIG_BIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -599,6 +614,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
@@ -611,6 +627,7 @@ CONFIG_CRYPTO_SHA512=m
 CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
@@ -626,16 +643,19 @@ CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_ZCRYPT=m
+CONFIG_PKEY=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_CRC7=m
index fbc6542..653d72b 100644 (file)
@@ -31,6 +31,7 @@ CONFIG_USER_NS=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -44,7 +45,10 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -86,6 +90,8 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -354,6 +360,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
@@ -362,16 +369,18 @@ CONFIG_DEVTMPFS=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_OSD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_CDROM_PKTCDVD=m
-CONFIG_ATA_OVER_ETH=m
+CONFIG_BLK_DEV_RAM_DAX=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -437,6 +446,8 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_NATSEMI is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
@@ -447,7 +458,6 @@ CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
@@ -466,6 +476,7 @@ CONFIG_DIAG288_WATCHDOG=m
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -481,11 +492,15 @@ CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_XFS_RT=y
 CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
 CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
 CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
@@ -551,7 +566,6 @@ CONFIG_UNUSED_SYMBOLS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
@@ -574,6 +588,7 @@ CONFIG_BIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
+CONFIG_HARDENED_USERCOPY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
@@ -597,6 +612,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
+CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
@@ -609,6 +625,7 @@ CONFIG_CRYPTO_SHA512=m
 CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAMELLIA=m
@@ -624,6 +641,7 @@ CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_842=m
 CONFIG_CRYPTO_LZ4=m
 CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_ANSI_CPRNG=m
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
@@ -635,6 +653,7 @@ CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_CRC7=m
index e23d97c..afa46a7 100644
@@ -12,8 +12,10 @@ CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=2
 # CONFIG_HOTPLUG_CPU is not set
 CONFIG_HZ_100=y
+# CONFIG_ARCH_RANDOM is not set
 # CONFIG_COMPACTION is not set
 # CONFIG_MIGRATION is not set
+# CONFIG_BOUNCE is not set
 # CONFIG_CHECK_STACK is not set
 # CONFIG_CHSC_SCH is not set
 # CONFIG_SCM_BUS is not set
@@ -36,11 +38,11 @@ CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_FC_ATTRS=y
 CONFIG_ZFCP=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_HVC_IUCV is not set
+# CONFIG_HW_RANDOM_S390 is not set
 CONFIG_RAW_DRIVER=y
 # CONFIG_SCLP_ASYNC is not set
 # CONFIG_HMC_DRV is not set
@@ -54,9 +56,9 @@ CONFIG_RAW_DRIVER=y
 # CONFIG_INOTIFY_USER is not set
 CONFIG_CONFIGFS_FS=y
 # CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_PANIC_ON_OOPS=y
 # CONFIG_SCHED_DEBUG is not set
index 97189db..20244a3 100644
@@ -28,6 +28,7 @@ CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -108,7 +109,6 @@ CONFIG_ZFCP=y
 CONFIG_SCSI_VIRTIO=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
 CONFIG_MD_MULTIPATH=m
 CONFIG_BLK_DEV_DM=y
 CONFIG_DM_CRYPT=m
@@ -131,6 +131,7 @@ CONFIG_TUN=m
 CONFIG_VIRTIO_NET=y
 # CONFIG_NET_VENDOR_ALACRITECH is not set
 # CONFIG_NET_VENDOR_SOLARFLARE is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 CONFIG_DEVKMEM=y
@@ -162,7 +163,6 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_PAGEALLOC=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_PANIC_ON_OOPS=y
-CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_RT_MUTEXES=y
 CONFIG_PROVE_LOCKING=y
 CONFIG_LOCK_STAT=y
@@ -172,14 +172,12 @@ CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 CONFIG_DEBUG_NOTIFIERS=y
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_RCU_TRACE=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
 CONFIG_KPROBES_SANITY_TEST=y
@@ -190,7 +188,6 @@ CONFIG_CRYPTO_CCM=m
 CONFIG_CRYPTO_GCM=m
 CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_CTS=m
-CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_XTS=m
@@ -230,6 +227,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_ZCRYPT=m
 CONFIG_PKEY=m
+CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_SHA1_S390=m
 CONFIG_CRYPTO_SHA256_S390=m
 CONFIG_CRYPTO_SHA512_S390=m
index 6702630..144809a 100644
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
+#include <linux/blkdev.h>
 
 struct arqb {
        u64 data;
@@ -105,13 +106,14 @@ struct scm_driver {
        int (*probe) (struct scm_device *scmdev);
        int (*remove) (struct scm_device *scmdev);
        void (*notify) (struct scm_device *scmdev, enum scm_event event);
-       void (*handler) (struct scm_device *scmdev, void *data, int error);
+       void (*handler) (struct scm_device *scmdev, void *data,
+                       blk_status_t error);
 };
 
 int scm_driver_register(struct scm_driver *scmdrv);
 void scm_driver_unregister(struct scm_driver *scmdrv);
 
 int eadm_start_aob(struct aob *aob);
-void scm_irq_handler(struct aob *aob, int error);
+void scm_irq_handler(struct aob *aob, blk_status_t error);
 
 #endif /* _ASM_S390_EADM_H */
index 60d395f..aeac013 100644
@@ -221,11 +221,6 @@ extern void release_thread(struct task_struct *);
 /* Free guarded storage control block for current */
 void exit_thread_gs(void);
 
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 unsigned long get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *) \
         (task_stack_page(tsk) + THREAD_SIZE) - 1)
index e784bed..2b498e5 100644
@@ -109,7 +109,7 @@ struct sysinfo_2_2_2 {
        unsigned short cpus_shared;
        char reserved_4[3];
        unsigned char vsne;
-       uuid_be uuid;
+       uuid_t uuid;
        char reserved_5[160];
        char ext_name[256];
 };
@@ -134,7 +134,7 @@ struct sysinfo_3_2_2 {
                char reserved_1[3];
                unsigned char evmne;
                unsigned int reserved_2;
-               uuid_be uuid;
+               uuid_t uuid;
        } vm[8];
        char reserved_3[1504];
        char ext_names[8][256];
index e408d9c..6315037 100644
@@ -231,12 +231,17 @@ ENTRY(sie64a)
        lctlg   %c1,%c1,__LC_USER_ASCE          # load primary asce
 .Lsie_done:
 # some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
+# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
+# Other instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So let's use 3 nops as a landing pad for all possible rewinds.
 # See also .Lcleanup_sie
-.Lrewind_pad:
-       nop     0
+.Lrewind_pad6:
+       nopr    7
+.Lrewind_pad4:
+       nopr    7
+.Lrewind_pad2:
+       nopr    7
        .globl sie_exit
 sie_exit:
        lg      %r14,__SF_EMPTY+8(%r15)         # load guest register save area
@@ -249,7 +254,9 @@ sie_exit:
        stg     %r14,__SF_EMPTY+16(%r15)        # set exit reason code
        j       sie_exit
 
-       EX_TABLE(.Lrewind_pad,.Lsie_fault)
+       EX_TABLE(.Lrewind_pad6,.Lsie_fault)
+       EX_TABLE(.Lrewind_pad4,.Lsie_fault)
+       EX_TABLE(.Lrewind_pad2,.Lsie_fault)
        EX_TABLE(sie_exit,.Lsie_fault)
 EXPORT_SYMBOL(sie64a)
 EXPORT_SYMBOL(sie_exit)
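The landing-pad hunk above carries its reasoning in the comment: a program check may rewind the PSW by the instruction-length code (ILC), which on s390 can be 2, 4 or 6 bytes, so a single 4-byte nop no longer covers every rewind. Below is a minimal userspace sketch (all addresses are illustrative, not the kernel's) of why three 2-byte nopr pads, each with its own exception-table entry, catch every case:

#include <assert.h>
#include <stdio.h>

/* Hedged model: sie_exit sits immediately after three 2-byte nops.
 * A program check rewinds the PSW by the ILC (2, 4 or 6 bytes), and
 * each possible rewind target must be a pad covered by an
 * exception-table entry. */
int main(void)
{
    const unsigned long sie_exit = 0x1000;   /* assumed address */
    const unsigned long pad2 = sie_exit - 2; /* .Lrewind_pad2 */
    const unsigned long pad4 = sie_exit - 4; /* .Lrewind_pad4 */
    const unsigned long pad6 = sie_exit - 6; /* .Lrewind_pad6 */
    const int ilcs[] = { 2, 4, 6 };

    for (int i = 0; i < 3; i++) {
        unsigned long landing = sie_exit - ilcs[i];
        /* every possible rewind lands on one of the covered pads */
        assert(landing == pad2 || landing == pad4 || landing == pad6);
    }
    printf("all rewinds covered\n");
    return 0;
}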
index e545ffe..8e622bb 100644
@@ -564,8 +564,6 @@ static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
-       if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
-               diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
        diag308(DIAG308_LOAD_CLEAR, NULL);
        if (MACHINE_IS_VM)
                __cpcmd("IPL", NULL, 0, NULL);
@@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused)
                break;
        case REIPL_METHOD_CCW_DIAG:
                diag308(DIAG308_SET, reipl_block_ccw);
-               if (MACHINE_IS_LPAR)
-                       diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
-               else
-                       diag308(DIAG308_LOAD_CLEAR, NULL);
+               diag308(DIAG308_LOAD_CLEAR, NULL);
                break;
        case REIPL_METHOD_FCP_RW_DIAG:
                diag308(DIAG308_SET, reipl_block_fcp);
index 999d715..bb32b86 100644
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
-/*
- * Return saved PC of a blocked thread. used in kernel/sched.
- * resume in entry.S does not create a new stack frame, it
- * just stores the registers %r6-%r15 to the frame given by
- * schedule. We want to return the address of the caller of
- * schedule, so we have to walk the backchain one time to
- * find the frame schedule() store its return address.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       struct stack_frame *sf, *low, *high;
-
-       if (!tsk || !task_stack_page(tsk))
-               return 0;
-       low = task_stack_page(tsk);
-       high = (struct stack_frame *) task_pt_regs(tsk);
-       sf = (struct stack_frame *) tsk->thread.ksp;
-       if (sf <= low || sf > high)
-               return 0;
-       sf = (struct stack_frame *) sf->back_chain;
-       if (sf <= low || sf > high)
-               return 0;
-       return sf->gprs[8];
-}
-
 extern void kernel_thread_starter(void);
 
 /*
index eefcb54..fb869b1 100644
@@ -242,7 +242,7 @@ static void print_ext_name(struct seq_file *m, int lvl,
 
 static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
 {
-       if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be)))
+       if (uuid_is_null(&info->vm[i].uuid))
                return;
        seq_printf(m, "VM%02d UUID:            %pUb\n", i, &info->vm[i].uuid);
 }
index 9da243d..3b297fa 100644
@@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
        ptr = asce.origin * 4096;
        if (asce.r) {
                *fake = 1;
+               ptr = 0;
                asce.dt = ASCE_TYPE_REGION1;
        }
        switch (asce.dt) {
        case ASCE_TYPE_REGION1:
-               if (vaddr.rfx01 > asce.tl && !asce.r)
+               if (vaddr.rfx01 > asce.tl && !*fake)
                        return PGM_REGION_FIRST_TRANS;
                break;
        case ASCE_TYPE_REGION2:
@@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
                union region1_table_entry rfte;
 
                if (*fake) {
-                       /* offset in 16EB guest memory block */
-                       ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+                       ptr += (unsigned long) vaddr.rfx << 53;
                        rfte.val = ptr;
                        goto shadow_r2t;
                }
@@ -1036,8 +1036,7 @@ shadow_r2t:
                union region2_table_entry rste;
 
                if (*fake) {
-                       /* offset in 8PB guest memory block */
-                       ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+                       ptr += (unsigned long) vaddr.rsx << 42;
                        rste.val = ptr;
                        goto shadow_r3t;
                }
@@ -1064,8 +1063,7 @@ shadow_r3t:
                union region3_table_entry rtte;
 
                if (*fake) {
-                       /* offset in 4TB guest memory block */
-                       ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+                       ptr += (unsigned long) vaddr.rtx << 31;
                        rtte.val = ptr;
                        goto shadow_sgt;
                }
@@ -1101,8 +1099,7 @@ shadow_sgt:
                union segment_table_entry ste;
 
                if (*fake) {
-                       /* offset in 2G guest memory block */
-                       ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+                       ptr += (unsigned long) vaddr.sx << 20;
                        ste.val = ptr;
                        goto shadow_pgt;
                }
index b017dae..b854b1d 100644
@@ -101,7 +101,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        goto check_asce_limit;
        }
 
@@ -151,7 +151,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-                               (!vma || addr + len <= vma->vm_start))
+                               (!vma || addr + len <= vm_start_gap(vma)))
                        goto check_asce_limit;
        }
 
index d9a922d..2992745 100644
@@ -13,7 +13,6 @@ struct task_struct;
  */
 extern void (*cpu_wait)(void);
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
 extern void start_thread(struct pt_regs *regs,
                        unsigned long pc, unsigned long sp);
 extern unsigned long get_wchan(struct task_struct *p);
index eb64d7a..6e20241 100644
@@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
        return 1;
 }
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return task_pt_regs(tsk)->cp0_epc;
-}
-
 unsigned long get_wchan(struct task_struct *task)
 {
        if (!task || task == current || task->state == TASK_RUNNING)
index 08e7af0..6a1a129 100644
@@ -64,7 +64,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
@@ -114,7 +114,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
index dd27159..b395e56 100644
@@ -67,9 +67,6 @@ struct thread_struct {
        .current_ds = KERNEL_DS, \
 }
 
-/* Return saved PC of a blocked thread. */
-unsigned long thread_saved_pc(struct task_struct *t);
-
 /* Do necessary setup to start up a newly executed thread. */
 static inline void start_thread(struct pt_regs * regs, unsigned long pc,
                                    unsigned long sp)
index b58ee90..f04dc5a 100644
@@ -89,9 +89,7 @@ struct thread_struct {
 #include <linux/types.h>
 #include <asm/fpumacro.h>
 
-/* Return saved PC of a blocked thread. */
 struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *);
 
 /* On Uniprocessor, even in RMO processes see TSO semantics */
 #ifdef CONFIG_SMP
index b6dac8e..9245f93 100644
@@ -177,14 +177,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
 }
 
 /*
- * Note: sparc64 has a pretty intricated thread_saved_pc, check it out.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return task_thread_info(tsk)->kpc;
-}
-
-/*
  * Free current thread data structures etc..
  */
 void exit_thread(struct task_struct *tsk)
index 1badc49..b96104d 100644
@@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init);
 
 #endif
 
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       struct thread_info *ti = task_thread_info(tsk);
-       unsigned long ret = 0xdeadbeefUL;
-       
-       if (ti && ti->ksp) {
-               unsigned long *sp;
-               sp = (unsigned long *)(ti->ksp + STACK_BIAS);
-               if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL &&
-                   sp[14]) {
-                       unsigned long *fp;
-                       fp = (unsigned long *)(sp[14] + STACK_BIAS);
-                       if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL)
-                               ret = fp[15];
-               }
-       }
-       return ret;
-}
-
 /* Free current thread data structures etc.. */
 void exit_thread(struct task_struct *tsk)
 {
index ef4520e..043544d 100644
@@ -120,7 +120,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
 
                vma = find_vma(mm, addr);
                if (task_size - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
@@ -183,7 +183,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
                vma = find_vma(mm, addr);
                if (task_size - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
index 7c29d38..88855e3 100644
@@ -120,7 +120,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (task_size - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
        if (mm->get_unmapped_area == arch_get_unmapped_area)
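The recurring one-line change in these mmap hunks makes the "does the hinted address fit below the next VMA" test respect the stack guard gap: vm_start_gap() lowers the effective start of a downward-growing stack VMA, so a MAP_FIXED-style hint can no longer be placed flush against a stack. A hedged userspace model of the corrected check (the struct fields and gap size here are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

#define GUARD_GAP (256UL << 12)  /* illustrative 1 MiB gap */

struct vma {
    unsigned long vm_start;
    unsigned long vm_end;
    bool grows_down;             /* stand-in for VM_GROWSDOWN */
};

/* Models vm_start_gap(): the usable start is pulled down by the
 * guard gap when the VMA is a downward-growing stack. */
static unsigned long vm_start_gap(const struct vma *vma)
{
    unsigned long start = vma->vm_start;

    if (vma->grows_down && start > GUARD_GAP)
        start -= GUARD_GAP;
    return start;
}

/* The fixed test: a hint fits only if it ends at or below the gap. */
static bool hint_fits(unsigned long addr, unsigned long len,
                      const struct vma *next)
{
    return !next || addr + len <= vm_start_gap(next);
}

int main(void)
{
    struct vma stack = { 0x7f0000000000UL, 0x7f0000100000UL, true };

    /* mapping flush against the stack is now rejected (prints 0) */
    printf("%d\n", hint_fits(stack.vm_start - 0x1000, 0x1000, &stack));
    return 0;
}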
index 0bc9968..f71e520 100644
@@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task)
 
 extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags);
 
-
-/*
- * Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
- */
-#define thread_saved_pc(t)   ((t)->thread.pc)
-
 unsigned long get_wchan(struct task_struct *p);
 
 /* Return initial ksp value for given task. */
index cb10153..03e5cc4 100644
@@ -233,7 +233,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
        if (current->mm->get_unmapped_area == arch_get_unmapped_area)
index 8541027..b55fe9b 100644
@@ -534,7 +534,7 @@ static void ubd_handler(void)
                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
                        blk_end_request(
                                (*irq_req_buffer)[count]->req,
-                               0,
+                               BLK_STS_OK,
                                (*irq_req_buffer)[count]->length
                        );
                        kfree((*irq_req_buffer)[count]);
index 2d1e0dd..f6d1a3f 100644
@@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task)
 {
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
 static inline void mm_copy_segments(struct mm_struct *from_mm,
                                    struct mm_struct *new_mm)
 {
index 64a1fd0..7b56401 100644
@@ -56,12 +56,6 @@ union thread_union cpu0_irqstack
        __attribute__((__section__(".data..init_irqstack"))) =
                { INIT_THREAD_INFO(init_task) };
 
-unsigned long thread_saved_pc(struct task_struct *task)
-{
-       /* FIXME: Need to look up userspace_pid by cpu */
-       return os_process_pc(userspace_pid[0]);
-}
-
 /* Changed in setup_arch, which is called in early boot */
 static char host_info[(__NEW_UTS_LEN + 1) * 5];
 
index 54c24f0..56a7e92 100644
@@ -564,9 +564,6 @@ void choose_random_location(unsigned long input,
 {
        unsigned long random_addr, min_addr;
 
-       /* By default, keep output position unchanged. */
-       *virt_addr = *output;
-
        if (cmdline_find_option_bool("nokaslr")) {
                warn("KASLR disabled: 'nokaslr' on cmdline.");
                return;
index b3c5a5f..00241c8 100644
@@ -338,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
                                  unsigned long output_len)
 {
        const unsigned long kernel_total_size = VO__end - VO__text;
-       unsigned long virt_addr = (unsigned long)output;
+       unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
 
        /* Retain x86 boot parameters pointer passed from startup_32/64. */
        boot_params = rmode;
@@ -390,6 +390,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #ifdef CONFIG_X86_64
        if (heap > 0x3fffffffffffUL)
                error("Destination address too large");
+       if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+               error("Destination virtual address is beyond the kernel mapping area");
 #else
        if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
                error("Destination address too large");
@@ -397,7 +399,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #ifndef CONFIG_RELOCATABLE
        if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
                error("Destination address does not match LOAD_PHYSICAL_ADDR");
-       if ((unsigned long)output != virt_addr)
+       if (virt_addr != LOAD_PHYSICAL_ADDR)
                error("Destination virtual address changed when not relocatable");
 #endif
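With virt_addr now seeded from LOAD_PHYSICAL_ADDR rather than the physical output pointer, the new guard in this hunk rejects any randomized virtual placement whose image would run past the fixed kernel mapping. A hedged arithmetic sketch of that guard (the constants below are illustrative stand-ins):

#include <stdio.h>

#define LOAD_PHYSICAL_ADDR 0x1000000UL   /* illustrative, 16 MiB */
#define KERNEL_IMAGE_SIZE  (512UL << 20) /* illustrative, 512 MiB */

static unsigned long max_ul(unsigned long a, unsigned long b)
{
    return a > b ? a : b;
}

/* Nonzero when the chosen virtual address would overflow the kernel
 * mapping area -- the condition the added check errors out on. */
static int virt_addr_too_high(unsigned long virt_addr,
                              unsigned long output_len,
                              unsigned long kernel_total_size)
{
    return virt_addr + max_ul(output_len, kernel_total_size)
           > KERNEL_IMAGE_SIZE;
}

int main(void)
{
    printf("%d\n", virt_addr_too_high(LOAD_PHYSICAL_ADDR,
                                      8UL << 20, 64UL << 20)); /* 0 */
    printf("%d\n", virt_addr_too_high(KERNEL_IMAGE_SIZE - (4UL << 20),
                                      8UL << 20, 64UL << 20)); /* 1 */
    return 0;
}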
 
index 1c8355e..766a521 100644
@@ -81,8 +81,6 @@ static inline void choose_random_location(unsigned long input,
                                          unsigned long output_size,
                                          unsigned long *virt_addr)
 {
-       /* No change from existing output location. */
-       *virt_addr = *output;
 }
 #endif
 
index 34b3fa2..9e32d40 100644
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
                                $(comma)4)$(comma)%ymm2,yes,no)
index 2f87563..2e14acc 100644
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
                                 $(comma)4)$(comma)%ymm2,yes,no)
 ifeq ($(avx2_supported),yes)
index 41089e7..45b4fca 100644
@@ -2,6 +2,8 @@
 # Arch-specific CryptoAPI modules.
 #
 
+OBJECT_FILES_NON_STANDARD := y
+
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
                                 $(comma)4)$(comma)%ymm2,yes,no)
 ifeq ($(avx2_supported),yes)
index a6d91d4..110ce82 100644
@@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids
  [ C(DTLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x608,   /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+               [ C(RESULT_MISS)   ] = 0xe08,   /* DTLB_LOAD_MISSES.WALK_COMPLETED */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x649,   /* DTLB_STORE_MISSES.WALK_COMPLETED */
+               [ C(RESULT_MISS)   ] = 0xe49,   /* DTLB_STORE_MISSES.WALK_COMPLETED */
        },
        [ C(OP_PREFETCH) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
index 758c1aa..44ec523 100644
@@ -1170,7 +1170,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
                pmu = type->pmus;
                for (i = 0; i < type->num_boxes; i++, pmu++) {
                        box = pmu->boxes[pkg];
-                       if (!box && atomic_inc_return(&box->refcnt) == 1)
+                       if (box && atomic_inc_return(&box->refcnt) == 1)
                                uncore_box_init(box);
                }
        }
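The one-character fix above matters more than it looks: the old test confirmed box was NULL and then immediately dereferenced it through atomic_inc_return(&box->refcnt). A hedged miniature of the corrected init-on-first-reference pattern (plain ++ stands in for the atomic op):

#include <stdio.h>

struct box {
    int refcnt;
};

/* Initialize only on the first reference, and only when the box
 * actually exists -- the inverted test crashed on a NULL box. */
static void online_box(struct box *box)
{
    if (box && ++box->refcnt == 1)
        printf("init box\n");
}

int main(void)
{
    struct box b = { 0 };

    online_box(NULL); /* safely ignored now */
    online_box(&b);   /* first reference: initialized */
    online_box(&b);   /* later references: no re-init */
    return 0;
}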
index b8ad261..c66d19e 100644
@@ -29,6 +29,7 @@ struct pt_regs;
        } while (0)
 
 extern int fixup_exception(struct pt_regs *regs, int trapnr);
+extern int fixup_bug(struct pt_regs *regs, int trapnr);
 extern bool ex_has_fault_handler(unsigned long ip);
 extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
 
index 0559626..722d0e5 100644
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
 
        bool perm_ok; /* do not check permissions if true */
        bool ud;        /* inject an #UD if host doesn't support insn */
+       bool tf;        /* TF value before instruction (after for syscall/sysret) */
 
        bool have_exception;
        struct x86_exception exception;
index fba1007..d5acc27 100644
@@ -2,8 +2,7 @@
 #define _ASM_X86_MSHYPER_H
 
 #include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
+#include <linux/atomic.h>
 #include <asm/hyperv.h>
 
 /*
index 3cada99..a28b671 100644
@@ -860,8 +860,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task);
 
 #endif /* CONFIG_X86_64 */
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
                                               unsigned long new_sp);
 
index 4b99423..3c7c419 100644
@@ -29,6 +29,7 @@ OBJECT_FILES_NON_STANDARD_head_$(BITS).o              := y
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o    := y
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o             := y
 OBJECT_FILES_NON_STANDARD_test_nx.o                    := y
+OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o     := y
 
 # If instrumentation of this dir is enabled, boot hangs during first second.
 # Probably could be more selective here, but note that files related to irqs,
index 26b78d8..85a9e17 100644
@@ -1,3 +1,5 @@
+OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y
+
 obj-$(CONFIG_ACPI)             += boot.o
 obj-$(CONFIG_ACPI_SLEEP)       += sleep.o wakeup_$(BITS).o
 obj-$(CONFIG_ACPI_APEI)                += apei.o
index f5af0cc..9257bd9 100644
@@ -856,11 +856,13 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
        dentry = kernfs_mount(fs_type, flags, rdt_root,
                              RDTGROUP_SUPER_MAGIC, NULL);
        if (IS_ERR(dentry))
-               goto out_cdp;
+               goto out_destroy;
 
        static_branch_enable(&rdt_enable_key);
        goto out;
 
+out_destroy:
+       kernfs_remove(kn_info);
 out_cdp:
        cdp_disable();
 out:
index 901c640..69ea0bc 100644
@@ -28,6 +28,7 @@
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
+#include <linux/frame.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -94,6 +95,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
 }
 
 asm (
+                       "optprobe_template_func:\n"
                        ".global optprobe_template_entry\n"
                        "optprobe_template_entry:\n"
 #ifdef CONFIG_X86_64
@@ -131,7 +133,12 @@ asm (
                        "       popf\n"
 #endif
                        ".global optprobe_template_end\n"
-                       "optprobe_template_end:\n");
+                       "optprobe_template_end:\n"
+                       ".type optprobe_template_func, @function\n"
+                       ".size optprobe_template_func, .-optprobe_template_func\n");
+
+void optprobe_template_func(void);
+STACK_FRAME_NON_STANDARD(optprobe_template_func);
 
 #define TMPL_MOVE_IDX \
        ((long)&optprobe_template_val - (long)&optprobe_template_entry)
index 0bb8842..3ca1980 100644
@@ -545,17 +545,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 }
 
 /*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       struct inactive_task_frame *frame =
-               (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
-       return READ_ONCE_NOCHECK(frame->ret_addr);
-}
-
-/*
  * Called from fs/proc with a reference on @p to find the function
  * which called into schedule(). This needs to be done carefully
  * because the task might wake up and we might look at a stack
index 2544700..67393fc 100644
@@ -9,6 +9,7 @@
 #include <linux/sched.h>
 #include <linux/tboot.h>
 #include <linux/delay.h>
+#include <linux/frame.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
@@ -123,6 +124,7 @@ void __noreturn machine_real_restart(unsigned int type)
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(machine_real_restart);
 #endif
+STACK_FRAME_NON_STANDARD(machine_real_restart);
 
 /*
  * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
index 207b8f2..213ddf3 100644
@@ -144,7 +144,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (end - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
@@ -187,7 +187,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                addr = PAGE_ALIGN(addr);
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                               (!vma || addr + len <= vma->vm_start))
+                               (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
index 4b17240..a4eb279 100644
@@ -514,7 +514,7 @@ int tboot_force_iommu(void)
        if (!tboot_enabled())
                return 0;
 
-       if (!intel_iommu_tboot_noforce)
+       if (intel_iommu_tboot_noforce)
                return 1;
 
        if (no_iommu || swiotlb || dmar_disabled)
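This hunk fixes another inverted condition: the user's tboot_noforce request was being honored exactly backwards, so the early return never fired and the IOMMU was forced on regardless. A hedged sketch of the corrected control flow (the flag names and forcing action are simplified stand-ins, not the driver's full logic):

#include <stdbool.h>
#include <stdio.h>

static bool tboot_on = true;   /* illustrative state */
static bool noforce = true;    /* user asked not to force */
static bool iommu_forced;

/* Bail out before forcing the IOMMU on when the user passed the
 * noforce option; the old test read if (!noforce) and so ignored it. */
static int force_iommu(void)
{
    if (!tboot_on)
        return 0;
    if (noforce)
        return 1;
    iommu_forced = true;
    return 1;
}

int main(void)
{
    force_iommu();
    printf("iommu_forced=%d\n", iommu_forced); /* 0: option honored */
    return 0;
}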
index 3995d3a..bf54309 100644
@@ -182,7 +182,7 @@ int is_valid_bugaddr(unsigned long addr)
        return ud == INSN_UD0 || ud == INSN_UD2;
 }
 
-static int fixup_bug(struct pt_regs *regs, int trapnr)
+int fixup_bug(struct pt_regs *regs, int trapnr)
 {
        if (trapnr != X86_TRAP_UD)
                return 0;
index 0816ab2..80890de 100644
@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
                ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
        }
 
+       ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
        return X86EMUL_CONTINUE;
 }
 
index ba9891a..33460fc 100644
@@ -36,6 +36,7 @@
 #include <linux/slab.h>
 #include <linux/amd-iommu.h>
 #include <linux/hashtable.h>
+#include <linux/frame.h>
 
 #include <asm/apic.h>
 #include <asm/perf_event.h>
@@ -4906,6 +4907,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
        mark_all_clean(svm->vmcb);
 }
+STACK_FRAME_NON_STANDARD(svm_vcpu_run);
 
 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
index ca5d2b9..1b469b6 100644
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <linux/tboot.h>
 #include <linux/hrtimer.h>
+#include <linux/frame.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -8652,6 +8653,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
                        );
        }
 }
+STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
 
 static bool vmx_has_high_real_mode_segbase(void)
 {
@@ -9028,6 +9030,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
 }
+STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
 
 static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 {
index 87d3cb9..0e846f0 100644
@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
        ctxt->eflags = kvm_get_rflags(vcpu);
+       ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
        ctxt->eip = kvm_rip_read(vcpu);
        ctxt->mode = (!is_protmode(vcpu))               ? X86EMUL_MODE_REAL :
                     (ctxt->eflags & X86_EFLAGS_VM)     ? X86EMUL_MODE_VM86 :
@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
        return dr6;
 }
 
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 {
        struct kvm_run *kvm_run = vcpu->run;
 
-       /*
-        * rflags is the old, "raw" value of the flags.  The new value has
-        * not been saved yet.
-        *
-        * This is correct even for TF set by the guest, because "the
-        * processor will not generate this exception after the instruction
-        * that sets the TF flag".
-        */
-       if (unlikely(rflags & X86_EFLAGS_TF)) {
-               if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-                       kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
-                                                 DR6_RTM;
-                       kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
-                       kvm_run->debug.arch.exception = DB_VECTOR;
-                       kvm_run->exit_reason = KVM_EXIT_DEBUG;
-                       *r = EMULATE_USER_EXIT;
-               } else {
-                       /*
-                        * "Certain debug exceptions may clear bit 0-3.  The
-                        * remaining contents of the DR6 register are never
-                        * cleared by the processor".
-                        */
-                       vcpu->arch.dr6 &= ~15;
-                       vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
-                       kvm_queue_exception(vcpu, DB_VECTOR);
-               }
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+               kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+               kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+               kvm_run->debug.arch.exception = DB_VECTOR;
+               kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               *r = EMULATE_USER_EXIT;
+       } else {
+               /*
+                * "Certain debug exceptions may clear bit 0-3.  The
+                * remaining contents of the DR6 register are never
+                * cleared by the processor".
+                */
+               vcpu->arch.dr6 &= ~15;
+               vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+               kvm_queue_exception(vcpu, DB_VECTOR);
        }
 }
 
@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
        int r = EMULATE_DONE;
 
        kvm_x86_ops->skip_emulated_instruction(vcpu);
-       kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+       /*
+        * rflags is the old, "raw" value of the flags.  The new value has
+        * not been saved yet.
+        *
+        * This is correct even for TF set by the guest, because "the
+        * processor will not generate this exception after the instruction
+        * that sets the TF flag".
+        */
+       if (unlikely(rflags & X86_EFLAGS_TF))
+               kvm_vcpu_do_singlestep(vcpu, &r);
        return r == EMULATE_DONE;
 }
 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
@@ -5726,8 +5727,9 @@ restart:
                toggle_interruptibility(vcpu, ctxt->interruptibility);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
                kvm_rip_write(vcpu, ctxt->eip);
-               if (r == EMULATE_DONE)
-                       kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+               if (r == EMULATE_DONE &&
+                   (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+                       kvm_vcpu_do_singlestep(vcpu, &r);
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
                        __kvm_set_rflags(vcpu, ctxt->eflags);
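The refactor above captures the trap flag before emulation (ctxt->tf is latched while eflags still holds the pre-instruction value) and injects the single-step #DB afterwards only when that saved flag, or an explicit guest-debug request, asks for it. A hedged model of why the ordering matters (the struct and flag handling are simplified assumptions):

#include <stdbool.h>
#include <stdio.h>

#define FLAG_TF 0x100UL

struct emul_ctxt {
    unsigned long eflags;
    bool tf; /* TF value captured before the instruction */
};

static void do_singlestep(void)
{
    printf("inject #DB\n");
}

/* Capture first, emulate, then decide: the emulated instruction may
 * itself rewrite eflags (e.g. popf or syscall), so the post-emulation
 * value cannot be trusted to reflect the pre-instruction TF. */
static void emulate(struct emul_ctxt *ctxt, bool guest_debug_singlestep)
{
    ctxt->tf = (ctxt->eflags & FLAG_TF) != 0;

    ctxt->eflags &= ~FLAG_TF; /* emulated instruction clears TF */

    if (ctxt->tf || guest_debug_singlestep)
        do_singlestep();
}

int main(void)
{
    struct emul_ctxt ctxt = { .eflags = FLAG_TF };

    emulate(&ctxt, false); /* still single-steps, as it should */
    return 0;
}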
index c815564..10ffa7e 100644
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
        pushq %rbx
-       pushq %rbp
+       pushq %r12
        movq    %rdi, %r10      /* Save pointer */
        xorl    %r11d, %r11d    /* Return value */
        movl    (%rdi), %eax
        movl    4(%rdi), %ecx
        movl    8(%rdi), %edx
        movl    12(%rdi), %ebx
-       movl    20(%rdi), %ebp
+       movl    20(%rdi), %r12d
        movl    24(%rdi), %esi
        movl    28(%rdi), %edi
 1:     \op
@@ -29,10 +29,10 @@ ENTRY(\op\()_safe_regs)
        movl    %ecx, 4(%r10)
        movl    %edx, 8(%r10)
        movl    %ebx, 12(%r10)
-       movl    %ebp, 20(%r10)
+       movl    %r12d, 20(%r10)
        movl    %esi, 24(%r10)
        movl    %edi, 28(%r10)
-       popq %rbp
+       popq %r12
        popq %rbx
        ret
 3:
index 35ea061..0ea8afc 100644
@@ -162,6 +162,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
        if (fixup_exception(regs, trapnr))
                return;
 
+       if (fixup_bug(regs, trapnr))
+               return;
+
 fail:
        early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
                     (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
index 302f43f..adad702 100644
@@ -148,7 +148,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
        if (mm->get_unmapped_area == arch_get_unmapped_area)
index cbc87ea..9b3f9fa 100644
@@ -161,16 +161,16 @@ static int page_size_mask;
 
 static void __init probe_page_size_mask(void)
 {
-#if !defined(CONFIG_KMEMCHECK)
        /*
         * For CONFIG_KMEMCHECK or pagealloc debugging, identity mapping will
         * use small pages.
         * This will simplify cpa(), which otherwise needs to support splitting
         * large pages into small in interrupt context, etc.
         */
-       if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
+       if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled() && !IS_ENABLED(CONFIG_KMEMCHECK))
                page_size_mask |= 1 << PG_LEVEL_2M;
-#endif
+       else
+               direct_gbpages = 0;
 
        /* Enable PSE if available */
        if (boot_cpu_has(X86_FEATURE_PSE))
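Converting the #if block above to an IS_ENABLED()-style test keeps both branches visible to the compiler (and type-checked) while the condition still folds at build time, and it makes the else arm run: direct_gbpages is now cleared when large pages cannot be used. A hedged plain-C model of that behavioral change (the macro here is a simplified stand-in for the kernel's IS_ENABLED, and the values are illustrative):

#include <stdio.h>

#define IS_ENABLED_KMEMCHECK 0 /* stand-in compile-time constant */

static int page_size_mask;
static int direct_gbpages = 1;

/* Under the old #if, the whole if/else disappeared when KMEMCHECK was
 * set, so direct_gbpages was never cleared; now the else always
 * exists and fires whenever 2M pages are ruled out. */
static void probe_page_size_mask(int has_pse, int debug_pagealloc)
{
    if (has_pse && !debug_pagealloc && !IS_ENABLED_KMEMCHECK)
        page_size_mask |= 1 << 1; /* PG_LEVEL_2M, illustrative bit */
    else
        direct_gbpages = 0;
}

int main(void)
{
    probe_page_size_mask(1, 1);
    printf("gbpages=%d\n", direct_gbpages); /* 0: now disabled too */
    return 0;
}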
index 95651dc..0a59daf 100644
@@ -990,7 +990,13 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
 
                pud_base = pud_offset(p4d, 0);
                remove_pud_table(pud_base, addr, next, direct);
-               free_pud_table(pud_base, p4d);
+               /*
+                * For 4-level page tables we do not want to free PUDs, but in the
+                * 5-level case we should free them. This code will have to change
+                * to adapt for boot-time switching between 4 and 5 level page tables.
+                */
+               if (CONFIG_PGTABLE_LEVELS == 5)
+                       free_pud_table(pud_base, p4d);
        }
 
        if (direct)
index 90568c3..fefb4b6 100644
@@ -1,4 +1,6 @@
 #
 # Arch-specific network modules
 #
+OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
+
 obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
index f1d83b3..2f56e1e 100644
@@ -1,4 +1,5 @@
 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y
 
 obj-$(CONFIG_EFI)              += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o
index a6a198c..0504187 100644
@@ -1,3 +1,5 @@
+OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y
+
 # __restore_processor_state() restores %gs after S3 resume and so should not
 # itself be stack-protected
 nostackp := $(call cc-option, -fno-stack-protector)
index fffb0a1..bced7a3 100644
@@ -1,3 +1,6 @@
+OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_xen-pvh.o := y
+
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_spinlock.o = -pg
index f71f88e..19707db 100644
@@ -29,7 +29,8 @@ static inline void variant_irq_disable(unsigned int irq) { }
 # define PLATFORM_NR_IRQS 0
 #endif
 #define XTENSA_NR_IRQS XCHAL_NUM_INTERRUPTS
-#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS)
+#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS + 1)
+#define XTENSA_PIC_LINUX_IRQ(hwirq) ((hwirq) + 1)
 
 #if VARIANT_NR_IRQS == 0
 static inline void variant_init_irq(void) { }
index 003eeee..30ee8c6 100644
@@ -213,8 +213,6 @@ struct mm_struct;
 #define release_segments(mm)   do { } while(0)
 #define forget_segments()      do { } while (0)
 
-#define thread_saved_pc(tsk)   (task_pt_regs(tsk)->pc)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)          (task_pt_regs(tsk)->pc)
index a265edd..9934102 100644
@@ -34,11 +34,6 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 {
        int irq = irq_find_mapping(NULL, hwirq);
 
-       if (hwirq >= NR_IRQS) {
-               printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
-                               __func__, hwirq);
-       }
-
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
        /* Debugging check for stack overflow: is there less than 1KB free? */
        {
index 394ef08..33bfa52 100644
@@ -593,8 +593,7 @@ c_show(struct seq_file *f, void *slot)
                      (ccount_freq/10000) % 100,
                      loops_per_jiffy/(500000/HZ),
                      (loops_per_jiffy/(5000/HZ)) % 100);
-
-       seq_printf(f,"flags\t\t: "
+       seq_puts(f, "flags\t\t: "
 #if XCHAL_HAVE_NMI
                     "nmi "
 #endif
index 0693792..74afbf0 100644
@@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
                /* At this point:  (!vmm || addr < vmm->vm_end). */
                if (TASK_SIZE - len < addr)
                        return -ENOMEM;
-               if (!vmm || addr + len <= vmm->vm_start)
+               if (!vmm || addr + len <= vm_start_gap(vmm))
                        return addr;
                addr = vmm->vm_end;
                if (flags & MAP_SHARED)
index 30d9fc2..162c77e 100644
@@ -118,7 +118,7 @@ SECTIONS
   SECTION_VECTOR (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR)
   SECTION_VECTOR (.UserExceptionVector.literal, USER_VECTOR_VADDR - 4)
   SECTION_VECTOR (.UserExceptionVector.text, USER_VECTOR_VADDR)
-  SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 48)
+  SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 20)
   SECTION_VECTOR (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR)
 #endif
 
@@ -306,13 +306,13 @@ SECTIONS
                  .UserExceptionVector.literal)
   SECTION_VECTOR (_DoubleExceptionVector_literal,
                  .DoubleExceptionVector.literal,
-                 DOUBLEEXC_VECTOR_VADDR - 48,
+                 DOUBLEEXC_VECTOR_VADDR - 20,
                  SIZEOF(.UserExceptionVector.text),
                  .UserExceptionVector.text)
   SECTION_VECTOR (_DoubleExceptionVector_text,
                  .DoubleExceptionVector.text,
                  DOUBLEEXC_VECTOR_VADDR,
-                 48,
+                 20,
                  .DoubleExceptionVector.literal)
 
   . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3;
index 02e94bb..c45b90b 100644
@@ -317,8 +317,7 @@ static int __init simdisk_init(void)
        if (simdisk_count > MAX_SIMDISK_COUNT)
                simdisk_count = MAX_SIMDISK_COUNT;
 
-       sddev = kmalloc(simdisk_count * sizeof(struct simdisk),
-                       GFP_KERNEL);
+       sddev = kmalloc_array(simdisk_count, sizeof(*sddev), GFP_KERNEL);
        if (sddev == NULL)
                goto out_unregister;
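The kmalloc_array() conversion above is the overflow-safe spelling of a count * size allocation: it fails cleanly instead of handing back a short buffer when the multiplication wraps. A hedged userspace equivalent of what the helper guards against:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace model of kmalloc_array(): refuse the allocation outright
 * when n * size would overflow, rather than silently truncating. */
static void *alloc_array(size_t n, size_t size)
{
    if (size != 0 && n > SIZE_MAX / size)
        return NULL;
    return malloc(n * size);
}

int main(void)
{
    void *ok = alloc_array(16, 64);
    void *bad = alloc_array(SIZE_MAX / 2, 4); /* would wrap */

    printf("ok=%p bad=%p\n", ok, bad);        /* bad is NULL */
    free(ok);
    return 0;
}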
 
index dbeea2b..1fda7e2 100644
 
 /* Interrupt configuration. */
 
-#define PLATFORM_NR_IRQS       10
+#define PLATFORM_NR_IRQS       0
 
 /* Default assignment of LX60 devices to external interrupts. */
 
 #ifdef CONFIG_XTENSA_MX
 #define DUART16552_INTNUM      XCHAL_EXTINT3_NUM
 #define OETH_IRQ               XCHAL_EXTINT4_NUM
+#define C67X00_IRQ             XCHAL_EXTINT8_NUM
 #else
 #define DUART16552_INTNUM      XCHAL_EXTINT0_NUM
 #define OETH_IRQ               XCHAL_EXTINT1_NUM
+#define C67X00_IRQ             XCHAL_EXTINT5_NUM
 #endif
 
 /*
@@ -63,5 +65,5 @@
 
 #define C67X00_PADDR           (XCHAL_KIO_PADDR + 0x0D0D0000)
 #define C67X00_SIZE            0x10
-#define C67X00_IRQ             5
+
 #endif /* __XTENSA_XTAVNET_HARDWARE_H */
index 779be72..42285f3 100644
@@ -175,8 +175,8 @@ static struct resource ethoc_res[] = {
                .flags = IORESOURCE_MEM,
        },
        [2] = { /* IRQ number */
-               .start = OETH_IRQ,
-               .end   = OETH_IRQ,
+               .start = XTENSA_PIC_LINUX_IRQ(OETH_IRQ),
+               .end   = XTENSA_PIC_LINUX_IRQ(OETH_IRQ),
                .flags = IORESOURCE_IRQ,
        },
 };
@@ -213,8 +213,8 @@ static struct resource c67x00_res[] = {
                .flags = IORESOURCE_MEM,
        },
        [1] = { /* IRQ number */
-               .start = C67X00_IRQ,
-               .end   = C67X00_IRQ,
+               .start = XTENSA_PIC_LINUX_IRQ(C67X00_IRQ),
+               .end   = XTENSA_PIC_LINUX_IRQ(C67X00_IRQ),
                .flags = IORESOURCE_IRQ,
        },
 };
@@ -247,7 +247,7 @@ static struct resource serial_resource = {
 static struct plat_serial8250_port serial_platform_data[] = {
        [0] = {
                .mapbase        = DUART16552_PADDR,
-               .irq            = DUART16552_INTNUM,
+               .irq            = XTENSA_PIC_LINUX_IRQ(DUART16552_INTNUM),
                .flags          = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
                                  UPF_IOREMAP,
                .iotype         = XCHAL_HAVE_BE ? UPIO_MEM32BE : UPIO_MEM32,
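These platform hunks route every device through XTENSA_PIC_LINUX_IRQ(), which biases the hardware interrupt number by one so that Linux IRQ 0, conventionally treated as "no interrupt", can never collide with hardware interrupt 0. A hedged sketch of the mapping (the hwirq value below is an illustrative stand-in for an XCHAL_EXTINT number):

#include <stdio.h>

/* Hardware interrupt numbers start at 0, but Linux reserves IRQ 0 as
 * invalid, so the PIC's Linux-side numbering is shifted up by one. */
#define XTENSA_PIC_LINUX_IRQ(hwirq) ((hwirq) + 1)

int main(void)
{
    int oeth_hwirq = 1; /* illustrative value */

    printf("hwirq %d -> Linux IRQ %d\n",
           oeth_hwirq, XTENSA_PIC_LINUX_IRQ(oeth_hwirq));
    return 0;
}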
index 6ebcef2..43c7116 100644
@@ -533,6 +533,7 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
        case 3:
                if (newline != '\n')
                        return -EINVAL;
+               /* fall through */
        case 2:
                if (length <= 0)
                        return -EINVAL;
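The added /* fall through */ annotation documents that case 3 deliberately continues into case 2 after validating the newline, which is also what keeps compilers that warn on implicit fall-through quiet. A hedged miniature of the cascading-validation pattern:

#include <stdio.h>

/* Validation cascades downward: each case checks one more field, then
 * falls through to the checks shared with smaller argument counts. */
static int check(int argc, char newline, int length)
{
    switch (argc) {
    case 3:
        if (newline != '\n')
            return -1;
        /* fall through */
    case 2:
        if (length <= 0)
            return -1;
        /* fall through */
    case 1:
        return 0;
    default:
        return -1;
    }
}

int main(void)
{
    printf("%d\n", check(3, '\n', 5)); /* 0: all checks pass */
    printf("%d\n", check(3, 'x', 5));  /* -1: newline check fails */
    return 0;
}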
index ed93da2..12bbc6b 100644
@@ -725,8 +725,12 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
 }
 
 static void
-bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
+                     struct bfq_io_cq *bic, bool bfq_already_existing)
 {
+       unsigned int old_wr_coeff = bfqq->wr_coeff;
+       bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
+
        if (bic->saved_idle_window)
                bfq_mark_bfqq_idle_window(bfqq);
        else
@@ -754,6 +758,14 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
 
        /* make sure weight will be updated, however we got here */
        bfqq->entity.prio_changed = 1;
+
+       if (likely(!busy))
+               return;
+
+       if (old_wr_coeff == 1 && bfqq->wr_coeff > 1)
+               bfqd->wr_busy_queues++;
+       else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1)
+               bfqd->wr_busy_queues--;
 }
 
 static int bfqq_process_refs(struct bfq_queue *bfqq)
@@ -4290,10 +4302,16 @@ static void bfq_put_rq_priv_body(struct bfq_queue *bfqq)
        bfq_put_queue(bfqq);
 }
 
-static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
+static void bfq_finish_request(struct request *rq)
 {
-       struct bfq_queue *bfqq = RQ_BFQQ(rq);
-       struct bfq_data *bfqd = bfqq->bfqd;
+       struct bfq_queue *bfqq;
+       struct bfq_data *bfqd;
+
+       if (!rq->elv.icq)
+               return;
+
+       bfqq = RQ_BFQQ(rq);
+       bfqd = bfqq->bfqd;
 
        if (rq->rq_flags & RQF_STARTED)
                bfqg_stats_update_completion(bfqq_group(bfqq),
@@ -4324,7 +4342,7 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
                 */
 
                if (!RB_EMPTY_NODE(&rq->rb_node))
-                       bfq_remove_request(q, rq);
+                       bfq_remove_request(rq->q, rq);
                bfq_put_rq_priv_body(bfqq);
        }
 
@@ -4394,20 +4412,21 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
 /*
  * Allocate bfq data structures associated with this request.
  */
-static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
-                             struct bio *bio)
+static void bfq_prepare_request(struct request *rq, struct bio *bio)
 {
+       struct request_queue *q = rq->q;
        struct bfq_data *bfqd = q->elevator->elevator_data;
-       struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
+       struct bfq_io_cq *bic;
        const int is_sync = rq_is_sync(rq);
        struct bfq_queue *bfqq;
        bool new_queue = false;
-       bool split = false;
+       bool bfqq_already_existing = false, split = false;
 
-       spin_lock_irq(&bfqd->lock);
+       if (!rq->elv.icq)
+               return;
+       bic = icq_to_bic(rq->elv.icq);
 
-       if (!bic)
-               goto queue_fail;
+       spin_lock_irq(&bfqd->lock);
 
        bfq_check_ioprio_change(bic, bio);
 
@@ -4432,6 +4451,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
                                bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
                                                                 true, is_sync,
                                                                 NULL);
+                       else
+                               bfqq_already_existing = true;
                }
        }
 
@@ -4457,7 +4478,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
                         * queue: restore the idle window and the
                         * possible weight raising period.
                         */
-                       bfq_bfqq_resume_state(bfqq, bic);
+                       bfq_bfqq_resume_state(bfqq, bfqd, bic,
+                                             bfqq_already_existing);
                }
        }
 
@@ -4465,13 +4487,6 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
                bfq_handle_burst(bfqd, bfqq);
 
        spin_unlock_irq(&bfqd->lock);
-
-       return 0;
-
-queue_fail:
-       spin_unlock_irq(&bfqd->lock);
-
-       return 1;
 }
 
 static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
@@ -4950,8 +4965,8 @@ static struct elv_fs_entry bfq_attrs[] = {
 
 static struct elevator_type iosched_bfq_mq = {
        .ops.mq = {
-               .get_rq_priv            = bfq_get_rq_private,
-               .put_rq_priv            = bfq_put_rq_private,
+               .prepare_request        = bfq_prepare_request,
+               .finish_request         = bfq_finish_request,
                .exit_icq               = bfq_exit_icq,
                .insert_requests        = bfq_insert_requests,
                .dispatch_request       = bfq_dispatch_request,
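The interface change in this hunk swaps the fallible get_rq_priv/put_rq_priv pair for prepare_request/finish_request hooks that return void; bfq therefore guards itself with an early return when a request arrives without an io context, instead of reporting failure. A hedged sketch of that guard (the struct layout is a simplified assumption, only the hook names come from the diff):

#include <stddef.h>
#include <stdio.h>

struct request {
    void *icq; /* io context; may legitimately be NULL */
};

/* The hooks can no longer report failure, so requests without an icq
 * are simply left untracked by the scheduler. */
static void prepare_request(struct request *rq)
{
    if (!rq->icq)
        return;
    printf("tracking request\n");
}

static void finish_request(struct request *rq)
{
    if (!rq->icq)
        return;
    printf("releasing request\n");
}

int main(void)
{
    int ctx = 1;
    struct request bare = { NULL };
    struct request tracked = { &ctx };

    prepare_request(&bare); /* no-op, no error path needed */
    prepare_request(&tracked);
    finish_request(&tracked);
    return 0;
}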
index b5009a8..b8a3a65 100644
@@ -224,7 +224,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
  * @bio:       bio to generate/verify integrity metadata for
  * @proc_fn:   Pointer to the relevant processing function
  */
-static int bio_integrity_process(struct bio *bio,
+static blk_status_t bio_integrity_process(struct bio *bio,
                                 integrity_processing_fn *proc_fn)
 {
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
@@ -232,7 +232,7 @@ static int bio_integrity_process(struct bio *bio,
        struct bvec_iter bviter;
        struct bio_vec bv;
        struct bio_integrity_payload *bip = bio_integrity(bio);
-       unsigned int ret = 0;
+       blk_status_t ret = BLK_STS_OK;
        void *prot_buf = page_address(bip->bip_vec->bv_page) +
                bip->bip_vec->bv_offset;
 
@@ -369,7 +369,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
        struct bio *bio = bip->bip_bio;
        struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-       bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn);
+       bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
 
        /* Restore original bio completion handler */
        bio->bi_end_io = bip->bip_end_io;
@@ -398,7 +398,7 @@ void bio_integrity_endio(struct bio *bio)
         * integrity metadata.  Restore original bio end_io handler
         * and run it.
         */
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                bio->bi_end_io = bip->bip_end_io;
                bio_endio(bio);
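The bi_error to bi_status conversion running through these hunks gives the block layer a dedicated status type, with translation to a classic negative errno happening only at the boundary (blk_status_to_errno() in the completion path above). A hedged model of that boundary translation (the typedef mirrors the kernel's, but the non-zero status value is an illustrative stand-in):

#include <errno.h>
#include <stdio.h>

typedef unsigned int blk_status_t; /* modeled after the kernel typedef */

#define BLK_STS_OK    0u
#define BLK_STS_IOERR 10u          /* illustrative value */

/* Translate the typed block status to a negative errno only at the
 * edge where callers still expect errno semantics. */
static int blk_status_to_errno(blk_status_t status)
{
    switch (status) {
    case BLK_STS_OK:
        return 0;
    case BLK_STS_IOERR:
    default:
        return -EIO;
    }
}

int main(void)
{
    blk_status_t st = BLK_STS_IOERR;

    printf("errno=%d\n", blk_status_to_errno(st)); /* -5 (-EIO) */
    return 0;
}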
 
index 888e780..1cfcd0d 100644
@@ -240,20 +240,21 @@ fallback:
        return bvl;
 }
 
-static void __bio_free(struct bio *bio)
+void bio_uninit(struct bio *bio)
 {
        bio_disassociate_task(bio);
 
        if (bio_integrity(bio))
                bio_integrity_free(bio);
 }
+EXPORT_SYMBOL(bio_uninit);
 
 static void bio_free(struct bio *bio)
 {
        struct bio_set *bs = bio->bi_pool;
        void *p;
 
-       __bio_free(bio);
+       bio_uninit(bio);
 
        if (bs) {
                bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
@@ -271,6 +272,11 @@ static void bio_free(struct bio *bio)
        }
 }
 
+/*
+ * Users of this function have their own bio allocation. Subsequently,
+ * they must remember to pair any call to bio_init() with bio_uninit()
+ * when IO has completed, or when the bio is released.
+ */
 void bio_init(struct bio *bio, struct bio_vec *table,
              unsigned short max_vecs)
 {
@@ -297,7 +303,7 @@ void bio_reset(struct bio *bio)
 {
        unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
 
-       __bio_free(bio);
+       bio_uninit(bio);
 
        memset(bio, 0, BIO_RESET_BYTES);
        bio->bi_flags = flags;
@@ -309,8 +315,8 @@ static struct bio *__bio_chain_endio(struct bio *bio)
 {
        struct bio *parent = bio->bi_private;
 
-       if (!parent->bi_error)
-               parent->bi_error = bio->bi_error;
+       if (!parent->bi_status)
+               parent->bi_status = bio->bi_status;
        bio_put(bio);
        return parent;
 }
@@ -363,6 +369,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
        struct bio_list punt, nopunt;
        struct bio *bio;
 
+       if (WARN_ON_ONCE(!bs->rescue_workqueue))
+               return;
        /*
         * In order to guarantee forward progress we must punt only bios that
         * were allocated from this bio_set; otherwise, if there was a bio on
@@ -474,7 +482,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
 
                if (current->bio_list &&
                    (!bio_list_empty(&current->bio_list[0]) ||
-                    !bio_list_empty(&current->bio_list[1])))
+                    !bio_list_empty(&current->bio_list[1])) &&
+                   bs->rescue_workqueue)
                        gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
                p = mempool_alloc(bs->bio_pool, gfp_mask);
@@ -544,7 +553,7 @@ EXPORT_SYMBOL(zero_fill_bio);
  *
  * Description:
  *   Put a reference to a &struct bio, either one you have gotten with
- *   bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
+ *   bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
  **/
 void bio_put(struct bio *bio)
 {
@@ -593,6 +602,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
        bio->bi_bdev = bio_src->bi_bdev;
        bio_set_flag(bio, BIO_CLONED);
        bio->bi_opf = bio_src->bi_opf;
+       bio->bi_write_hint = bio_src->bi_write_hint;
        bio->bi_iter = bio_src->bi_iter;
        bio->bi_io_vec = bio_src->bi_io_vec;
 
@@ -676,6 +686,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
                return NULL;
        bio->bi_bdev            = bio_src->bi_bdev;
        bio->bi_opf             = bio_src->bi_opf;
+       bio->bi_write_hint      = bio_src->bi_write_hint;
        bio->bi_iter.bi_sector  = bio_src->bi_iter.bi_sector;
        bio->bi_iter.bi_size    = bio_src->bi_iter.bi_size;
 
@@ -918,7 +929,7 @@ static void submit_bio_wait_endio(struct bio *bio)
 {
        struct submit_bio_ret *ret = bio->bi_private;
 
-       ret->error = bio->bi_error;
+       ret->error = blk_status_to_errno(bio->bi_status);
        complete(&ret->event);
 }
 
@@ -1817,8 +1828,8 @@ again:
        }
 
        if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
-               trace_block_bio_complete(bdev_get_queue(bio->bi_bdev),
-                                        bio, bio->bi_error);
+               trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio,
+                                        blk_status_to_errno(bio->bi_status));
                bio_clear_flag(bio, BIO_TRACE_COMPLETION);
        }
 
@@ -1921,9 +1932,29 @@ void bioset_free(struct bio_set *bs)
 }
 EXPORT_SYMBOL(bioset_free);
 
-static struct bio_set *__bioset_create(unsigned int pool_size,
-                                      unsigned int front_pad,
-                                      bool create_bvec_pool)
+/**
+ * bioset_create  - Create a bio_set
+ * @pool_size: Number of bios and bio_vecs to cache in the mempool
+ * @front_pad: Number of bytes to allocate in front of the returned bio
+ * @flags:     Flags to modify behavior, currently %BIOSET_NEED_BVECS
+ *              and %BIOSET_NEED_RESCUER
+ *
+ * Description:
+ *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ *    to ask for a number of bytes to be allocated in front of the bio.
+ *    Front pad allocation is useful for embedding the bio inside
+ *    another structure, to avoid allocating extra data to go with the bio.
+ *    Note that the bio must be embedded at the END of that structure always,
+ *    or things will break badly.
+ *    If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
+ *    for allocating iovecs.  This pool is not needed e.g. for bio_clone_fast().
+ *    If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
+ *    dispatch queued requests when the mempool runs out of space.
+ *
+ */
+struct bio_set *bioset_create(unsigned int pool_size,
+                             unsigned int front_pad,
+                             int flags)
 {
        unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
        struct bio_set *bs;
@@ -1948,12 +1979,15 @@ static struct bio_set *__bioset_create(unsigned int pool_size,
        if (!bs->bio_pool)
                goto bad;
 
-       if (create_bvec_pool) {
+       if (flags & BIOSET_NEED_BVECS) {
                bs->bvec_pool = biovec_create_pool(pool_size);
                if (!bs->bvec_pool)
                        goto bad;
        }
 
+       if (!(flags & BIOSET_NEED_RESCUER))
+               return bs;
+
        bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
        if (!bs->rescue_workqueue)
                goto bad;
@@ -1963,41 +1997,8 @@ bad:
        bioset_free(bs);
        return NULL;
 }
-
-/**
- * bioset_create  - Create a bio_set
- * @pool_size: Number of bio and bio_vecs to cache in the mempool
- * @front_pad: Number of bytes to allocate in front of the returned bio
- *
- * Description:
- *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
- *    to ask for a number of bytes to be allocated in front of the bio.
- *    Front pad allocation is useful for embedding the bio inside
- *    another structure, to avoid allocating extra data to go with the bio.
- *    Note that the bio must be embedded at the END of that structure always,
- *    or things will break badly.
- */
-struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
-{
-       return __bioset_create(pool_size, front_pad, true);
-}
 EXPORT_SYMBOL(bioset_create);
 
-/**
- * bioset_create_nobvec  - Create a bio_set without bio_vec mempool
- * @pool_size: Number of bio to cache in the mempool
- * @front_pad: Number of bytes to allocate in front of the returned bio
- *
- * Description:
- *    Same functionality as bioset_create() except that mempool is not
- *    created for bio_vecs. Saving some memory for bio_clone_fast() users.
- */
-struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
-{
-       return __bioset_create(pool_size, front_pad, false);
-}
-EXPORT_SYMBOL(bioset_create_nobvec);
-
 #ifdef CONFIG_BLK_CGROUP
 
 /**
@@ -2112,7 +2113,7 @@ static int __init init_bio(void)
        bio_integrity_init();
        biovec_init_slabs();
 
-       fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+       fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
        if (!fs_bio_set)
                panic("bio: can't allocate bios\n");
 
index a7421b7..af393d5 100644 (file)
@@ -129,11 +129,70 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL(blk_rq_init);
 
+static const struct {
+       int             errno;
+       const char      *name;
+} blk_errors[] = {
+       [BLK_STS_OK]            = { 0,          "" },
+       [BLK_STS_NOTSUPP]       = { -EOPNOTSUPP, "operation not supported" },
+       [BLK_STS_TIMEOUT]       = { -ETIMEDOUT, "timeout" },
+       [BLK_STS_NOSPC]         = { -ENOSPC,    "critical space allocation" },
+       [BLK_STS_TRANSPORT]     = { -ENOLINK,   "recoverable transport" },
+       [BLK_STS_TARGET]        = { -EREMOTEIO, "critical target" },
+       [BLK_STS_NEXUS]         = { -EBADE,     "critical nexus" },
+       [BLK_STS_MEDIUM]        = { -ENODATA,   "critical medium" },
+       [BLK_STS_PROTECTION]    = { -EILSEQ,    "protection" },
+       [BLK_STS_RESOURCE]      = { -ENOMEM,    "kernel resource" },
+       [BLK_STS_AGAIN]         = { -EAGAIN,    "nonblocking retry" },
+
+       /* device mapper special case, should not leak out: */
+       [BLK_STS_DM_REQUEUE]    = { -EREMCHG, "dm internal retry" },
+
+       /* everything else not covered above: */
+       [BLK_STS_IOERR]         = { -EIO,       "I/O" },
+};
+
+blk_status_t errno_to_blk_status(int errno)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
+               if (blk_errors[i].errno == errno)
+                       return (__force blk_status_t)i;
+       }
+
+       return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(errno_to_blk_status);
+
+int blk_status_to_errno(blk_status_t status)
+{
+       int idx = (__force int)status;
+
+       if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+               return -EIO;
+       return blk_errors[idx].errno;
+}
+EXPORT_SYMBOL_GPL(blk_status_to_errno);
+
+static void print_req_error(struct request *req, blk_status_t status)
+{
+       int idx = (__force int)status;
+
+       if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+               return;
+
+       printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
+                          __func__, blk_errors[idx].name, req->rq_disk ?
+                          req->rq_disk->disk_name : "?",
+                          (unsigned long long)blk_rq_pos(req));
+}
+
 static void req_bio_endio(struct request *rq, struct bio *bio,
-                         unsigned int nbytes, int error)
+                         unsigned int nbytes, blk_status_t error)
 {
        if (error)
-               bio->bi_error = error;
+               bio->bi_status = error;
 
        if (unlikely(rq->rq_flags & RQF_QUIET))
                bio_set_flag(bio, BIO_QUIET);
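The table is deliberately lossy in the errno direction; a quick
illustration of the round-trip behavior it implies:

        blk_status_t sts = errno_to_blk_status(-ENOSPC); /* BLK_STS_NOSPC */
        int err = blk_status_to_errno(sts);              /* back to -ENOSPC */

        /* errnos without a dedicated entry collapse to BLK_STS_IOERR: */
        err = blk_status_to_errno(errno_to_blk_status(-EINVAL)); /* -EIO */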
@@ -177,10 +236,13 @@ static void blk_delay_work(struct work_struct *work)
  * Description:
  *   Sometimes queueing needs to be postponed for a little while, to allow
  *   resources to come back. This function will make sure that queueing is
- *   restarted around the specified time. Queue lock must be held.
+ *   restarted around the specified time.
  */
 void blk_delay_queue(struct request_queue *q, unsigned long msecs)
 {
+       lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        if (likely(!blk_queue_dead(q)))
                queue_delayed_work(kblockd_workqueue, &q->delay_work,
                                   msecs_to_jiffies(msecs));
@@ -198,6 +260,9 @@ EXPORT_SYMBOL(blk_delay_queue);
  **/
 void blk_start_queue_async(struct request_queue *q)
 {
+       lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        queue_flag_clear(QUEUE_FLAG_STOPPED, q);
        blk_run_queue_async(q);
 }
@@ -210,11 +275,13 @@ EXPORT_SYMBOL(blk_start_queue_async);
  * Description:
  *   blk_start_queue() will clear the stop flag on the queue, and call
  *   the request_fn for the queue if it was in a stopped state when
- *   entered. Also see blk_stop_queue(). Queue lock must be held.
+ *   entered. Also see blk_stop_queue().
  **/
 void blk_start_queue(struct request_queue *q)
 {
+       lockdep_assert_held(q->queue_lock);
        WARN_ON(!irqs_disabled());
+       WARN_ON_ONCE(q->mq_ops);
 
        queue_flag_clear(QUEUE_FLAG_STOPPED, q);
        __blk_run_queue(q);
@@ -233,10 +300,13 @@ EXPORT_SYMBOL(blk_start_queue);
  *   or if it simply chooses not to queue more I/O at one point, it can
  *   call this function to prevent the request_fn from being called until
  *   the driver has signalled it's ready to go again. This happens by calling
- *   blk_start_queue() to restart queue operations. Queue lock must be held.
+ *   blk_start_queue() to restart queue operations.
  **/
 void blk_stop_queue(struct request_queue *q)
 {
+       lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        cancel_delayed_work(&q->delay_work);
        queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
@@ -289,6 +359,9 @@ EXPORT_SYMBOL(blk_sync_queue);
  */
 inline void __blk_run_queue_uncond(struct request_queue *q)
 {
+       lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        if (unlikely(blk_queue_dead(q)))
                return;
 
@@ -310,11 +383,13 @@ EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
  * @q: The queue to run
  *
  * Description:
- *    See @blk_run_queue. This variant must be called with the queue lock
- *    held and interrupts disabled.
+ *    See @blk_run_queue.
  */
 void __blk_run_queue(struct request_queue *q)
 {
+       lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        if (unlikely(blk_queue_stopped(q)))
                return;
 
@@ -328,10 +403,18 @@ EXPORT_SYMBOL(__blk_run_queue);
  *
  * Description:
  *    Tells kblockd to perform the equivalent of @blk_run_queue on behalf
- *    of us. The caller must hold the queue lock.
+ *    of us.
+ *
+ * Note:
+ *    Since it is not allowed to run q->delay_work after blk_cleanup_queue()
+ *    has canceled q->delay_work, callers must hold the queue lock to avoid
+ *    race conditions between blk_cleanup_queue() and blk_run_queue_async().
  */
 void blk_run_queue_async(struct request_queue *q)
 {
+       lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
                mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
 }
@@ -349,6 +432,8 @@ void blk_run_queue(struct request_queue *q)
 {
        unsigned long flags;
 
+       WARN_ON_ONCE(q->mq_ops);
+
        spin_lock_irqsave(q->queue_lock, flags);
        __blk_run_queue(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
@@ -377,6 +462,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
        int i;
 
        lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
 
        while (true) {
                bool drain = false;
@@ -455,6 +541,8 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
  */
 void blk_queue_bypass_start(struct request_queue *q)
 {
+       WARN_ON_ONCE(q->mq_ops);
+
        spin_lock_irq(q->queue_lock);
        q->bypass_depth++;
        queue_flag_set(QUEUE_FLAG_BYPASS, q);
@@ -481,6 +569,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
  * @q: queue of interest
  *
  * Leave bypass mode and restore the normal queueing behavior.
+ *
+ * Note: although blk_queue_bypass_start() is only called for blk-sq queues,
+ * this function is called for both blk-sq and blk-mq queues.
  */
 void blk_queue_bypass_end(struct request_queue *q)
 {
@@ -732,7 +823,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        if (q->id < 0)
                goto fail_q;
 
-       q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+       q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
        if (!q->bio_split)
                goto fail_id;
 
@@ -878,6 +969,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
 
 int blk_init_allocated_queue(struct request_queue *q)
 {
+       WARN_ON_ONCE(q->mq_ops);
+
        q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
        if (!q->fq)
                return -ENOMEM;
@@ -1015,6 +1108,8 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
        struct request_list *rl;
        int on_thresh, off_thresh;
 
+       WARN_ON_ONCE(q->mq_ops);
+
        spin_lock_irq(q->queue_lock);
        q->nr_requests = nr;
        blk_queue_congestion_threshold(q);
@@ -1077,6 +1172,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
        int may_queue;
        req_flags_t rq_flags = RQF_ALLOCED;
 
+       lockdep_assert_held(q->queue_lock);
+
        if (unlikely(blk_queue_dying(q)))
                return ERR_PTR(-ENODEV);
 
@@ -1250,12 +1347,20 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
        struct request_list *rl;
        struct request *rq;
 
+       lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        rl = blk_get_rl(q, bio);        /* transferred to @rq on success */
 retry:
        rq = __get_request(rl, op, bio, gfp_mask);
        if (!IS_ERR(rq))
                return rq;
 
+       if (op & REQ_NOWAIT) {
+               blk_put_rl(rl);
+               return ERR_PTR(-EAGAIN);
+       }
+
        if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
                blk_put_rl(rl);
                return rq;
@@ -1283,16 +1388,18 @@ retry:
        goto retry;
 }
 
-static struct request *blk_old_get_request(struct request_queue *q, int rw,
-               gfp_t gfp_mask)
+static struct request *blk_old_get_request(struct request_queue *q,
+                                          unsigned int op, gfp_t gfp_mask)
 {
        struct request *rq;
 
+       WARN_ON_ONCE(q->mq_ops);
+
        /* create ioc upfront */
        create_io_context(gfp_mask, q->node);
 
        spin_lock_irq(q->queue_lock);
-       rq = get_request(q, rw, NULL, gfp_mask);
+       rq = get_request(q, op, NULL, gfp_mask);
        if (IS_ERR(rq)) {
                spin_unlock_irq(q->queue_lock);
                return rq;
@@ -1305,14 +1412,24 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
        return rq;
 }
 
-struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+                               gfp_t gfp_mask)
 {
-       if (q->mq_ops)
-               return blk_mq_alloc_request(q, rw,
+       struct request *req;
+
+       if (q->mq_ops) {
+               req = blk_mq_alloc_request(q, op,
                        (gfp_mask & __GFP_DIRECT_RECLAIM) ?
                                0 : BLK_MQ_REQ_NOWAIT);
-       else
-               return blk_old_get_request(q, rw, gfp_mask);
+               if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
+                       q->mq_ops->initialize_rq_fn(req);
+       } else {
+               req = blk_old_get_request(q, op, gfp_mask);
+               if (!IS_ERR(req) && q->initialize_rq_fn)
+                       q->initialize_rq_fn(req);
+       }
+
+       return req;
 }
 EXPORT_SYMBOL(blk_get_request);
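Callers now pass a full request op rather than a read/write int; a hedged
allocation sketch (q is an assumed live queue):

        struct request *rq;

        rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL);
        if (IS_ERR(rq))
                return PTR_ERR(rq);
        /* ... fill in the request and execute it ... */
        blk_put_request(rq);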
 
@@ -1328,6 +1445,9 @@ EXPORT_SYMBOL(blk_get_request);
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
+       lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        blk_delete_timer(rq);
        blk_clear_rq_complete(rq);
        trace_block_rq_requeue(q, rq);
@@ -1402,9 +1522,6 @@ static void blk_pm_put_request(struct request *rq)
 static inline void blk_pm_put_request(struct request *rq) {}
 #endif
 
-/*
- * queue lock must be held
- */
 void __blk_put_request(struct request_queue *q, struct request *req)
 {
        req_flags_t rq_flags = req->rq_flags;
@@ -1417,6 +1534,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
                return;
        }
 
+       lockdep_assert_held(q->queue_lock);
+
        blk_pm_put_request(req);
 
        elv_completed_request(q, req);
@@ -1646,6 +1765,7 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio)
                req->ioprio = ioc->ioprio;
        else
                req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+       req->write_hint = bio->bi_write_hint;
        blk_rq_bio_prep(req->q, req, bio);
 }
 EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
@@ -1665,10 +1785,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
         */
        blk_queue_bounce(q, &bio);
 
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-               bio->bi_error = -EIO;
+               bio->bi_status = BLK_STS_IOERR;
                bio_endio(bio);
                return BLK_QC_T_NONE;
        }
@@ -1726,7 +1846,10 @@ get_rq:
        req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
        if (IS_ERR(req)) {
                __wbt_done(q->rq_wb, wb_acct);
-               bio->bi_error = PTR_ERR(req);
+               if (PTR_ERR(req) == -ENOMEM)
+                       bio->bi_status = BLK_STS_RESOURCE;
+               else
+                       bio->bi_status = BLK_STS_IOERR;
                bio_endio(bio);
                goto out_unlock;
        }
@@ -1881,7 +2004,7 @@ generic_make_request_checks(struct bio *bio)
 {
        struct request_queue *q;
        int nr_sectors = bio_sectors(bio);
-       int err = -EIO;
+       blk_status_t status = BLK_STS_IOERR;
        char b[BDEVNAME_SIZE];
        struct hd_struct *part;
 
@@ -1900,6 +2023,14 @@ generic_make_request_checks(struct bio *bio)
                goto end_io;
        }
 
+       /*
+        * For a REQ_NOWAIT request, return -EOPNOTSUPP if the queue
+        * is not a request-based queue.
+        */
+
+       if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
+               goto not_supported;
+
        part = bio->bi_bdev->bd_part;
        if (should_fail_request(part, bio->bi_iter.bi_size) ||
            should_fail_request(&part_to_disk(part)->part0,
@@ -1924,7 +2055,7 @@ generic_make_request_checks(struct bio *bio)
            !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
                bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
                if (!nr_sectors) {
-                       err = 0;
+                       status = BLK_STS_OK;
                        goto end_io;
                }
        }
@@ -1976,9 +2107,9 @@ generic_make_request_checks(struct bio *bio)
        return true;
 
 not_supported:
-       err = -EOPNOTSUPP;
+       status = BLK_STS_NOTSUPP;
 end_io:
-       bio->bi_error = err;
+       bio->bi_status = status;
        bio_endio(bio);
        return false;
 }
@@ -2057,7 +2188,7 @@ blk_qc_t generic_make_request(struct bio *bio)
        do {
                struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-               if (likely(blk_queue_enter(q, false) == 0)) {
+               if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
                        struct bio_list lower, same;
 
                        /* Create a fresh bio_list for all subordinate requests */
@@ -2082,7 +2213,11 @@ blk_qc_t generic_make_request(struct bio *bio)
                        bio_list_merge(&bio_list_on_stack[0], &same);
                        bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
                } else {
-                       bio_io_error(bio);
+                       if (unlikely(!blk_queue_dying(q) &&
+                                       (bio->bi_opf & REQ_NOWAIT)))
+                               bio_wouldblock_error(bio);
+                       else
+                               bio_io_error(bio);
                }
                bio = bio_list_pop(&bio_list_on_stack[0]);
        } while (bio);
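A submitter that must not block can now opt in per bio and handle the
would-block completion; a hedged sketch (my_end_io is illustrative):

        static void my_end_io(struct bio *bio)
        {
                if (bio->bi_status == BLK_STS_AGAIN) {
                        /* the queue could not take the bio without
                         * blocking; resubmit later from a context
                         * that may sleep */
                }
                bio_put(bio);
        }

        bio->bi_opf |= REQ_NOWAIT;
        bio->bi_end_io = my_end_io;
        submit_bio(bio);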
@@ -2183,29 +2318,29 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
  * @q:  the queue to submit the request
  * @rq: the request being queued
  */
-int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 {
        unsigned long flags;
        int where = ELEVATOR_INSERT_BACK;
 
        if (blk_cloned_rq_check_limits(q, rq))
-               return -EIO;
+               return BLK_STS_IOERR;
 
        if (rq->rq_disk &&
            should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
-               return -EIO;
+               return BLK_STS_IOERR;
 
        if (q->mq_ops) {
                if (blk_queue_io_stat(q))
                        blk_account_io_start(rq, true);
                blk_mq_sched_insert_request(rq, false, true, false, false);
-               return 0;
+               return BLK_STS_OK;
        }
 
        spin_lock_irqsave(q->queue_lock, flags);
        if (unlikely(blk_queue_dying(q))) {
                spin_unlock_irqrestore(q->queue_lock, flags);
-               return -ENODEV;
+               return BLK_STS_IOERR;
        }
 
        /*
@@ -2222,7 +2357,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
                __blk_run_queue(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 
-       return 0;
+       return BLK_STS_OK;
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
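Request stacking drivers now receive the status directly and can route it
into their own completion path; a hedged caller sketch (clone setup and
my_complete_clone are illustrative):

        blk_status_t ret;

        ret = blk_insert_cloned_request(q, clone);
        if (ret != BLK_STS_OK)
                my_complete_clone(clone, ret);  /* driver-specific completion */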
 
@@ -2238,9 +2373,6 @@ EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
  *
  * Return:
  *     The number of bytes to fail.
- *
- * Context:
- *     queue_lock must be held.
  */
 unsigned int blk_rq_err_bytes(const struct request *rq)
 {
@@ -2380,15 +2512,15 @@ void blk_account_io_start(struct request *rq, bool new_io)
  * Return:
  *     Pointer to the request at the top of @q if available.  Null
  *     otherwise.
- *
- * Context:
- *     queue_lock must be held.
  */
 struct request *blk_peek_request(struct request_queue *q)
 {
        struct request *rq;
        int ret;
 
+       lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        while ((rq = __elv_next_request(q)) != NULL) {
 
                rq = blk_pm_peek_request(q, rq);
@@ -2456,15 +2588,14 @@ struct request *blk_peek_request(struct request_queue *q)
                        rq = NULL;
                        break;
                } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
-                       int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
-
                        rq->rq_flags |= RQF_QUIET;
                        /*
                         * Mark this request as started so we don't trigger
                         * any debug logic in the end I/O path.
                         */
                        blk_start_request(rq);
-                       __blk_end_request_all(rq, err);
+                       __blk_end_request_all(rq, ret == BLKPREP_INVALID ?
+                                       BLK_STS_TARGET : BLK_STS_IOERR);
                } else {
                        printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
                        break;
@@ -2505,12 +2636,12 @@ void blk_dequeue_request(struct request *rq)
  *
  *     Block internal functions which don't want to start timer should
  *     call blk_dequeue_request().
- *
- * Context:
- *     queue_lock must be held.
  */
 void blk_start_request(struct request *req)
 {
+       lockdep_assert_held(req->q->queue_lock);
+       WARN_ON_ONCE(req->q->mq_ops);
+
        blk_dequeue_request(req);
 
        if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
@@ -2535,14 +2666,14 @@ EXPORT_SYMBOL(blk_start_request);
  * Return:
  *     Pointer to the request at the top of @q if available.  Null
  *     otherwise.
- *
- * Context:
- *     queue_lock must be held.
  */
 struct request *blk_fetch_request(struct request_queue *q)
 {
        struct request *rq;
 
+       lockdep_assert_held(q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        rq = blk_peek_request(q);
        if (rq)
                blk_start_request(rq);
@@ -2553,7 +2684,7 @@ EXPORT_SYMBOL(blk_fetch_request);
 /**
  * blk_update_request - Special helper function for request stacking drivers
  * @req:      the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete @req
  *
  * Description:
@@ -2572,49 +2703,19 @@ EXPORT_SYMBOL(blk_fetch_request);
  *     %false - this request doesn't have any more data
  *     %true  - this request has more data
  **/
-bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
+bool blk_update_request(struct request *req, blk_status_t error,
+               unsigned int nr_bytes)
 {
        int total_bytes;
 
-       trace_block_rq_complete(req, error, nr_bytes);
+       trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
 
        if (!req->bio)
                return false;
 
-       if (error && !blk_rq_is_passthrough(req) &&
-           !(req->rq_flags & RQF_QUIET)) {
-               char *error_type;
-
-               switch (error) {
-               case -ENOLINK:
-                       error_type = "recoverable transport";
-                       break;
-               case -EREMOTEIO:
-                       error_type = "critical target";
-                       break;
-               case -EBADE:
-                       error_type = "critical nexus";
-                       break;
-               case -ETIMEDOUT:
-                       error_type = "timeout";
-                       break;
-               case -ENOSPC:
-                       error_type = "critical space allocation";
-                       break;
-               case -ENODATA:
-                       error_type = "critical medium";
-                       break;
-               case -EIO:
-               default:
-                       error_type = "I/O";
-                       break;
-               }
-               printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
-                                  __func__, error_type, req->rq_disk ?
-                                  req->rq_disk->disk_name : "?",
-                                  (unsigned long long)blk_rq_pos(req));
-
-       }
+       if (unlikely(error && !blk_rq_is_passthrough(req) &&
+                    !(req->rq_flags & RQF_QUIET)))
+               print_req_error(req, error);
 
        blk_account_io_completion(req, nr_bytes);
 
@@ -2680,7 +2781,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
 
-static bool blk_update_bidi_request(struct request *rq, int error,
+static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
                                    unsigned int nr_bytes,
                                    unsigned int bidi_bytes)
 {
@@ -2718,13 +2819,13 @@ void blk_unprep_request(struct request *req)
 }
 EXPORT_SYMBOL_GPL(blk_unprep_request);
 
-/*
- * queue lock must be held
- */
-void blk_finish_request(struct request *req, int error)
+void blk_finish_request(struct request *req, blk_status_t error)
 {
        struct request_queue *q = req->q;
 
+       lockdep_assert_held(req->q->queue_lock);
+       WARN_ON_ONCE(q->mq_ops);
+
        if (req->rq_flags & RQF_STATS)
                blk_stat_add(req);
 
@@ -2758,7 +2859,7 @@ EXPORT_SYMBOL(blk_finish_request);
 /**
  * blk_end_bidi_request - Complete a bidi request
  * @rq:         the request to complete
- * @error:      %0 for success, < %0 for error
+ * @error:      block status code
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -2772,12 +2873,14 @@ EXPORT_SYMBOL(blk_finish_request);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool blk_end_bidi_request(struct request *rq, int error,
+static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
                                 unsigned int nr_bytes, unsigned int bidi_bytes)
 {
        struct request_queue *q = rq->q;
        unsigned long flags;
 
+       WARN_ON_ONCE(q->mq_ops);
+
        if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
                return true;
 
@@ -2791,7 +2894,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
 /**
  * __blk_end_bidi_request - Complete a bidi request with queue lock held
  * @rq:         the request to complete
- * @error:      %0 for success, < %0 for error
+ * @error:      block status code
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -2803,9 +2906,12 @@ static bool blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
                                   unsigned int nr_bytes, unsigned int bidi_bytes)
 {
+       lockdep_assert_held(rq->q->queue_lock);
+       WARN_ON_ONCE(rq->q->mq_ops);
+
        if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
                return true;
 
@@ -2817,7 +2923,7 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
 /**
  * blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -2828,8 +2934,10 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool blk_end_request(struct request *rq, blk_status_t error,
+               unsigned int nr_bytes)
 {
+       WARN_ON_ONCE(rq->q->mq_ops);
        return blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL(blk_end_request);
@@ -2837,12 +2945,12 @@ EXPORT_SYMBOL(blk_end_request);
 /**
  * blk_end_request_all - Helper function for drivers to finish the request.
  * @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error: block status code
  *
  * Description:
  *     Completely finish @rq.
  */
-void blk_end_request_all(struct request *rq, int error)
+void blk_end_request_all(struct request *rq, blk_status_t error)
 {
        bool pending;
        unsigned int bidi_bytes = 0;
@@ -2858,7 +2966,7 @@ EXPORT_SYMBOL(blk_end_request_all);
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    %0 for success, < %0 for error
+ * @error:    block status code
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -2868,8 +2976,12 @@ EXPORT_SYMBOL(blk_end_request_all);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  **/
-bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool __blk_end_request(struct request *rq, blk_status_t error,
+               unsigned int nr_bytes)
 {
+       lockdep_assert_held(rq->q->queue_lock);
+       WARN_ON_ONCE(rq->q->mq_ops);
+
        return __blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
 EXPORT_SYMBOL(__blk_end_request);
@@ -2877,16 +2989,19 @@ EXPORT_SYMBOL(__blk_end_request);
 /**
  * __blk_end_request_all - Helper function for drivers to finish the request.
  * @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error: block status code
  *
  * Description:
  *     Completely finish @rq.  Must be called with queue lock held.
  */
-void __blk_end_request_all(struct request *rq, int error)
+void __blk_end_request_all(struct request *rq, blk_status_t error)
 {
        bool pending;
        unsigned int bidi_bytes = 0;
 
+       lockdep_assert_held(rq->q->queue_lock);
+       WARN_ON_ONCE(rq->q->mq_ops);
+
        if (unlikely(blk_bidi_rq(rq)))
                bidi_bytes = blk_rq_bytes(rq->next_rq);
 
@@ -2898,7 +3013,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
 /**
  * __blk_end_request_cur - Helper function to finish the current request chunk.
  * @rq: the request to finish the current chunk for
- * @error: %0 for success, < %0 for error
+ * @error: block status code
  *
  * Description:
  *     Complete the current consecutively mapped chunk from @rq.  Must
@@ -2908,7 +3023,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
  *     %false - we are done with this request
  *     %true  - still buffers pending for this request
  */
-bool __blk_end_request_cur(struct request *rq, int error)
+bool __blk_end_request_cur(struct request *rq, blk_status_t error)
 {
        return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
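All of the completion helpers above now take a blk_status_t, so legacy
drivers convert mechanically; a hedged before/after:

        /* old: __blk_end_request_all(rq, -EIO); */
        __blk_end_request_all(rq, BLK_STS_IOERR);

        /* old: blk_end_request(rq, 0, nr_bytes); */
        blk_end_request(rq, BLK_STS_OK, nr_bytes);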
@@ -3151,6 +3266,8 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
                            bool from_schedule)
        __releases(q->queue_lock)
 {
+       lockdep_assert_held(q->queue_lock);
+
        trace_block_unplug(q, depth, !from_schedule);
 
        if (from_schedule)
@@ -3249,7 +3366,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                 * Short-circuit if @q is dead
                 */
                if (unlikely(blk_queue_dying(q))) {
-                       __blk_end_request_all(rq, -ENODEV);
+                       __blk_end_request_all(rq, BLK_STS_IOERR);
                        continue;
                }
 
index a9451e3..5c0f3dc 100644 (file)
@@ -16,7 +16,7 @@
  * @rq: request to complete
  * @error: end I/O status of the request
  */
-static void blk_end_sync_rq(struct request *rq, int error)
+static void blk_end_sync_rq(struct request *rq, blk_status_t error)
 {
        struct completion *waiting = rq->end_io_data;
 
@@ -69,7 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 
        if (unlikely(blk_queue_dying(q))) {
                rq->rq_flags |= RQF_QUIET;
-               __blk_end_request_all(rq, -ENXIO);
+               __blk_end_request_all(rq, BLK_STS_IOERR);
                spin_unlock_irq(q->queue_lock);
                return;
        }
index c4e0880..ed5fe32 100644 (file)
@@ -164,7 +164,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
  */
 static bool blk_flush_complete_seq(struct request *rq,
                                   struct blk_flush_queue *fq,
-                                  unsigned int seq, int error)
+                                  unsigned int seq, blk_status_t error)
 {
        struct request_queue *q = rq->q;
        struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
@@ -216,7 +216,7 @@ static bool blk_flush_complete_seq(struct request *rq,
        return kicked | queued;
 }
 
-static void flush_end_io(struct request *flush_rq, int error)
+static void flush_end_io(struct request *flush_rq, blk_status_t error)
 {
        struct request_queue *q = flush_rq->q;
        struct list_head *running;
@@ -341,11 +341,13 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
        return blk_flush_queue_rq(flush_rq, false);
 }
 
-static void flush_data_end_io(struct request *rq, int error)
+static void flush_data_end_io(struct request *rq, blk_status_t error)
 {
        struct request_queue *q = rq->q;
        struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
+       lockdep_assert_held(q->queue_lock);
+
        /*
         * Updating q->in_flight[] here for making this tag usable
         * early. Because in blk_queue_start_tag(),
@@ -382,7 +384,7 @@ static void flush_data_end_io(struct request *rq, int error)
                blk_run_queue_async(q);
 }
 
-static void mq_flush_data_end_io(struct request *rq, int error)
+static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
 {
        struct request_queue *q = rq->q;
        struct blk_mq_hw_ctx *hctx;
@@ -411,9 +413,6 @@ static void mq_flush_data_end_io(struct request *rq, int error)
  * or __blk_mq_run_hw_queue() to dispatch request.
  * @rq is being submitted.  Analyze what needs to be done and put it on the
  * right queue.
- *
- * CONTEXT:
- * spin_lock_irq(q->queue_lock) in !mq case
  */
 void blk_insert_flush(struct request *rq)
 {
@@ -422,6 +421,9 @@ void blk_insert_flush(struct request *rq)
        unsigned int policy = blk_flush_policy(fflags, rq);
        struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
 
+       if (!q->mq_ops)
+               lockdep_assert_held(q->queue_lock);
+
        /*
         * @policy now records what operations need to be done.  Adjust
         * REQ_PREFLUSH and FUA for the driver.
index 0f891a9..feb3057 100644 (file)
@@ -384,9 +384,9 @@ static struct kobj_type integrity_ktype = {
        .sysfs_ops      = &integrity_ops,
 };
 
-static int blk_integrity_nop_fn(struct blk_integrity_iter *iter)
+static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
 {
-       return 0;
+       return BLK_STS_OK;
 }
 
 static const struct blk_integrity_profile nop_profile = {
index 3b5cb86..2547016 100644 (file)
@@ -16,6 +16,8 @@
  */
 int blk_rq_append_bio(struct request *rq, struct bio *bio)
 {
+       blk_queue_bounce(rq->q, &bio);
+
        if (!rq->bio) {
                blk_rq_bio_prep(rq->q, rq, bio);
        } else {
@@ -72,15 +74,13 @@ static int __blk_rq_map_user_iov(struct request *rq,
                map_data->offset += bio->bi_iter.bi_size;
 
        orig_bio = bio;
-       blk_queue_bounce(q, &bio);
 
        /*
         * We link the bounce buffer in and could have to traverse it
         * later so we have to get a ref to prevent it from being freed
         */
-       bio_get(bio);
-
        ret = blk_rq_append_bio(rq, bio);
+       bio_get(bio);
        if (ret) {
                bio_endio(bio);
                __blk_rq_unmap_user(orig_bio);
@@ -249,7 +249,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
                return ret;
        }
 
-       blk_queue_bounce(q, &rq->bio);
        return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
index 3990ae4..9903883 100644 (file)
@@ -108,31 +108,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
        bool do_split = true;
        struct bio *new = NULL;
        const unsigned max_sectors = get_max_io_size(q, bio);
-       unsigned bvecs = 0;
 
        bio_for_each_segment(bv, bio, iter) {
                /*
-                * With arbitrary bio size, the incoming bio may be very
-                * big. We have to split the bio into small bios so that
-                * each holds at most BIO_MAX_PAGES bvecs because
-                * bio_clone() can fail to allocate big bvecs.
-                *
-                * It should have been better to apply the limit per
-                * request queue in which bio_clone() is involved,
-                * instead of globally. The biggest blocker is the
-                * bio_clone() in bio bounce.
-                *
-                * If bio is splitted by this reason, we should have
-                * allowed to continue bios merging, but don't do
-                * that now for making the change simple.
-                *
-                * TODO: deal with bio bounce's bio_clone() gracefully
-                * and convert the global limit into per-queue limit.
-                */
-               if (bvecs++ >= BIO_MAX_PAGES)
-                       goto split;
-
-               /*
                 * If the queue doesn't support SG gaps and adding this
                 * offset would create a gap, disallow it.
                 */
@@ -202,8 +180,7 @@ split:
        return do_split ? new : NULL;
 }
 
-void blk_queue_split(struct request_queue *q, struct bio **bio,
-                    struct bio_set *bs)
+void blk_queue_split(struct request_queue *q, struct bio **bio)
 {
        struct bio *split, *res;
        unsigned nsegs;
@@ -211,13 +188,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
        switch (bio_op(*bio)) {
        case REQ_OP_DISCARD:
        case REQ_OP_SECURE_ERASE:
-               split = blk_bio_discard_split(q, *bio, bs, &nsegs);
+               split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
                break;
        case REQ_OP_WRITE_ZEROES:
-               split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs);
+               split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
                break;
        case REQ_OP_WRITE_SAME:
-               split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
+               split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
                break;
        default:
                split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
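Callers now rely on the queue's internal q->bio_split instead of passing
their own bio_set; a hedged make_request sketch (my_make_request and my_bs
are illustrative):

        static blk_qc_t my_make_request(struct request_queue *q,
                                        struct bio *bio)
        {
                blk_queue_split(q, &bio); /* was blk_queue_split(q, &bio, my_bs) */
                /* ... handle the (possibly trimmed) bio ... */
                return BLK_QC_T_NONE;
        }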
@@ -671,6 +648,9 @@ static void blk_account_io_merge(struct request *req)
 static struct request *attempt_merge(struct request_queue *q,
                                     struct request *req, struct request *next)
 {
+       if (!q->mq_ops)
+               lockdep_assert_held(q->queue_lock);
+
        if (!rq_mergeable(req) || !rq_mergeable(next))
                return NULL;
 
@@ -693,6 +673,13 @@ static struct request *attempt_merge(struct request_queue *q,
                return NULL;
 
        /*
+        * Don't allow merging IO with different write hints, or hinted
+        * IO with non-hinted IO.
+        */
+       if (req->write_hint != next->write_hint)
+               return NULL;
+
+       /*
         * If we are allowed to merge, then append bio list
         * from next to rq and release next. merge_requests_fn
         * will have updated segment counts, update sector
@@ -811,6 +798,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
            !blk_write_same_mergeable(rq->bio, bio))
                return false;
 
+       /*
+        * Don't allow merging IO with different write hints, or hinted
+        * IO with non-hinted IO.
+        */
+       if (rq->write_hint != bio->bi_write_hint)
+               return false;
+
        return true;
 }
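These checks are the block-layer end of the per-file write lifetime hints
added in the same series; a hedged userspace view (assuming the
F_SET_RW_HINT fcntl and RWH_* constants from that series):

        #include <fcntl.h>
        #include <stdint.h>

        uint64_t hint = RWH_WRITE_LIFE_SHORT;

        /* writes on fd now carry the hint in bio->bi_write_hint and will
         * not be merged with IO carrying a different hint */
        fcntl(fd, F_SET_RW_HINT, &hint);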
 
index 8e61e86..2cca4fc 100644 (file)
 #include "blk.h"
 #include "blk-mq.h"
 
-static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
-                             const int cpu)
+static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
+                             const struct cpumask *online_mask)
 {
-       return cpu * nr_queues / nr_cpus;
+       /*
+        * Non-online CPUs are mapped to queue index 0.
+        */
+       if (!cpumask_test_cpu(cpu, online_mask))
+               return 0;
+       return cpu % nr_queues;
 }
 
 static int get_first_sibling(unsigned int cpu)
@@ -36,55 +41,26 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
        unsigned int *map = set->mq_map;
        unsigned int nr_queues = set->nr_hw_queues;
        const struct cpumask *online_mask = cpu_online_mask;
-       unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
-       cpumask_var_t cpus;
-
-       if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
-               return -ENOMEM;
-
-       cpumask_clear(cpus);
-       nr_cpus = nr_uniq_cpus = 0;
-       for_each_cpu(i, online_mask) {
-               nr_cpus++;
-               first_sibling = get_first_sibling(i);
-               if (!cpumask_test_cpu(first_sibling, cpus))
-                       nr_uniq_cpus++;
-               cpumask_set_cpu(i, cpus);
-       }
-
-       queue = 0;
-       for_each_possible_cpu(i) {
-               if (!cpumask_test_cpu(i, online_mask)) {
-                       map[i] = 0;
-                       continue;
-               }
+       unsigned int cpu, first_sibling;
 
+       for_each_possible_cpu(cpu) {
                /*
-                * Easy case - we have equal or more hardware queues. Or
-                * there are no thread siblings to take into account. Do
-                * 1:1 if enough, or sequential mapping if less.
+                * First do sequential mapping between CPUs and queues.
+                * If we still have CPUs to map and there are multiple
+                * threads per core, map sibling threads to the same
+                * queue for performance optimizations.
                 */
-               if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
-                       map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
-                       queue++;
-                       continue;
+               if (cpu < nr_queues) {
+                       map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+               } else {
+                       first_sibling = get_first_sibling(cpu);
+                       if (first_sibling == cpu)
+                               map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+                       else
+                               map[cpu] = map[first_sibling];
                }
-
-               /*
-                * Less then nr_cpus queues, and we have some number of
-                * threads per cores. Map sibling threads to the same
-                * queue.
-                */
-               first_sibling = get_first_sibling(i);
-               if (first_sibling == i) {
-                       map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
-                                                       queue);
-                       queue++;
-               } else
-                       map[i] = map[first_sibling];
        }
 
-       free_cpumask_var(cpus);
        return 0;
 }
 EXPORT_SYMBOL_GPL(blk_mq_map_queues);
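The new scheme maps CPUs to queues sequentially first and wraps the
remainder; e.g. with 3 hardware queues and 8 online CPUs and no SMT
siblings (hypothetical topology):

        cpu:    0  1  2  3  4  5  6  7
        queue:  0  1  2  0  1  2  0  1    (cpu % nr_queues past the 1:1 range)

With SMT, a sibling thread inherits its first sibling's queue instead.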
index 803aed4..9ebc294 100644 (file)
@@ -114,10 +114,12 @@ static ssize_t queue_state_write(void *data, const char __user *buf,
                blk_mq_run_hw_queues(q, true);
        } else if (strcmp(op, "start") == 0) {
                blk_mq_start_stopped_hw_queues(q, true);
+       } else if (strcmp(op, "kick") == 0) {
+               blk_mq_kick_requeue_list(q);
        } else {
                pr_err("%s: unsupported operation '%s'\n", __func__, op);
 inval:
-               pr_err("%s: use either 'run' or 'start'\n", __func__);
+               pr_err("%s: use 'run', 'start' or 'kick'\n", __func__);
                return -EINVAL;
        }
        return count;
@@ -133,6 +135,29 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
        }
 }
 
+static int queue_write_hint_show(void *data, struct seq_file *m)
+{
+       struct request_queue *q = data;
+       int i;
+
+       for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+               seq_printf(m, "hint%d: %llu\n", i, q->write_hints[i]);
+
+       return 0;
+}
+
+static ssize_t queue_write_hint_store(void *data, const char __user *buf,
+                                     size_t count, loff_t *ppos)
+{
+       struct request_queue *q = data;
+       int i;
+
+       for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+               q->write_hints[i] = 0;
+
+       return count;
+}
+
 static int queue_poll_stat_show(void *data, struct seq_file *m)
 {
        struct request_queue *q = data;
@@ -267,6 +292,14 @@ static const char *const rqf_name[] = {
 };
 #undef RQF_NAME
 
+#define RQAF_NAME(name) [REQ_ATOM_##name] = #name
+static const char *const rqaf_name[] = {
+       RQAF_NAME(COMPLETE),
+       RQAF_NAME(STARTED),
+       RQAF_NAME(POLL_SLEPT),
+};
+#undef RQAF_NAME
+
 int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
 {
        const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
@@ -283,6 +316,8 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
        seq_puts(m, ", .rq_flags=");
        blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
                       ARRAY_SIZE(rqf_name));
+       seq_puts(m, ", .atomic_flags=");
+       blk_flags_show(m, rq->atomic_flags, rqaf_name, ARRAY_SIZE(rqaf_name));
        seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
                   rq->internal_tag);
        if (mq_ops->show_rq)
@@ -298,6 +333,37 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
 }
 EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
 
+static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
+       __acquires(&q->requeue_lock)
+{
+       struct request_queue *q = m->private;
+
+       spin_lock_irq(&q->requeue_lock);
+       return seq_list_start(&q->requeue_list, *pos);
+}
+
+static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       struct request_queue *q = m->private;
+
+       return seq_list_next(v, &q->requeue_list, pos);
+}
+
+static void queue_requeue_list_stop(struct seq_file *m, void *v)
+       __releases(&q->requeue_lock)
+{
+       struct request_queue *q = m->private;
+
+       spin_unlock_irq(&q->requeue_lock);
+}
+
+static const struct seq_operations queue_requeue_list_seq_ops = {
+       .start  = queue_requeue_list_start,
+       .next   = queue_requeue_list_next,
+       .stop   = queue_requeue_list_stop,
+       .show   = blk_mq_debugfs_rq_show,
+};
+
 static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
        __acquires(&hctx->lock)
 {
@@ -329,6 +395,36 @@ static const struct seq_operations hctx_dispatch_seq_ops = {
        .show   = blk_mq_debugfs_rq_show,
 };
 
+struct show_busy_params {
+       struct seq_file         *m;
+       struct blk_mq_hw_ctx    *hctx;
+};
+
+/*
+ * Note: the state of a request may change while this function is in progress,
+ * e.g. due to a concurrent blk_mq_finish_request() call.
+ */
+static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
+{
+       const struct show_busy_params *params = data;
+
+       if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx &&
+           test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+               __blk_mq_debugfs_rq_show(params->m,
+                                        list_entry_rq(&rq->queuelist));
+}
+
+static int hctx_busy_show(void *data, struct seq_file *m)
+{
+       struct blk_mq_hw_ctx *hctx = data;
+       struct show_busy_params params = { .m = m, .hctx = hctx };
+
+       blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq,
+                               &params);
+
+       return 0;
+}
+
 static int hctx_ctx_map_show(void *data, struct seq_file *m)
 {
        struct blk_mq_hw_ctx *hctx = data;
@@ -655,7 +751,9 @@ const struct file_operations blk_mq_debugfs_fops = {
 
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
        {"poll_stat", 0400, queue_poll_stat_show},
+       {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
        {"state", 0600, queue_state_show, queue_state_write},
+       {"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
        {},
 };
 
@@ -663,6 +761,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
        {"state", 0400, hctx_state_show},
        {"flags", 0400, hctx_flags_show},
        {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops},
+       {"busy", 0400, hctx_busy_show},
        {"ctx_map", 0400, hctx_ctx_map_show},
        {"tags", 0400, hctx_tags_show},
        {"tags_bitmap", 0400, hctx_tags_bitmap_show},
index 1f5b692..7f0dc48 100644 (file)
@@ -31,11 +31,10 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
 
-static void __blk_mq_sched_assign_ioc(struct request_queue *q,
-                                     struct request *rq,
-                                     struct bio *bio,
-                                     struct io_context *ioc)
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
 {
+       struct request_queue *q = rq->q;
+       struct io_context *ioc = rq_ioc(bio);
        struct io_cq *icq;
 
        spin_lock_irq(q->queue_lock);
@@ -47,90 +46,47 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q,
                if (!icq)
                        return;
        }
-
+       get_io_context(icq->ioc);
        rq->elv.icq = icq;
-       if (!blk_mq_sched_get_rq_priv(q, rq, bio)) {
-               rq->rq_flags |= RQF_ELVPRIV;
-               get_io_context(icq->ioc);
-               return;
-       }
-
-       rq->elv.icq = NULL;
 }
 
-static void blk_mq_sched_assign_ioc(struct request_queue *q,
-                                   struct request *rq, struct bio *bio)
+/*
+ * Mark a hardware queue as needing a restart. For shared queues, maintain
+ * a count of how many hardware queues are marked for restart.
+ */
+static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 {
-       struct io_context *ioc;
+       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+               return;
+
+       if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+               struct request_queue *q = hctx->queue;
 
-       ioc = rq_ioc(bio);
-       if (ioc)
-               __blk_mq_sched_assign_ioc(q, rq, bio, ioc);
+               if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+                       atomic_inc(&q->shared_hctx_restart);
+       } else
+               set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-struct request *blk_mq_sched_get_request(struct request_queue *q,
-                                        struct bio *bio,
-                                        unsigned int op,
-                                        struct blk_mq_alloc_data *data)
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
 {
-       struct elevator_queue *e = q->elevator;
-       struct request *rq;
-
-       blk_queue_enter_live(q);
-       data->q = q;
-       if (likely(!data->ctx))
-               data->ctx = blk_mq_get_ctx(q);
-       if (likely(!data->hctx))
-               data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
-
-       if (e) {
-               data->flags |= BLK_MQ_REQ_INTERNAL;
-
-               /*
-                * Flush requests are special and go directly to the
-                * dispatch list.
-                */
-               if (!op_is_flush(op) && e->type->ops.mq.get_request) {
-                       rq = e->type->ops.mq.get_request(q, op, data);
-                       if (rq)
-                               rq->rq_flags |= RQF_QUEUED;
-               } else
-                       rq = __blk_mq_alloc_request(data, op);
-       } else {
-               rq = __blk_mq_alloc_request(data, op);
-       }
-
-       if (rq) {
-               if (!op_is_flush(op)) {
-                       rq->elv.icq = NULL;
-                       if (e && e->type->icq_cache)
-                               blk_mq_sched_assign_ioc(q, rq, bio);
-               }
-               data->hctx->queued++;
-               return rq;
-       }
+       if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+               return false;
 
-       blk_queue_exit(q);
-       return NULL;
-}
+       if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+               struct request_queue *q = hctx->queue;
 
-void blk_mq_sched_put_request(struct request *rq)
-{
-       struct request_queue *q = rq->q;
-       struct elevator_queue *e = q->elevator;
+               if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+                       atomic_dec(&q->shared_hctx_restart);
+       } else
+               clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-       if (rq->rq_flags & RQF_ELVPRIV) {
-               blk_mq_sched_put_rq_priv(rq->q, rq);
-               if (rq->elv.icq) {
-                       put_io_context(rq->elv.icq->ioc);
-                       rq->elv.icq = NULL;
-               }
+       if (blk_mq_hctx_has_pending(hctx)) {
+               blk_mq_run_hw_queue(hctx, true);
+               return true;
        }
 
-       if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
-               e->type->ops.mq.put_request(rq);
-       else
-               blk_mq_finish_request(rq);
+       return false;
 }
 
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
@@ -141,7 +97,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
        bool did_work = false;
        LIST_HEAD(rq_list);
 
-       if (unlikely(blk_mq_hctx_stopped(hctx)))
+       /* RCU or SRCU read lock is needed before checking quiesced flag */
+       if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
                return;
 
        hctx->run++;
@@ -221,19 +178,73 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
 
+/*
+ * Reverse check our software queue for entries that we could potentially
+ * merge with. Currently includes a hand-wavy stop count of 8, to not spend
+ * too much time checking for merges.
+ */
+static bool blk_mq_attempt_merge(struct request_queue *q,
+                                struct blk_mq_ctx *ctx, struct bio *bio)
+{
+       struct request *rq;
+       int checked = 8;
+
+       lockdep_assert_held(&ctx->lock);
+
+       list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
+               bool merged = false;
+
+               if (!checked--)
+                       break;
+
+               if (!blk_rq_merge_ok(rq, bio))
+                       continue;
+
+               switch (blk_try_merge(rq, bio)) {
+               case ELEVATOR_BACK_MERGE:
+                       if (blk_mq_sched_allow_merge(q, rq, bio))
+                               merged = bio_attempt_back_merge(q, rq, bio);
+                       break;
+               case ELEVATOR_FRONT_MERGE:
+                       if (blk_mq_sched_allow_merge(q, rq, bio))
+                               merged = bio_attempt_front_merge(q, rq, bio);
+                       break;
+               case ELEVATOR_DISCARD_MERGE:
+                       merged = bio_attempt_discard_merge(q, rq, bio);
+                       break;
+               default:
+                       continue;
+               }
+
+               if (merged)
+                       ctx->rq_merged++;
+               return merged;
+       }
+
+       return false;
+}
+
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
        struct elevator_queue *e = q->elevator;
+       struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+       struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+       bool ret = false;
 
-       if (e->type->ops.mq.bio_merge) {
-               struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
-               struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
+       if (e && e->type->ops.mq.bio_merge) {
                blk_mq_put_ctx(ctx);
                return e->type->ops.mq.bio_merge(hctx, bio);
        }
 
-       return false;
+       if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
+               /* default per sw-queue merge */
+               spin_lock(&ctx->lock);
+               ret = blk_mq_attempt_merge(q, ctx, bio);
+               spin_unlock(&ctx->lock);
+       }
+
+       blk_mq_put_ctx(ctx);
+       return ret;
 }
 
 bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
@@ -266,18 +277,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
        return true;
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
-               clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-               if (blk_mq_hctx_has_pending(hctx)) {
-                       blk_mq_run_hw_queue(hctx, true);
-                       return true;
-               }
-       }
-       return false;
-}
-
 /**
  * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
  * @pos:    loop cursor.
@@ -309,6 +308,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
        unsigned int i, j;
 
        if (set->flags & BLK_MQ_F_TAG_SHARED) {
+               /*
+                * If this is 0, then we know that no hardware queues
+                * have RESTART marked. We're done.
+                */
+               if (!atomic_read(&queue->shared_hctx_restart))
+                       return;
+
                rcu_read_lock();
                list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
                                           tag_set_list) {
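
The restart bookkeeping above pairs a per-hctx state bit with a per-queue atomic counter so that blk_mq_sched_restart() can bail out without walking the shared tag list when nothing is marked. A minimal sketch of that idiom, with hypothetical names (my_hctx, my_queue) rather than the real block-layer structures:

#include <linux/atomic.h>
#include <linux/bitops.h>

struct my_queue {
	atomic_t marked;		/* how many contexts have the bit set */
};

struct my_hctx {
	unsigned long state;		/* bit 0: needs restart */
	struct my_queue *queue;
};

static void my_mark(struct my_hctx *h)
{
	/* test_and_set_bit() returns the old value, so the shared
	 * counter is bumped only on the 0 -> 1 transition. */
	if (!test_and_set_bit(0, &h->state))
		atomic_inc(&h->queue->marked);
}

static bool my_clear(struct my_hctx *h)
{
	if (!test_and_clear_bit(0, &h->state))
		return false;
	atomic_dec(&h->queue->marked);
	return true;
}

static void my_restart_all(struct my_queue *q)
{
	/* Fast path: zero means no context is marked, skip the scan. */
	if (!atomic_read(&q->marked))
		return;
	/* ... otherwise walk the contexts and my_clear() each one ... */
}
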
index edafb53..9267d0b 100644 (file)
@@ -7,8 +7,7 @@
 void blk_mq_sched_free_hctx_data(struct request_queue *q,
                                 void (*exit)(struct blk_mq_hw_ctx *));
 
-struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
-void blk_mq_sched_put_request(struct request *rq);
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio);
 
 void blk_mq_sched_request_inserted(struct request *rq);
 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
@@ -38,35 +37,12 @@ int blk_mq_sched_init(struct request_queue *q);
 static inline bool
 blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
-       struct elevator_queue *e = q->elevator;
-
-       if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
+       if (blk_queue_nomerges(q) || !bio_mergeable(bio))
                return false;
 
        return __blk_mq_sched_bio_merge(q, bio);
 }
 
-static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
-                                          struct request *rq,
-                                          struct bio *bio)
-{
-       struct elevator_queue *e = q->elevator;
-
-       if (e && e->type->ops.mq.get_rq_priv)
-               return e->type->ops.mq.get_rq_priv(q, rq, bio);
-
-       return 0;
-}
-
-static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
-                                           struct request *rq)
-{
-       struct elevator_queue *e = q->elevator;
-
-       if (e && e->type->ops.mq.put_rq_priv)
-               e->type->ops.mq.put_rq_priv(q, rq);
-}
-
 static inline bool
 blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
                         struct bio *bio)
@@ -115,15 +91,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
        return false;
 }
 
-/*
- * Mark a hardware queue as needing a restart.
- */
-static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-       if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-               set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-}
-
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
 {
        return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
index bb66c96..05dfa3f 100644 (file)
@@ -42,7 +42,6 @@ static LIST_HEAD(all_q_list);
 
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
 
 static int blk_mq_poll_stats_bkt(const struct request *rq)
 {
@@ -154,13 +153,28 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
+/*
+ * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
+ * mpt3sas driver such that this function can be removed.
+ */
+void blk_mq_quiesce_queue_nowait(struct request_queue *q)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
+
 /**
- * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
  * @q: request queue.
  *
  * Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Additionally, it is not prevented that
- * new queue_rq() calls occur unless the queue has been stopped first.
+ * callback function is invoked. Once this function has returned, no
+ * dispatch can happen until the queue is unquiesced via
+ * blk_mq_unquiesce_queue().
  */
 void blk_mq_quiesce_queue(struct request_queue *q)
 {
@@ -168,11 +182,11 @@ void blk_mq_quiesce_queue(struct request_queue *q)
        unsigned int i;
        bool rcu = false;
 
-       __blk_mq_stop_hw_queues(q, true);
+       blk_mq_quiesce_queue_nowait(q);
 
        queue_for_each_hw_ctx(q, hctx, i) {
                if (hctx->flags & BLK_MQ_F_BLOCKING)
-                       synchronize_srcu(&hctx->queue_rq_srcu);
+                       synchronize_srcu(hctx->queue_rq_srcu);
                else
                        rcu = true;
        }
@@ -181,6 +195,26 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
 
+/**
+ * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
+ * @q: request queue.
+ *
+ * This function restores the queue to the state it was in before
+ * blk_mq_quiesce_queue() was called.
+ */
+void blk_mq_unquiesce_queue(struct request_queue *q)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+
+       /* dispatch requests that were inserted while quiesced */
+       blk_mq_run_hw_queues(q, true);
+}
+EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
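
As a usage sketch (my_reconfigure() is a hypothetical driver helper; error handling elided), the intended pairing of the two calls is:

static void my_reconfigure(struct request_queue *q)
{
	blk_mq_quiesce_queue(q);	/* no .queue_rq() runs past this point */

	/* ... change state that .queue_rq() also reads ... */

	blk_mq_unquiesce_queue(q);	/* clears the flag and reruns the
					 * hardware queues, dispatching any
					 * requests held back meanwhile */
}
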
+
 void blk_mq_wake_waiters(struct request_queue *q)
 {
        struct blk_mq_hw_ctx *hctx;
@@ -204,15 +238,33 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_can_queue);
 
-void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-                       struct request *rq, unsigned int op)
+static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
+               unsigned int tag, unsigned int op)
 {
+       struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+       struct request *rq = tags->static_rqs[tag];
+
+       rq->rq_flags = 0;
+
+       if (data->flags & BLK_MQ_REQ_INTERNAL) {
+               rq->tag = -1;
+               rq->internal_tag = tag;
+       } else {
+               if (blk_mq_tag_busy(data->hctx)) {
+                       rq->rq_flags = RQF_MQ_INFLIGHT;
+                       atomic_inc(&data->hctx->nr_active);
+               }
+               rq->tag = tag;
+               rq->internal_tag = -1;
+               data->hctx->tags->rqs[rq->tag] = rq;
+       }
+
        INIT_LIST_HEAD(&rq->queuelist);
        /* csd/requeue_work/fifo_time is initialized before use */
-       rq->q = q;
-       rq->mq_ctx = ctx;
+       rq->q = data->q;
+       rq->mq_ctx = data->ctx;
        rq->cmd_flags = op;
-       if (blk_queue_io_stat(q))
+       if (blk_queue_io_stat(data->q))
                rq->rq_flags |= RQF_IO_STAT;
        /* do not touch atomic flags, it needs atomic ops against the timer */
        rq->cpu = -1;
@@ -241,44 +293,60 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
        rq->end_io_data = NULL;
        rq->next_rq = NULL;
 
-       ctx->rq_dispatched[op_is_sync(op)]++;
+       data->ctx->rq_dispatched[op_is_sync(op)]++;
+       return rq;
 }
-EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
 
-struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
-                                      unsigned int op)
+static struct request *blk_mq_get_request(struct request_queue *q,
+               struct bio *bio, unsigned int op,
+               struct blk_mq_alloc_data *data)
 {
+       struct elevator_queue *e = q->elevator;
        struct request *rq;
        unsigned int tag;
 
-       tag = blk_mq_get_tag(data);
-       if (tag != BLK_MQ_TAG_FAIL) {
-               struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+       blk_queue_enter_live(q);
+       data->q = q;
+       if (likely(!data->ctx))
+               data->ctx = blk_mq_get_ctx(q);
+       if (likely(!data->hctx))
+               data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
+       if (op & REQ_NOWAIT)
+               data->flags |= BLK_MQ_REQ_NOWAIT;
 
-               rq = tags->static_rqs[tag];
+       if (e) {
+               data->flags |= BLK_MQ_REQ_INTERNAL;
 
-               if (data->flags & BLK_MQ_REQ_INTERNAL) {
-                       rq->tag = -1;
-                       rq->internal_tag = tag;
-               } else {
-                       if (blk_mq_tag_busy(data->hctx)) {
-                               rq->rq_flags = RQF_MQ_INFLIGHT;
-                               atomic_inc(&data->hctx->nr_active);
-                       }
-                       rq->tag = tag;
-                       rq->internal_tag = -1;
-                       data->hctx->tags->rqs[rq->tag] = rq;
-               }
+               /*
+                * Flush requests are special and go directly to the
+                * dispatch list.
+                */
+               if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
+                       e->type->ops.mq.limit_depth(op, data);
+       }
 
-               blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
-               return rq;
+       tag = blk_mq_get_tag(data);
+       if (tag == BLK_MQ_TAG_FAIL) {
+               blk_queue_exit(q);
+               return NULL;
        }
 
-       return NULL;
+       rq = blk_mq_rq_ctx_init(data, tag, op);
+       if (!op_is_flush(op)) {
+               rq->elv.icq = NULL;
+               if (e && e->type->ops.mq.prepare_request) {
+                       if (e->type->icq_cache && rq_ioc(bio))
+                               blk_mq_sched_assign_ioc(rq, bio);
+
+                       e->type->ops.mq.prepare_request(rq, bio);
+                       rq->rq_flags |= RQF_ELVPRIV;
+               }
+       }
+       data->hctx->queued++;
+       return rq;
 }
-EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
                unsigned int flags)
 {
        struct blk_mq_alloc_data alloc_data = { .flags = flags };
@@ -289,7 +357,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
        if (ret)
                return ERR_PTR(ret);
 
-       rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
+       rq = blk_mq_get_request(q, NULL, op, &alloc_data);
 
        blk_mq_put_ctx(alloc_data.ctx);
        blk_queue_exit(q);
@@ -304,8 +372,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
 
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
-               unsigned int flags, unsigned int hctx_idx)
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+               unsigned int op, unsigned int flags, unsigned int hctx_idx)
 {
        struct blk_mq_alloc_data alloc_data = { .flags = flags };
        struct request *rq;
@@ -340,7 +408,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
        cpu = cpumask_first(alloc_data.hctx->cpumask);
        alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
 
-       rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
+       rq = blk_mq_get_request(q, NULL, op, &alloc_data);
 
        blk_queue_exit(q);
 
@@ -351,17 +419,28 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
-void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
-                            struct request *rq)
+void blk_mq_free_request(struct request *rq)
 {
-       const int sched_tag = rq->internal_tag;
        struct request_queue *q = rq->q;
+       struct elevator_queue *e = q->elevator;
+       struct blk_mq_ctx *ctx = rq->mq_ctx;
+       struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+       const int sched_tag = rq->internal_tag;
+
+       if (rq->rq_flags & RQF_ELVPRIV) {
+               if (e && e->type->ops.mq.finish_request)
+                       e->type->ops.mq.finish_request(rq);
+               if (rq->elv.icq) {
+                       put_io_context(rq->elv.icq->ioc);
+                       rq->elv.icq = NULL;
+               }
+       }
 
+       ctx->rq_completed[rq_is_sync(rq)]++;
        if (rq->rq_flags & RQF_MQ_INFLIGHT)
                atomic_dec(&hctx->nr_active);
 
        wbt_done(q->rq_wb, &rq->issue_stat);
-       rq->rq_flags = 0;
 
        clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
        clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
@@ -372,29 +451,9 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
        blk_mq_sched_restart(hctx);
        blk_queue_exit(q);
 }
-
-static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
-                                    struct request *rq)
-{
-       struct blk_mq_ctx *ctx = rq->mq_ctx;
-
-       ctx->rq_completed[rq_is_sync(rq)]++;
-       __blk_mq_finish_request(hctx, ctx, rq);
-}
-
-void blk_mq_finish_request(struct request *rq)
-{
-       blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
-}
-EXPORT_SYMBOL_GPL(blk_mq_finish_request);
-
-void blk_mq_free_request(struct request *rq)
-{
-       blk_mq_sched_put_request(rq);
-}
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
-inline void __blk_mq_end_request(struct request *rq, int error)
+inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
        blk_account_io_done(rq);
 
@@ -409,7 +468,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
 }
 EXPORT_SYMBOL(__blk_mq_end_request);
 
-void blk_mq_end_request(struct request *rq, int error)
+void blk_mq_end_request(struct request *rq, blk_status_t error)
 {
        if (blk_update_request(rq, error, blk_rq_bytes(rq)))
                BUG();
@@ -753,50 +812,6 @@ static void blk_mq_timeout_work(struct work_struct *work)
        blk_queue_exit(q);
 }
 
-/*
- * Reverse check our software queue for entries that we could potentially
- * merge with. Currently includes a hand-wavy stop count of 8, to not spend
- * too much time checking for merges.
- */
-static bool blk_mq_attempt_merge(struct request_queue *q,
-                                struct blk_mq_ctx *ctx, struct bio *bio)
-{
-       struct request *rq;
-       int checked = 8;
-
-       list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
-               bool merged = false;
-
-               if (!checked--)
-                       break;
-
-               if (!blk_rq_merge_ok(rq, bio))
-                       continue;
-
-               switch (blk_try_merge(rq, bio)) {
-               case ELEVATOR_BACK_MERGE:
-                       if (blk_mq_sched_allow_merge(q, rq, bio))
-                               merged = bio_attempt_back_merge(q, rq, bio);
-                       break;
-               case ELEVATOR_FRONT_MERGE:
-                       if (blk_mq_sched_allow_merge(q, rq, bio))
-                               merged = bio_attempt_front_merge(q, rq, bio);
-                       break;
-               case ELEVATOR_DISCARD_MERGE:
-                       merged = bio_attempt_discard_merge(q, rq, bio);
-                       break;
-               default:
-                       continue;
-               }
-
-               if (merged)
-                       ctx->rq_merged++;
-               return merged;
-       }
-
-       return false;
-}
-
 struct flush_busy_ctx_data {
        struct blk_mq_hw_ctx *hctx;
        struct list_head *list;
@@ -968,7 +983,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 {
        struct blk_mq_hw_ctx *hctx;
        struct request *rq;
-       int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
+       int errors, queued;
 
        if (list_empty(list))
                return false;
@@ -979,6 +994,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
        errors = queued = 0;
        do {
                struct blk_mq_queue_data bd;
+               blk_status_t ret;
 
                rq = list_first_entry(list, struct request, queuelist);
                if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
@@ -1019,25 +1035,20 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
                }
 
                ret = q->mq_ops->queue_rq(hctx, &bd);
-               switch (ret) {
-               case BLK_MQ_RQ_QUEUE_OK:
-                       queued++;
-                       break;
-               case BLK_MQ_RQ_QUEUE_BUSY:
+               if (ret == BLK_STS_RESOURCE) {
                        blk_mq_put_driver_tag_hctx(hctx, rq);
                        list_add(&rq->queuelist, list);
                        __blk_mq_requeue_request(rq);
                        break;
-               default:
-                       pr_err("blk-mq: bad return on queue: %d\n", ret);
-               case BLK_MQ_RQ_QUEUE_ERROR:
+               }
+
+               if (unlikely(ret != BLK_STS_OK)) {
                        errors++;
-                       blk_mq_end_request(rq, -EIO);
-                       break;
+                       blk_mq_end_request(rq, BLK_STS_IOERR);
+                       continue;
                }
 
-               if (ret == BLK_MQ_RQ_QUEUE_BUSY)
-                       break;
+               queued++;
        } while (!list_empty(list));
 
        hctx->dispatched[queued_to_index(queued)]++;
@@ -1075,7 +1086,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
                 * - blk_mq_run_hw_queue() checks whether or not a queue has
                 *   been stopped before rerunning a queue.
                 * - Some but not all block drivers stop a queue before
-                *   returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq
+                *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
                 *   and dm-rq.
                 */
                if (!blk_mq_sched_needs_restart(hctx) &&
@@ -1100,9 +1111,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
        } else {
                might_sleep();
 
-               srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+               srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
                blk_mq_sched_dispatch_requests(hctx);
-               srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+               srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
        }
 }
 
@@ -1134,8 +1145,10 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
                                        unsigned long msecs)
 {
-       if (unlikely(blk_mq_hctx_stopped(hctx) ||
-                    !blk_mq_hw_queue_mapped(hctx)))
+       if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
+               return;
+
+       if (unlikely(blk_mq_hctx_stopped(hctx)))
                return;
 
        if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
@@ -1201,34 +1214,39 @@ bool blk_mq_queue_stopped(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_queue_stopped);
 
-static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync)
+/*
+ * Drivers often use this function to pause .queue_rq() when there
+ * aren't enough resources or some condition isn't satisfied;
+ * BLK_MQ_RQ_QUEUE_BUSY is usually returned in that case.
+ *
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queue() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
+void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
-       if (sync)
-               cancel_delayed_work_sync(&hctx->run_work);
-       else
-               cancel_delayed_work(&hctx->run_work);
+       cancel_delayed_work(&hctx->run_work);
 
        set_bit(BLK_MQ_S_STOPPED, &hctx->state);
 }
-
-void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
-{
-       __blk_mq_stop_hw_queue(hctx, false);
-}
 EXPORT_SYMBOL(blk_mq_stop_hw_queue);
 
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
+/*
+ * Drivers often use this function to pause .queue_rq() when there
+ * aren't enough resources or some condition isn't satisfied;
+ * BLK_MQ_RQ_QUEUE_BUSY is usually returned in that case.
+ *
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queues() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
+void blk_mq_stop_hw_queues(struct request_queue *q)
 {
        struct blk_mq_hw_ctx *hctx;
        int i;
 
        queue_for_each_hw_ctx(q, hctx, i)
-               __blk_mq_stop_hw_queue(hctx, sync);
-}
-
-void blk_mq_stop_hw_queues(struct request_queue *q)
-{
-       __blk_mq_stop_hw_queues(q, false);
+               blk_mq_stop_hw_queue(hctx);
 }
 EXPORT_SYMBOL(blk_mq_stop_hw_queues);
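
A hedged sketch of the distinction both comments draw (my_pause_cheap()/my_pause_drained() are illustrative helpers, not block-layer API): stopping is cheap but does not wait for a ->queue_rq() call that is already running, while quiescing does.

static void my_pause_cheap(struct blk_mq_hw_ctx *hctx)
{
	/* cancels future runs; a concurrent .queue_rq() may still finish */
	blk_mq_stop_hw_queue(hctx);
}

static void my_pause_drained(struct request_queue *q)
{
	/* returns only after all in-flight dispatches have completed */
	blk_mq_quiesce_queue(q);
}
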
 
@@ -1295,7 +1313,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 {
-       if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
+       if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
                return;
 
        /*
@@ -1317,6 +1335,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
 {
        struct blk_mq_ctx *ctx = rq->mq_ctx;
 
+       lockdep_assert_held(&ctx->lock);
+
        trace_block_rq_insert(hctx->queue, rq);
 
        if (at_head)
@@ -1330,6 +1350,8 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 {
        struct blk_mq_ctx *ctx = rq->mq_ctx;
 
+       lockdep_assert_held(&ctx->lock);
+
        __blk_mq_insert_req_list(hctx, rq, at_head);
        blk_mq_hctx_mark_pending(hctx, ctx);
 }
@@ -1427,30 +1449,13 @@ static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
                !blk_queue_nomerges(hctx->queue);
 }
 
-static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
-                                        struct blk_mq_ctx *ctx,
-                                        struct request *rq, struct bio *bio)
+static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
+                                  struct blk_mq_ctx *ctx,
+                                  struct request *rq)
 {
-       if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
-               blk_mq_bio_to_request(rq, bio);
-               spin_lock(&ctx->lock);
-insert_rq:
-               __blk_mq_insert_request(hctx, rq, false);
-               spin_unlock(&ctx->lock);
-               return false;
-       } else {
-               struct request_queue *q = hctx->queue;
-
-               spin_lock(&ctx->lock);
-               if (!blk_mq_attempt_merge(q, ctx, bio)) {
-                       blk_mq_bio_to_request(rq, bio);
-                       goto insert_rq;
-               }
-
-               spin_unlock(&ctx->lock);
-               __blk_mq_finish_request(hctx, ctx, rq);
-               return true;
-       }
+       spin_lock(&ctx->lock);
+       __blk_mq_insert_request(hctx, rq, false);
+       spin_unlock(&ctx->lock);
 }
 
 static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
@@ -1471,10 +1476,11 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
                .last = true,
        };
        blk_qc_t new_cookie;
-       int ret;
+       blk_status_t ret;
        bool run_queue = true;
 
-       if (blk_mq_hctx_stopped(hctx)) {
+       /* RCU or SRCU read lock is needed before checking quiesced flag */
+       if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
                run_queue = false;
                goto insert;
        }
@@ -1493,18 +1499,19 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
         * would have done
         */
        ret = q->mq_ops->queue_rq(hctx, &bd);
-       if (ret == BLK_MQ_RQ_QUEUE_OK) {
+       switch (ret) {
+       case BLK_STS_OK:
                *cookie = new_cookie;
                return;
-       }
-
-       if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+       case BLK_STS_RESOURCE:
+               __blk_mq_requeue_request(rq);
+               goto insert;
+       default:
                *cookie = BLK_QC_T_NONE;
-               blk_mq_end_request(rq, -EIO);
+               blk_mq_end_request(rq, ret);
                return;
        }
 
-       __blk_mq_requeue_request(rq);
 insert:
        blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
 }
@@ -1521,9 +1528,9 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
                might_sleep();
 
-               srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+               srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
                __blk_mq_try_issue_directly(hctx, rq, cookie, true);
-               srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+               srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
        }
 }
 
@@ -1541,7 +1548,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
        blk_queue_bounce(q, &bio);
 
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
                bio_io_error(bio);
@@ -1559,9 +1566,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
        trace_block_getrq(q, bio, bio->bi_opf);
 
-       rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
+       rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
        if (unlikely(!rq)) {
                __wbt_done(q->rq_wb, wb_acct);
+               if (bio->bi_opf & REQ_NOWAIT)
+                       bio_wouldblock_error(bio);
                return BLK_QC_T_NONE;
        }
 
@@ -1639,11 +1648,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                blk_mq_put_ctx(data.ctx);
                blk_mq_bio_to_request(rq, bio);
                blk_mq_sched_insert_request(rq, false, true, true, true);
-       } else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+       } else {
                blk_mq_put_ctx(data.ctx);
+               blk_mq_bio_to_request(rq, bio);
+               blk_mq_queue_io(data.hctx, data.ctx, rq);
                blk_mq_run_hw_queue(data.hctx, true);
-       } else
-               blk_mq_put_ctx(data.ctx);
+       }
 
        return cookie;
 }
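
On the submitter side, the REQ_NOWAIT handling added above means a bio can now fail fast instead of sleeping on tag allocation. A hedged sketch (my_end_io() and my_submit_nowait() are illustrative; BLK_STS_AGAIN is what bio_wouldblock_error() is expected to report):

static void my_end_io(struct bio *bio)
{
	if (bio->bi_status == BLK_STS_AGAIN)
		pr_debug("submission would have blocked, retry later\n");
	bio_put(bio);
}

static void my_submit_nowait(struct bio *bio)
{
	bio->bi_opf |= REQ_NOWAIT;	/* fail fast instead of sleeping */
	bio->bi_end_io = my_end_io;
	generic_make_request(bio);
}
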
@@ -1866,7 +1876,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
                set->ops->exit_hctx(hctx, hctx_idx);
 
        if (hctx->flags & BLK_MQ_F_BLOCKING)
-               cleanup_srcu_struct(&hctx->queue_rq_srcu);
+               cleanup_srcu_struct(hctx->queue_rq_srcu);
 
        blk_mq_remove_cpuhp(hctx);
        blk_free_flush_queue(hctx->fq);
@@ -1900,7 +1910,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
        spin_lock_init(&hctx->lock);
        INIT_LIST_HEAD(&hctx->dispatch);
        hctx->queue = q;
-       hctx->queue_num = hctx_idx;
        hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
 
        cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
@@ -1939,7 +1948,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
                goto free_fq;
 
        if (hctx->flags & BLK_MQ_F_BLOCKING)
-               init_srcu_struct(&hctx->queue_rq_srcu);
+               init_srcu_struct(hctx->queue_rq_srcu);
 
        blk_mq_debugfs_register_hctx(q, hctx);
 
@@ -2103,20 +2112,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
        }
 }
 
+/*
+ * Caller needs to ensure that we're either frozen/quiesced, or that
+ * the queue isn't live yet.
+ */
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
        struct blk_mq_hw_ctx *hctx;
        int i;
 
        queue_for_each_hw_ctx(q, hctx, i) {
-               if (shared)
+               if (shared) {
+                       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+                               atomic_inc(&q->shared_hctx_restart);
                        hctx->flags |= BLK_MQ_F_TAG_SHARED;
-               else
+               } else {
+                       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+                               atomic_dec(&q->shared_hctx_restart);
                        hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+               }
        }
 }
 
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
+                                       bool shared)
 {
        struct request_queue *q;
 
@@ -2214,6 +2233,20 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
+static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
+{
+       int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
+
+       BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu),
+                          __alignof__(struct blk_mq_hw_ctx)) !=
+                    sizeof(struct blk_mq_hw_ctx));
+
+       if (tag_set->flags & BLK_MQ_F_BLOCKING)
+               hw_ctx_size += sizeof(struct srcu_struct);
+
+       return hw_ctx_size;
+}
+
 static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
                                                struct request_queue *q)
 {
@@ -2228,7 +2261,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
                        continue;
 
                node = blk_mq_hw_queue_to_node(q->mq_map, i);
-               hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
+               hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
                                        GFP_KERNEL, node);
                if (!hctxs[i])
                        break;
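
blk_mq_hw_ctx_size() above implements the optional-trailing-member trick: queue_rq_srcu sits at the (aligned) end of struct blk_mq_hw_ctx, and space for it is allocated only for BLK_MQ_F_BLOCKING queues. A minimal sketch of the idiom with hypothetical names:

struct my_ctx {
	int always_present;
	/* must stay last; backed by 0 or 1 entries depending on size */
	struct srcu_struct srcu[];
};

static struct my_ctx *my_ctx_alloc(bool blocking, int node)
{
	size_t size = sizeof(struct my_ctx);

	if (blocking)
		size += sizeof(struct srcu_struct);

	return kzalloc_node(size, GFP_KERNEL, node);
}
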
index cc67b48..1a06fdf 100644 (file)
@@ -128,17 +128,6 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
        return data->hctx->tags;
 }
 
-/*
- * Internal helpers for request allocation/init/free
- */
-void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-                       struct request *rq, unsigned int op);
-void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
-                               struct request *rq);
-void blk_mq_finish_request(struct request *rq);
-struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
-                                       unsigned int op);
-
 static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
 {
        return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
index 4fa81ed..be1f115 100644 (file)
@@ -172,11 +172,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
        q->nr_batching = BLK_BATCH_REQ;
 
        blk_set_default_limits(&q->limits);
-
-       /*
-        * by default assume old behaviour and bounce for any highmem page
-        */
-       blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 }
 EXPORT_SYMBOL(blk_queue_make_request);
 
index 283da7f..27aceab 100644 (file)
@@ -777,24 +777,25 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
 }
 
 /**
- * blk_release_queue: - release a &struct request_queue when it is no longer needed
- * @kobj:    the kobj belonging to the request queue to be released
+ * __blk_release_queue - release a request queue when it is no longer needed
+ * @work: pointer to the release_work member of the request queue to be released
  *
  * Description:
- *     blk_release_queue is the pair to blk_init_queue() or
- *     blk_queue_make_request().  It should be called when a request queue is
- *     being released; typically when a block device is being de-registered.
- *     Currently, its primary task it to free all the &struct request
- *     structures that were allocated to the queue and the queue itself.
+ *     blk_release_queue is the counterpart of blk_init_queue(). It should be
+ *     called when a request queue is being released; typically when a block
+ *     device is being de-registered. Its primary task is to free the queue
+ *     itself.
  *
- * Note:
+ * Notes:
  *     The low level driver must have finished any outstanding requests first
  *     via blk_cleanup_queue().
- **/
-static void blk_release_queue(struct kobject *kobj)
+ *
+ *     Although blk_release_queue() may be called with preemption disabled,
+ *     __blk_release_queue() may sleep.
+ */
+static void __blk_release_queue(struct work_struct *work)
 {
-       struct request_queue *q =
-               container_of(kobj, struct request_queue, kobj);
+       struct request_queue *q = container_of(work, typeof(*q), release_work);
 
        if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
                blk_stat_remove_callback(q, q->poll_cb);
@@ -834,6 +835,15 @@ static void blk_release_queue(struct kobject *kobj)
        call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
+static void blk_release_queue(struct kobject *kobj)
+{
+       struct request_queue *q =
+               container_of(kobj, struct request_queue, kobj);
+
+       INIT_WORK(&q->release_work, __blk_release_queue);
+       schedule_work(&q->release_work);
+}
+
 static const struct sysfs_ops queue_sysfs_ops = {
        .show   = queue_attr_show,
        .store  = queue_attr_store,
index 07cc329..2290f65 100644 (file)
@@ -258,15 +258,14 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
  *    all transfers have been done for a request. It's important to call
  *    this function before end_that_request_last(), as that will put the
  *    request back on the free list thus corrupting the internal tag list.
- *
- *  Notes:
- *   queue lock must be held.
  **/
 void blk_queue_end_tag(struct request_queue *q, struct request *rq)
 {
        struct blk_queue_tag *bqt = q->queue_tags;
        unsigned tag = rq->tag; /* negative tags invalid */
 
+       lockdep_assert_held(q->queue_lock);
+
        BUG_ON(tag >= bqt->real_max_depth);
 
        list_del_init(&rq->queuelist);
@@ -307,9 +306,6 @@ EXPORT_SYMBOL(blk_queue_end_tag);
  *    calling this function.  The request will also be removed from
  *    the request queue, so it's the drivers responsibility to readd
  *    it if it should need to be restarted for some reason.
- *
- *  Notes:
- *   queue lock must be held.
  **/
 int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 {
@@ -317,6 +313,8 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
        unsigned max_depth;
        int tag;
 
+       lockdep_assert_held(q->queue_lock);
+
        if (unlikely((rq->rq_flags & RQF_QUEUED))) {
                printk(KERN_ERR
                       "%s: request %p for device [%s] already tagged %d",
@@ -389,14 +387,13 @@ EXPORT_SYMBOL(blk_queue_start_tag);
  *   Hardware conditions may dictate a need to stop all pending requests.
  *   In this case, we will safely clear the block side of the tag queue and
  *   readd all requests to the request queue in the right order.
- *
- *  Notes:
- *   queue lock must be held.
  **/
 void blk_queue_invalidate_tags(struct request_queue *q)
 {
        struct list_head *tmp, *n;
 
+       lockdep_assert_held(q->queue_lock);
+
        list_for_each_safe(tmp, n, &q->tag_busy_list)
                blk_requeue_request(q, list_entry_rq(tmp));
 }
index cbff183..17ec83b 100644 (file)
@@ -189,13 +189,15 @@ unsigned long blk_rq_timeout(unsigned long timeout)
  * Notes:
  *    Each request has its own timer, and as it is added to the queue, we
  *    set up the timer. When the request completes, we cancel the timer.
- *    Queue lock must be held for the non-mq case, mq case doesn't care.
  */
 void blk_add_timer(struct request *req)
 {
        struct request_queue *q = req->q;
        unsigned long expiry;
 
+       if (!q->mq_ops)
+               lockdep_assert_held(q->queue_lock);
+
        /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
        if (!q->mq_ops && !q->rq_timed_out_fn)
                return;
index 83c8e11..01ebb81 100644 (file)
@@ -143,6 +143,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
        struct request *rq;
        struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
+       WARN_ON_ONCE(q->mq_ops);
+
        while (1) {
                if (!list_empty(&q->queue_head)) {
                        rq = list_entry_rq(q->queue_head.next);
@@ -334,4 +336,17 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
 static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
 #endif
 
+#ifdef CONFIG_BOUNCE
+extern int init_emergency_isa_pool(void);
+extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
+#else
+static inline int init_emergency_isa_pool(void)
+{
+       return 0;
+}
+static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
+{
+}
+#endif /* CONFIG_BOUNCE */
+
 #endif /* BLK_INTERNAL_H */
index 1cb5dd3..5793c2d 100644 (file)
 #include <asm/tlbflush.h>
 
 #include <trace/events/block.h>
+#include "blk.h"
 
 #define POOL_SIZE      64
 #define ISA_POOL_SIZE  16
 
+static struct bio_set *bounce_bio_set, *bounce_bio_split;
 static mempool_t *page_pool, *isa_page_pool;
 
 #if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL)
@@ -40,6 +42,14 @@ static __init int init_emergency_pool(void)
        BUG_ON(!page_pool);
        pr_info("pool size: %d pages\n", POOL_SIZE);
 
+       bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+       BUG_ON(!bounce_bio_set);
+       if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
+               BUG_ON(1);
+
+       bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
+       BUG_ON(!bounce_bio_split);
+
        return 0;
 }
 
@@ -143,7 +153,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
                mempool_free(bvec->bv_page, pool);
        }
 
-       bio_orig->bi_error = bio->bi_error;
+       bio_orig->bi_status = bio->bi_status;
        bio_endio(bio_orig);
        bio_put(bio);
 }
@@ -163,7 +173,7 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
 {
        struct bio *bio_orig = bio->bi_private;
 
-       if (!bio->bi_error)
+       if (!bio->bi_status)
                copy_to_high_bio_irq(bio_orig, bio);
 
        bounce_end_io(bio, pool);
@@ -186,20 +196,31 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        int rw = bio_data_dir(*bio_orig);
        struct bio_vec *to, from;
        struct bvec_iter iter;
-       unsigned i;
-
-       bio_for_each_segment(from, *bio_orig, iter)
-               if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q))
-                       goto bounce;
+       unsigned i = 0;
+       bool bounce = false;
+       int sectors = 0;
+
+       bio_for_each_segment(from, *bio_orig, iter) {
+               if (i++ < BIO_MAX_PAGES)
+                       sectors += from.bv_len >> 9;
+               if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
+                       bounce = true;
+       }
+       if (!bounce)
+               return;
 
-       return;
-bounce:
-       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
+       if (sectors < bio_sectors(*bio_orig)) {
+               bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
+               bio_chain(bio, *bio_orig);
+               generic_make_request(*bio_orig);
+               *bio_orig = bio;
+       }
+       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
 
        bio_for_each_segment_all(to, bio, i) {
                struct page *page = to->bv_page;
 
-               if (page_to_pfn(page) <= queue_bounce_pfn(q))
+               if (page_to_pfn(page) <= q->limits.bounce_pfn)
                        continue;
 
                to->bv_page = mempool_alloc(pool, q->bounce_gfp);
@@ -251,7 +272,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
         * don't waste time iterating over bio segments
         */
        if (!(q->bounce_gfp & GFP_DMA)) {
-               if (queue_bounce_pfn(q) >= blk_max_pfn)
+               if (q->limits.bounce_pfn >= blk_max_pfn)
                        return;
                pool = page_pool;
        } else {
@@ -264,5 +285,3 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
         */
        __blk_queue_bounce(q, bio_orig, pool);
 }
-
-EXPORT_SYMBOL(blk_queue_bounce);
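
The split logic above caps the bounced clone at BIO_MAX_PAGES segments, counting sectors as bv_len >> 9 (512-byte units, so a 4 KiB bvec contributes 8 sectors). A hedged sketch of the split-and-chain step in isolation (my_split_front() is illustrative):

static struct bio *my_split_front(struct bio **bio_orig, int sectors,
				  struct bio_set *bs)
{
	struct bio *bio = bio_split(*bio_orig, sectors, GFP_NOIO, bs);

	bio_chain(bio, *bio_orig);		/* remainder completes after prefix */
	generic_make_request(*bio_orig);	/* resubmit the remainder */
	*bio_orig = bio;			/* caller keeps working on the prefix */
	return bio;
}
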
index 0a23dbb..c4513b2 100644 (file)
@@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref)
        struct bsg_job *job = container_of(kref, struct bsg_job, kref);
        struct request *rq = job->req;
 
-       blk_end_request_all(rq, scsi_req(rq)->result);
+       blk_end_request_all(rq, BLK_STS_OK);
 
        put_device(job->dev);   /* release reference for the request */
 
@@ -202,7 +202,7 @@ static void bsg_request_fn(struct request_queue *q)
                ret = bsg_create_job(dev, req);
                if (ret) {
                        scsi_req(req)->result = ret;
-                       blk_end_request_all(req, ret);
+                       blk_end_request_all(req, BLK_STS_OK);
                        spin_lock_irq(q->queue_lock);
                        continue;
                }
@@ -246,6 +246,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
        q->bsg_job_size = dd_job_size;
        q->bsg_job_fn = job_fn;
        queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+       queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
        blk_queue_softirq_done(q, bsg_softirq_done);
        blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
index 6fd0854..37663b6 100644 (file)
@@ -236,7 +236,6 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
        rq = blk_get_request(q, op, GFP_KERNEL);
        if (IS_ERR(rq))
                return rq;
-       scsi_req_init(rq);
 
        ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
        if (ret)
@@ -294,14 +293,14 @@ out:
  * async completion call-back from the block layer, when scsi/ide/whatever
  * calls end_that_request_last() on a request
  */
-static void bsg_rq_end_io(struct request *rq, int uptodate)
+static void bsg_rq_end_io(struct request *rq, blk_status_t status)
 {
        struct bsg_command *bc = rq->end_io_data;
        struct bsg_device *bd = bc->bd;
        unsigned long flags;
 
-       dprintk("%s: finished rq %p bc %p, bio %p stat %d\n",
-               bd->name, rq, bc, bc->bio, uptodate);
+       dprintk("%s: finished rq %p bc %p, bio %p\n",
+               bd->name, rq, bc, bc->bio);
 
        bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
 
@@ -750,6 +749,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
 #ifdef BSG_DEBUG
        unsigned char buf[32];
 #endif
+
+       if (!blk_queue_scsi_passthrough(rq)) {
+               WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+               return ERR_PTR(-EINVAL);
+       }
+
        if (!blk_get_queue(rq))
                return ERR_PTR(-ENXIO);
 
index b7e9c7f..3d5c289 100644 (file)
@@ -982,15 +982,6 @@ static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
        return min_vdisktime;
 }
 
-static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
-{
-       s64 delta = (s64)(vdisktime - min_vdisktime);
-       if (delta < 0)
-               min_vdisktime = vdisktime;
-
-       return min_vdisktime;
-}
-
 static void update_min_vdisktime(struct cfq_rb_root *st)
 {
        struct cfq_group *cfqg;
index dac99fb..4bb2f0c 100644 (file)
@@ -681,6 +681,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
                 */
                if (elv_attempt_insert_merge(q, rq))
                        break;
+               /* fall through */
        case ELEVATOR_INSERT_SORT:
                BUG_ON(blk_rq_is_passthrough(rq));
                rq->rq_flags |= RQF_SORTED;
index d252d29..7f520fa 100644 (file)
@@ -36,7 +36,7 @@ struct kobject *block_depr;
 static DEFINE_SPINLOCK(ext_devt_lock);
 static DEFINE_IDR(ext_devt_idr);
 
-static struct device_type disk_type;
+static const struct device_type disk_type;
 
 static void disk_check_events(struct disk_events *ev,
                              unsigned int *clearing_ptr);
@@ -1183,7 +1183,7 @@ static char *block_devnode(struct device *dev, umode_t *mode,
        return NULL;
 }
 
-static struct device_type disk_type = {
+static const struct device_type disk_type = {
        .name           = "disk",
        .groups         = disk_attr_groups,
        .release        = disk_release,
index 4b120c9..6f5d0b6 100644 (file)
@@ -75,7 +75,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
                case IOPRIO_CLASS_RT:
                        if (!capable(CAP_SYS_ADMIN))
                                return -EPERM;
-                       /* fall through, rt has prio field too */
+                       /* fall through */
+                       /* rt has prio field too */
                case IOPRIO_CLASS_BE:
                        if (data >= IOPRIO_BE_NR || data < 0)
                                return -EINVAL;
index b9faabc..a9f6fd3 100644 (file)
@@ -426,33 +426,29 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
        }
 }
 
-static struct request *kyber_get_request(struct request_queue *q,
-                                        unsigned int op,
-                                        struct blk_mq_alloc_data *data)
+static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 {
-       struct kyber_queue_data *kqd = q->elevator->elevator_data;
-       struct request *rq;
-
        /*
         * We use the scheduler tags as per-hardware queue queueing tokens.
         * Async requests can be limited at this stage.
         */
-       if (!op_is_sync(op))
+       if (!op_is_sync(op)) {
+               struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
+
                data->shallow_depth = kqd->async_depth;
+       }
+}
 
-       rq = __blk_mq_alloc_request(data, op);
-       if (rq)
-               rq_set_domain_token(rq, -1);
-       return rq;
+static void kyber_prepare_request(struct request *rq, struct bio *bio)
+{
+       rq_set_domain_token(rq, -1);
 }
 
-static void kyber_put_request(struct request *rq)
+static void kyber_finish_request(struct request *rq)
 {
-       struct request_queue *q = rq->q;
-       struct kyber_queue_data *kqd = q->elevator->elevator_data;
+       struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
 
        rq_clear_domain_token(kqd, rq);
-       blk_mq_finish_request(rq);
 }
 
 static void kyber_completed_request(struct request *rq)
@@ -815,8 +811,9 @@ static struct elevator_type kyber_sched = {
                .exit_sched = kyber_exit_sched,
                .init_hctx = kyber_init_hctx,
                .exit_hctx = kyber_exit_hctx,
-               .get_request = kyber_get_request,
-               .put_request = kyber_put_request,
+               .limit_depth = kyber_limit_depth,
+               .prepare_request = kyber_prepare_request,
+               .finish_request = kyber_finish_request,
                .completed_request = kyber_completed_request,
                .dispatch_request = kyber_dispatch_request,
                .has_work = kyber_has_work,
index edcea70..2a365c7 100644 (file)
@@ -115,7 +115,7 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
                ldm_error("PRIVHEAD disk size doesn't match real disk size");
                return false;
        }
-       if (uuid_be_to_bin(data + 0x0030, (uuid_be *)ph->disk_id)) {
+       if (uuid_parse(data + 0x0030, &ph->disk_id)) {
                ldm_error("PRIVHEAD contains an invalid GUID.");
                return false;
        }
@@ -234,7 +234,7 @@ static bool ldm_compare_privheads (const struct privhead *ph1,
                (ph1->logical_disk_size  == ph2->logical_disk_size)     &&
                (ph1->config_start       == ph2->config_start)          &&
                (ph1->config_size        == ph2->config_size)           &&
-               !memcmp (ph1->disk_id, ph2->disk_id, GUID_SIZE));
+               uuid_equal(&ph1->disk_id, &ph2->disk_id));
 }
 
 /**
@@ -557,7 +557,7 @@ static struct vblk * ldm_get_disk_objid (const struct ldmdb *ldb)
 
        list_for_each (item, &ldb->v_disk) {
                struct vblk *v = list_entry (item, struct vblk, list);
-               if (!memcmp (v->vblk.disk.disk_id, ldb->ph.disk_id, GUID_SIZE))
+               if (uuid_equal(&v->vblk.disk.disk_id, &ldb->ph.disk_id))
                        return v;
        }
 
@@ -892,7 +892,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
        disk = &vb->vblk.disk;
        ldm_get_vstr (buffer + 0x18 + r_diskid, disk->alt_name,
                sizeof (disk->alt_name));
-       if (uuid_be_to_bin(buffer + 0x19 + r_name, (uuid_be *)disk->disk_id))
+       if (uuid_parse(buffer + 0x19 + r_name, &disk->disk_id))
                return false;
 
        return true;
@@ -927,7 +927,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
                return false;
 
        disk = &vb->vblk.disk;
-       memcpy (disk->disk_id, buffer + 0x18 + r_name, GUID_SIZE);
+       uuid_copy(&disk->disk_id, (uuid_t *)(buffer + 0x18 + r_name));
        return true;
 }
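
The LDM conversion above leans on three uuid_t helpers from <linux/uuid.h>. A hedged round-trip sketch (my_check() is illustrative):

#include <linux/uuid.h>

static bool my_check(const char *text)
{
	uuid_t a, b;

	if (uuid_parse(text, &a))	/* textual UUID -> binary, 0 on success */
		return false;
	uuid_copy(&b, &a);		/* structure copy */
	return uuid_equal(&a, &b);	/* byte-wise comparison */
}
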
 
index 374242c..f4c6055 100644 (file)
@@ -112,8 +112,6 @@ struct frag {                               /* VBLK Fragment handling */
 
 /* In memory LDM database structures. */
 
-#define GUID_SIZE              16
-
 struct privhead {                      /* Offsets and sizes are in sectors. */
        u16     ver_major;
        u16     ver_minor;
@@ -121,7 +119,7 @@ struct privhead {                   /* Offsets and sizes are in sectors. */
        u64     logical_disk_size;
        u64     config_start;
        u64     config_size;
-       u8      disk_id[GUID_SIZE];
+       uuid_t  disk_id;
 };
 
 struct tocblock {                      /* We have exactly two bitmaps. */
@@ -154,7 +152,7 @@ struct vblk_dgrp {                  /* VBLK Disk Group */
 };
 
 struct vblk_disk {                     /* VBLK Disk */
-       u8      disk_id[GUID_SIZE];
+       uuid_t  disk_id;
        u8      alt_name[128];
 };
 
index 4a294a5..7440de4 100644 (file)
@@ -326,7 +326,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
        if (IS_ERR(rq))
                return PTR_ERR(rq);
        req = scsi_req(rq);
-       scsi_req_init(rq);
 
        if (hdr->cmd_len > BLK_MAX_CDB) {
                req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
@@ -456,7 +455,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
                goto error_free_buffer;
        }
        req = scsi_req(rq);
-       scsi_req_init(rq);
 
        cmdlen = COMMAND_SIZE(opcode);
 
@@ -542,7 +540,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
        rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
        if (IS_ERR(rq))
                return PTR_ERR(rq);
-       scsi_req_init(rq);
        rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
        scsi_req(rq)->cmd[0] = cmd;
        scsi_req(rq)->cmd[4] = data;
@@ -744,10 +741,14 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
 }
 EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
 
-void scsi_req_init(struct request *rq)
+/**
+ * scsi_req_init - initialize certain fields of a scsi_request structure
+ * @req: Pointer to a scsi_request structure.
+ * Initializes .__cmd[], .cmd, .cmd_len and .sense_len but no other members
+ * of struct scsi_request.
+ */
+void scsi_req_init(struct scsi_request *req)
 {
-       struct scsi_request *req = scsi_req(rq);
-
        memset(req->__cmd, 0, sizeof(req->__cmd));
        req->cmd = req->__cmd;
        req->cmd_len = BLK_MAX_CDB;
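
The three scsi_req_init() calls removed above became redundant, apparently because passthrough request initialization now happens when the request is allocated by the block layer; the function itself now takes the struct scsi_request directly instead of digging it out of the struct request. A hedged sketch of the call-site pattern after this change (setup_example is hypothetical):

        static void setup_example(struct request *rq)
        {
                struct scsi_request *sreq = scsi_req(rq);

                scsi_req_init(sreq);    /* was: scsi_req_init(rq) */
        }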
index 680c6d6..3416dad 100644
@@ -46,8 +46,8 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
  * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
  * tag.
  */
-static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
-                          unsigned int type)
+static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
+               csum_fn *fn, unsigned int type)
 {
        unsigned int i;
 
@@ -67,11 +67,11 @@ static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
                iter->seed++;
        }
 
-       return 0;
+       return BLK_STS_OK;
 }
 
-static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
-                               unsigned int type)
+static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
+               csum_fn *fn, unsigned int type)
 {
        unsigned int i;
 
@@ -91,7 +91,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
                                       "(rcvd %u)\n", iter->disk_name,
                                       (unsigned long long)
                                       iter->seed, be32_to_cpu(pi->ref_tag));
-                               return -EILSEQ;
+                               return BLK_STS_PROTECTION;
                        }
                        break;
                case 3:
@@ -108,7 +108,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
                               "(rcvd %04x, want %04x)\n", iter->disk_name,
                               (unsigned long long)iter->seed,
                               be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
-                       return -EILSEQ;
+                       return BLK_STS_PROTECTION;
                }
 
 next:
@@ -117,45 +117,45 @@ next:
                iter->seed++;
        }
 
-       return 0;
+       return BLK_STS_OK;
 }
 
-static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
 {
        return t10_pi_generate(iter, t10_pi_crc_fn, 1);
 }
 
-static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
 {
        return t10_pi_generate(iter, t10_pi_ip_fn, 1);
 }
 
-static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
 {
        return t10_pi_verify(iter, t10_pi_crc_fn, 1);
 }
 
-static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
 {
        return t10_pi_verify(iter, t10_pi_ip_fn, 1);
 }
 
-static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
 {
        return t10_pi_generate(iter, t10_pi_crc_fn, 3);
 }
 
-static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
 {
        return t10_pi_generate(iter, t10_pi_ip_fn, 3);
 }
 
-static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
 {
        return t10_pi_verify(iter, t10_pi_crc_fn, 3);
 }
 
-static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
 {
        return t10_pi_verify(iter, t10_pi_ip_fn, 3);
 }
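
The t10-pi hunks convert the integrity generate/verify callbacks from int (0 or a negative errno) to the new blk_status_t error space. A hedged sketch of a verify callback under the new convention; my_verify and guard_ok are hypothetical placeholders:

        #include <linux/blkdev.h>
        #include <linux/blk_types.h>

        static bool guard_ok(struct blk_integrity_iter *iter)
        {
                return true;    /* placeholder for a real checksum compare */
        }

        static blk_status_t my_verify(struct blk_integrity_iter *iter)
        {
                if (!guard_ok(iter))
                        return BLK_STS_PROTECTION;      /* was -EILSEQ */
                return BLK_STS_OK;                      /* was 0 */
        }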
index 502ea4d..560fdae 100644
@@ -141,9 +141,9 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
        int     cpu = mce->extcpu;
        struct acpi_hest_generic_status *estatus, *tmp;
        struct acpi_hest_generic_data *gdata;
-       const uuid_le *fru_id = &NULL_UUID_LE;
+       const guid_t *fru_id = &guid_null;
        char *fru_text = "";
-       uuid_le *sec_type;
+       guid_t *sec_type;
        static u32 err_seq;
 
        estatus = extlog_elog_entry_check(cpu, bank);
@@ -165,11 +165,11 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
        err_seq++;
        gdata = (struct acpi_hest_generic_data *)(tmp + 1);
        if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
-               fru_id = (uuid_le *)gdata->fru_id;
+               fru_id = (guid_t *)gdata->fru_id;
        if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
                fru_text = gdata->fru_text;
-       sec_type = (uuid_le *)gdata->section_type;
-       if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+       sec_type = (guid_t *)gdata->section_type;
+       if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
                struct cper_sec_mem_err *mem = (void *)(gdata + 1);
                if (gdata->error_data_length >= sizeof(*mem))
                        trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
@@ -182,17 +182,17 @@ out:
 
 static bool __init extlog_get_l1addr(void)
 {
-       u8 uuid[16];
+       guid_t guid;
        acpi_handle handle;
        union acpi_object *obj;
 
-       acpi_str_to_uuid(extlog_dsm_uuid, uuid);
-
+       if (guid_parse(extlog_dsm_uuid, &guid))
+               return false;
        if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
                return false;
-       if (!acpi_check_dsm(handle, uuid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
+       if (!acpi_check_dsm(handle, &guid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR))
                return false;
-       obj = acpi_evaluate_dsm_typed(handle, uuid, EXTLOG_DSM_REV,
+       obj = acpi_evaluate_dsm_typed(handle, &guid, EXTLOG_DSM_REV,
                                      EXTLOG_FN_ADDR, NULL, ACPI_TYPE_INTEGER);
        if (!obj) {
                return false;
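
Here the extlog driver moves from the old uuid_le type to guid_t (the little-endian flavour of the reworked UUID types): NULL_UUID_LE becomes guid_null, the `!uuid_le_cmp(a, b)` idiom becomes the boolean guid_equal(&a, &b), and acpi_str_to_uuid() is replaced by the generic guid_parse(), which returns nonzero on malformed input, hence the new early `return false`. A short hedged fragment of the same idiom (the all-zero UUID string is just an illustration):

        guid_t guid;

        if (guid_parse("00000000-0000-0000-0000-000000000000", &guid))
                return false;                   /* malformed string */
        if (guid_equal(&guid, &guid_null))
                /* ... handle the all-zero GUID ... */;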
index 7abe665..0d2e989 100644
@@ -416,9 +416,18 @@ acpi_tb_get_table(struct acpi_table_desc *table_desc,
                }
        }
 
-       table_desc->validation_count++;
-       if (table_desc->validation_count == 0) {
-               table_desc->validation_count--;
+       if (table_desc->validation_count < ACPI_MAX_TABLE_VALIDATIONS) {
+               table_desc->validation_count++;
+
+               /*
+                * Detect validation_count overflows to ensure that the warning
+                * message will only be printed once.
+                */
+               if (table_desc->validation_count >= ACPI_MAX_TABLE_VALIDATIONS) {
+                       ACPI_WARNING((AE_INFO,
+                                     "Table %p, Validation count overflows\n",
+                                     table_desc));
+               }
        }
 
        *out_table = table_desc->pointer;
@@ -445,13 +454,20 @@ void acpi_tb_put_table(struct acpi_table_desc *table_desc)
 
        ACPI_FUNCTION_TRACE(acpi_tb_put_table);
 
-       if (table_desc->validation_count == 0) {
-               ACPI_WARNING((AE_INFO,
-                             "Table %p, Validation count is zero before decrement\n",
-                             table_desc));
-               return_VOID;
+       if (table_desc->validation_count < ACPI_MAX_TABLE_VALIDATIONS) {
+               table_desc->validation_count--;
+
+               /*
+                * Detect validation_count underflows to ensure that the warning
+                * message will only be printed once.
+                */
+               if (table_desc->validation_count >= ACPI_MAX_TABLE_VALIDATIONS) {
+                       ACPI_WARNING((AE_INFO,
+                                     "Table %p, Validation count underflows\n",
+                                     table_desc));
+                       return_VOID;
+               }
        }
-       table_desc->validation_count--;
 
        if (table_desc->validation_count == 0) {
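
The two tbutils.c hunks turn validation_count into a saturating counter: once it reaches ACPI_MAX_TABLE_VALIDATIONS it is never incremented or decremented again, so a table whose count overflowed stays pinned instead of wrapping. The underflow warning works the same way: decrementing a count of zero wraps it up past the limit, the warning fires once, and the saturation test then keeps it from firing again. A hedged standalone sketch of the scheme (the 16-bit limit assumes ACPI_MAX_TABLE_VALIDATIONS expands to ACPI_UINT16_MAX):

        #define MAX_VALIDATIONS 0xFFFFu

        static void table_get(unsigned short *count)
        {
                if (*count < MAX_VALIDATIONS)
                        (*count)++;     /* sticks at the limit forever */
        }

        static void table_put(unsigned short *count)
        {
                if (*count < MAX_VALIDATIONS)
                        (*count)--;     /* 0 wraps past the limit: warn once */
        }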
 
index e0587c8..ff096d9 100644
@@ -474,15 +474,6 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state,
                                return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
                        }
 
-                       /*
-                        * The end_tag opcode must be followed by a zero byte.
-                        * Although this byte is technically defined to be a checksum,
-                        * in practice, all ASL compilers set this byte to zero.
-                        */
-                       if (*(aml + 1) != 0) {
-                               return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
-                       }
-
                        /* Return the pointer to the end_tag if requested */
 
                        if (!user_function) {
index d0855c0..980515e 100644
@@ -431,12 +431,13 @@ static void ghes_do_proc(struct ghes *ghes,
 {
        int sev, sec_sev;
        struct acpi_hest_generic_data *gdata;
+       guid_t *sec_type;
 
        sev = ghes_severity(estatus->error_severity);
        apei_estatus_for_each_section(estatus, gdata) {
+               sec_type = (guid_t *)gdata->section_type;
                sec_sev = ghes_severity(gdata->error_severity);
-               if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
-                                CPER_SEC_PLATFORM_MEM)) {
+               if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
                        struct cper_sec_mem_err *mem_err;
                        mem_err = (struct cper_sec_mem_err *)(gdata+1);
                        ghes_edac_report_mem_error(ghes, sev, mem_err);
@@ -445,8 +446,7 @@ static void ghes_do_proc(struct ghes *ghes,
                        ghes_handle_memory_failure(gdata, sev);
                }
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-               else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
-                                     CPER_SEC_PCIE)) {
+               else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
                        struct cper_sec_pcie *pcie_err;
                        pcie_err = (struct cper_sec_pcie *)(gdata+1);
                        if (sev == GHES_SEV_RECOVERABLE &&
index 784bda6..5a6fbe0 100644
@@ -196,42 +196,19 @@ static void acpi_print_osc_error(acpi_handle handle,
        pr_debug("\n");
 }
 
-acpi_status acpi_str_to_uuid(char *str, u8 *uuid)
-{
-       int i;
-       static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21,
-               24, 26, 28, 30, 32, 34};
-
-       if (strlen(str) != 36)
-               return AE_BAD_PARAMETER;
-       for (i = 0; i < 36; i++) {
-               if (i == 8 || i == 13 || i == 18 || i == 23) {
-                       if (str[i] != '-')
-                               return AE_BAD_PARAMETER;
-               } else if (!isxdigit(str[i]))
-                       return AE_BAD_PARAMETER;
-       }
-       for (i = 0; i < 16; i++) {
-               uuid[i] = hex_to_bin(str[opc_map_to_uuid[i]]) << 4;
-               uuid[i] |= hex_to_bin(str[opc_map_to_uuid[i] + 1]);
-       }
-       return AE_OK;
-}
-EXPORT_SYMBOL_GPL(acpi_str_to_uuid);
-
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
 {
        acpi_status status;
        struct acpi_object_list input;
        union acpi_object in_params[4];
        union acpi_object *out_obj;
-       u8 uuid[16];
+       guid_t guid;
        u32 errors;
        struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
 
        if (!context)
                return AE_ERROR;
-       if (ACPI_FAILURE(acpi_str_to_uuid(context->uuid_str, uuid)))
+       if (guid_parse(context->uuid_str, &guid))
                return AE_ERROR;
        context->ret.length = ACPI_ALLOCATE_BUFFER;
        context->ret.pointer = NULL;
@@ -241,7 +218,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
        input.pointer = in_params;
        in_params[0].type               = ACPI_TYPE_BUFFER;
        in_params[0].buffer.length      = 16;
-       in_params[0].buffer.pointer     = uuid;
+       in_params[0].buffer.pointer     = (u8 *)&guid;
        in_params[1].type               = ACPI_TYPE_INTEGER;
        in_params[1].integer.value      = context->rev;
        in_params[2].type               = ACPI_TYPE_INTEGER;
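
acpi_run_osc() now parses the UUID string into a stack guid_t and hands its address straight to ACPI as the 16-byte _OSC buffer. That cast is only sound because guid_t is a bare 16-byte structure; a hedged compile-time guard one could place in function scope to document the assumption:

        BUILD_BUG_ON(sizeof(guid_t) != 16);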
index 656acb5..097eff0 100644
@@ -74,11 +74,11 @@ struct nfit_table_prev {
        struct list_head flushes;
 };
 
-static u8 nfit_uuid[NFIT_UUID_MAX][16];
+static guid_t nfit_uuid[NFIT_UUID_MAX];
 
-const u8 *to_nfit_uuid(enum nfit_uuids id)
+const guid_t *to_nfit_uuid(enum nfit_uuids id)
 {
-       return nfit_uuid[id];
+       return &nfit_uuid[id];
 }
 EXPORT_SYMBOL(to_nfit_uuid);
 
@@ -222,7 +222,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
        u32 offset, fw_status = 0;
        acpi_handle handle;
        unsigned int func;
-       const u8 *uuid;
+       const guid_t *guid;
        int rc, i;
 
        func = cmd;
@@ -245,7 +245,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
                cmd_mask = nvdimm_cmd_mask(nvdimm);
                dsm_mask = nfit_mem->dsm_mask;
                desc = nd_cmd_dimm_desc(cmd);
-               uuid = to_nfit_uuid(nfit_mem->family);
+               guid = to_nfit_uuid(nfit_mem->family);
                handle = adev->handle;
        } else {
                struct acpi_device *adev = to_acpi_dev(acpi_desc);
@@ -254,7 +254,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
                cmd_mask = nd_desc->cmd_mask;
                dsm_mask = cmd_mask;
                desc = nd_cmd_bus_desc(cmd);
-               uuid = to_nfit_uuid(NFIT_DEV_BUS);
+               guid = to_nfit_uuid(NFIT_DEV_BUS);
                handle = adev->handle;
                dimm_name = "bus";
        }
@@ -289,7 +289,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
                        in_buf.buffer.pointer,
                        min_t(u32, 256, in_buf.buffer.length), true);
 
-       out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
+       out_obj = acpi_evaluate_dsm(handle, guid, 1, func, &in_obj);
        if (!out_obj) {
                dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
                                cmd_name);
@@ -409,7 +409,7 @@ int nfit_spa_type(struct acpi_nfit_system_address *spa)
        int i;
 
        for (i = 0; i < NFIT_UUID_MAX; i++)
-               if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
+               if (guid_equal(to_nfit_uuid(i), (guid_t *)&spa->range_guid))
                        return i;
        return -1;
 }
@@ -1415,7 +1415,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
        struct acpi_device *adev, *adev_dimm;
        struct device *dev = acpi_desc->dev;
        unsigned long dsm_mask;
-       const u8 *uuid;
+       const guid_t *guid;
        int i;
        int family = -1;
 
@@ -1444,7 +1444,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
        /*
         * Until standardization materializes we need to consider 4
         * different command sets.  Note, that checking for function0 (bit0)
-        * tells us if any commands are reachable through this uuid.
+        * tells us if any commands are reachable through this GUID.
         */
        for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
                if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
@@ -1474,9 +1474,9 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
                return 0;
        }
 
-       uuid = to_nfit_uuid(nfit_mem->family);
+       guid = to_nfit_uuid(nfit_mem->family);
        for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
-               if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
+               if (acpi_check_dsm(adev_dimm->handle, guid, 1, 1ULL << i))
                        set_bit(i, &nfit_mem->dsm_mask);
 
        return 0;
@@ -1611,7 +1611,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
 {
        struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
-       const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
+       const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS);
        struct acpi_device *adev;
        int i;
 
@@ -1621,7 +1621,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
                return;
 
        for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
-               if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
+               if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
                        set_bit(i, &nd_desc->cmd_mask);
 }
 
@@ -3051,19 +3051,19 @@ static __init int nfit_init(void)
        BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
        BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
 
-       acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
-       acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
-       acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
-       acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
-       acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
-       acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
-       acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
-       acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
-       acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
-       acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
-       acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
-       acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
-       acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+       guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]);
+       guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]);
+       guid_parse(UUID_CONTROL_REGION, &nfit_uuid[NFIT_SPA_DCR]);
+       guid_parse(UUID_DATA_REGION, &nfit_uuid[NFIT_SPA_BDW]);
+       guid_parse(UUID_VOLATILE_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_VDISK]);
+       guid_parse(UUID_VOLATILE_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_VCD]);
+       guid_parse(UUID_PERSISTENT_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_PDISK]);
+       guid_parse(UUID_PERSISTENT_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_PCD]);
+       guid_parse(UUID_NFIT_BUS, &nfit_uuid[NFIT_DEV_BUS]);
+       guid_parse(UUID_NFIT_DIMM, &nfit_uuid[NFIT_DEV_DIMM]);
+       guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
+       guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
+       guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
 
        nfit_wq = create_singlethread_workqueue("nfit");
        if (!nfit_wq)
index 58fb7d6..29bdd95 100644
@@ -18,7 +18,6 @@
 #include <linux/libnvdimm.h>
 #include <linux/ndctl.h>
 #include <linux/types.h>
-#include <linux/uuid.h>
 #include <linux/acpi.h>
 #include <acpi/acuuid.h>
 
@@ -237,7 +236,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
        return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
 }
 
-const u8 *to_nfit_uuid(enum nfit_uuids id);
+const guid_t *to_nfit_uuid(enum nfit_uuids id);
 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
 void acpi_nfit_shutdown(void *data);
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
index 3a10d75..d531629 100644
@@ -1428,6 +1428,37 @@ static void acpi_init_coherency(struct acpi_device *adev)
        adev->flags.coherent_dma = cca;
 }
 
+static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
+{
+       bool *is_spi_i2c_slave_p = data;
+
+       if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+               return 1;
+
+       /*
+        * devices that are connected to UART still need to be enumerated to
+        * platform bus
+        */
+       if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
+               *is_spi_i2c_slave_p = true;
+
+        /* no need to do more checking */
+       return -1;
+}
+
+static bool acpi_is_spi_i2c_slave(struct acpi_device *device)
+{
+       struct list_head resource_list;
+       bool is_spi_i2c_slave = false;
+
+       INIT_LIST_HEAD(&resource_list);
+       acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
+                              &is_spi_i2c_slave);
+       acpi_dev_free_resource_list(&resource_list);
+
+       return is_spi_i2c_slave;
+}
+
 void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
                             int type, unsigned long long sta)
 {
@@ -1443,6 +1474,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
        acpi_bus_get_flags(device);
        device->flags.match_driver = false;
        device->flags.initialized = true;
+       device->flags.spi_i2c_slave = acpi_is_spi_i2c_slave(device);
        acpi_device_clear_enumerated(device);
        device_initialize(&device->dev);
        dev_set_uevent_suppress(&device->dev, true);
@@ -1727,38 +1759,13 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used,
        return AE_OK;
 }
 
-static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
-{
-       bool *is_spi_i2c_slave_p = data;
-
-       if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
-               return 1;
-
-       /*
-        * devices that are connected to UART still need to be enumerated to
-        * platform bus
-        */
-       if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
-               *is_spi_i2c_slave_p = true;
-
-        /* no need to do more checking */
-       return -1;
-}
-
 static void acpi_default_enumeration(struct acpi_device *device)
 {
-       struct list_head resource_list;
-       bool is_spi_i2c_slave = false;
-
        /*
         * Do not enumerate SPI/I2C slaves as they will be enumerated by their
         * respective parents.
         */
-       INIT_LIST_HEAD(&resource_list);
-       acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
-                              &is_spi_i2c_slave);
-       acpi_dev_free_resource_list(&resource_list);
-       if (!is_spi_i2c_slave) {
+       if (!device->flags.spi_i2c_slave) {
                acpi_create_platform_device(device, NULL);
                acpi_device_set_enumerated(device);
        } else {
@@ -1854,7 +1861,7 @@ static void acpi_bus_attach(struct acpi_device *device)
                return;
 
        device->flags.match_driver = true;
-       if (ret > 0) {
+       if (ret > 0 && !device->flags.spi_i2c_slave) {
                acpi_device_set_enumerated(device);
                goto ok;
        }
@@ -1863,10 +1870,10 @@ static void acpi_bus_attach(struct acpi_device *device)
        if (ret < 0)
                return;
 
-       if (device->pnp.type.platform_id)
-               acpi_default_enumeration(device);
-       else
+       if (!device->pnp.type.platform_id && !device->flags.spi_i2c_slave)
                acpi_device_set_enumerated(device);
+       else
+               acpi_default_enumeration(device);
 
  ok:
        list_for_each_entry(child, &device->children, node)
index 27d0dcf..b9d956c 100644
@@ -613,19 +613,19 @@ acpi_status acpi_evaluate_lck(acpi_handle handle, int lock)
 /**
  * acpi_evaluate_dsm - evaluate device's _DSM method
  * @handle: ACPI device handle
- * @uuid: UUID of requested functions, should be 16 bytes
+ * @guid: GUID of requested functions, should be 16 bytes
  * @rev: revision number of requested function
  * @func: requested function number
  * @argv4: the function specific parameter
  *
- * Evaluate device's _DSM method with specified UUID, revision id and
+ * Evaluate device's _DSM method with specified GUID, revision id and
  * function number. Caller needs to free the returned object.
  *
  * Though ACPI defines the fourth parameter for _DSM should be a package,
  * some old BIOSes do expect a buffer or an integer etc.
  */
 union acpi_object *
-acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
+acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func,
                  union acpi_object *argv4)
 {
        acpi_status ret;
@@ -638,7 +638,7 @@ acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
 
        params[0].type = ACPI_TYPE_BUFFER;
        params[0].buffer.length = 16;
-       params[0].buffer.pointer = (char *)uuid;
+       params[0].buffer.pointer = (u8 *)guid;
        params[1].type = ACPI_TYPE_INTEGER;
        params[1].integer.value = rev;
        params[2].type = ACPI_TYPE_INTEGER;
@@ -666,7 +666,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm);
 /**
  * acpi_check_dsm - check if _DSM method supports requested functions.
  * @handle: ACPI device handle
- * @uuid: UUID of requested functions, should be 16 bytes at least
+ * @guid: GUID of requested functions, should be 16 bytes at least
  * @rev: revision number of requested functions
  * @funcs: bitmap of requested functions
  *
@@ -674,7 +674,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm);
  * functions. Currently only support 64 functions at maximum, should be
  * enough for now.
  */
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
+bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs)
 {
        int i;
        u64 mask = 0;
@@ -683,7 +683,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
        if (funcs == 0)
                return false;
 
-       obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL);
+       obj = acpi_evaluate_dsm(handle, guid, rev, 0, NULL);
        if (!obj)
                return false;
 
@@ -697,7 +697,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 
        /*
         * Bit 0 indicates whether there's support for any functions other than
-        * function 0 for the specified UUID and revision.
+        * function 0 for the specified GUID and revision.
         */
        if ((mask & 0x1) && (mask & funcs) == funcs)
                return true;
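
With the utils.c conversion above, the whole _DSM helper family keys off guid_t. A hedged end-to-end usage sketch; MY_DSM_UUID, MY_REV, MY_FN and my_dsm_call are hypothetical placeholders:

        static int my_dsm_call(acpi_handle handle)
        {
                union acpi_object *out;
                guid_t guid;

                if (guid_parse(MY_DSM_UUID, &guid))
                        return -EINVAL;
                /* bit 0 of the mask: is anything beyond function 0 there? */
                if (!acpi_check_dsm(handle, &guid, MY_REV, 1ULL << MY_FN))
                        return -ENODEV;
                out = acpi_evaluate_dsm(handle, &guid, MY_REV, MY_FN, NULL);
                if (!out)
                        return -EIO;
                ACPI_FREE(out);         /* caller must free the result */
                return 0;
        }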
index 26a51be..245a879 100644
@@ -3464,7 +3464,7 @@ static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command,
                                                 bool SuccessfulIO)
 {
        struct request *Request = Command->Request;
-       int Error = SuccessfulIO ? 0 : -EIO;
+       blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR;
 
        pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist,
                Command->SegmentCount, Command->DmaDirection);
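
DAC960 is the first of many legacy (non-mq) block drivers in this diff converted to complete requests with a blk_status_t instead of 0/-EIO. The pattern, sketched and hedged (my_complete is a hypothetical name):

        static void my_complete(struct request *rq, bool ok)
        {
                __blk_end_request_all(rq, ok ? BLK_STS_OK : BLK_STS_IOERR);
        }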
index a328f67..49908c7 100644
@@ -1378,7 +1378,7 @@ static void redo_fd_request(void)
        struct amiga_floppy_struct *floppy;
        char *data;
        unsigned long flags;
-       int err;
+       blk_status_t err;
 
 next_req:
        rq = set_next_request();
@@ -1392,7 +1392,7 @@ next_req:
 
 next_segment:
        /* Here someone could investigate to be more efficient */
-       for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
+       for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) {
 #ifdef DEBUG
                printk("fd: sector %ld + %d requested for %s\n",
                       blk_rq_pos(rq), cnt,
@@ -1400,7 +1400,7 @@ next_segment:
 #endif
                block = blk_rq_pos(rq) + cnt;
                if ((int)block > floppy->blocks) {
-                       err = -EIO;
+                       err = BLK_STS_IOERR;
                        break;
                }
 
@@ -1413,7 +1413,7 @@ next_segment:
 #endif
 
                if (get_track(drive, track) == -1) {
-                       err = -EIO;
+                       err = BLK_STS_IOERR;
                        break;
                }
 
@@ -1424,7 +1424,7 @@ next_segment:
 
                        /* keep the drive spinning while writes are scheduled */
                        if (!fd_motor_on(drive)) {
-                               err = -EIO;
+                               err = BLK_STS_IOERR;
                                break;
                        }
                        /*
index 027b876..6797e6c 100644
@@ -388,6 +388,7 @@ aoeblk_gdalloc(void *vp)
                        d->aoemajor, d->aoeminor);
                goto err_mempool;
        }
+       blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
        spin_lock_irqsave(&d->lock, flags);
        WARN_ON(!(d->flags & DEVFL_GD_NOW));
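
The one-line aoeblk addition is needed because blk_queue_make_request() no longer installs a highmem bounce limit by default in this series; drivers whose PIO paths cannot touch highmem pages now have to opt in themselves (the brd hunk further below drops the opposite, now-default BLK_BOUNCE_ANY for the same reason). The opt-in, as used above:

        blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);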
index 3c606c0..dc43254 100644
@@ -1070,8 +1070,8 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
                d->ip.rq = NULL;
        do {
                bio = rq->bio;
-               bok = !fastfail && !bio->bi_error;
-       } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
+               bok = !fastfail && !bio->bi_status;
+       } while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
 
        /* cf. http://lkml.org/lkml/2006/10/31/28 */
        if (!fastfail)
@@ -1131,7 +1131,7 @@ ktiocomplete(struct frame *f)
                        ahout->cmdstat, ahin->cmdstat,
                        d->aoemajor, d->aoeminor);
 noskb:         if (buf)
-                       buf->bio->bi_error = -EIO;
+                       buf->bio->bi_status = BLK_STS_IOERR;
                goto out;
        }
 
@@ -1144,7 +1144,7 @@ noskb:            if (buf)
                                "aoe: runt data size in read from",
                                (long) d->aoemajor, d->aoeminor,
                               skb->len, n);
-                       buf->bio->bi_error = -EIO;
+                       buf->bio->bi_status = BLK_STS_IOERR;
                        break;
                }
                if (n > f->iter.bi_size) {
@@ -1152,7 +1152,7 @@ noskb:            if (buf)
                                "aoe: too-large data size in read from",
                                (long) d->aoemajor, d->aoeminor,
                                n, f->iter.bi_size);
-                       buf->bio->bi_error = -EIO;
+                       buf->bio->bi_status = BLK_STS_IOERR;
                        break;
                }
                bvcpy(skb, f->buf->bio, f->iter, n);
@@ -1654,7 +1654,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
        if (buf == NULL)
                return;
        buf->iter.bi_size = 0;
-       buf->bio->bi_error = -EIO;
+       buf->bio->bi_status = BLK_STS_IOERR;
        if (buf->nframesout == 0)
                aoe_end_buf(d, buf);
 }
index ffd1947..b28fefb 100644
@@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d)
        if (rq == NULL)
                return;
        while ((bio = d->ip.nxbio)) {
-               bio->bi_error = -EIO;
+               bio->bi_status = BLK_STS_IOERR;
                d->ip.nxbio = bio->bi_next;
                n = (unsigned long) rq->special;
                rq->special = (void *) --n;
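
The aoe hunks show the bio-level half of the same conversion: bio->bi_error (a negative errno) becomes bio->bi_status (a blk_status_t), set before bio_endio(). A hedged sketch, fail_bio being a hypothetical name:

        static void fail_bio(struct bio *bio)
        {
                bio->bi_status = BLK_STS_IOERR; /* was: bio->bi_error = -EIO */
                bio_endio(bio);
        }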
index fa69ecd..92da886 100644
@@ -378,7 +378,7 @@ static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
 static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
 static DEFINE_TIMER(fd_timer, check_change, 0, 0);
        
-static void fd_end_request_cur(int err)
+static void fd_end_request_cur(blk_status_t err)
 {
        if (!__blk_end_request_cur(fd_request, err))
                fd_request = NULL;
@@ -620,7 +620,7 @@ static void fd_error( void )
        fd_request->error_count++;
        if (fd_request->error_count >= MAX_ERRORS) {
                printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
-               fd_end_request_cur(-EIO);
+               fd_end_request_cur(BLK_STS_IOERR);
        }
        else if (fd_request->error_count == RECALIBRATE_ERRORS) {
                printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
@@ -739,7 +739,7 @@ static void do_fd_action( int drive )
                    }
                    else {
                        /* all sectors finished */
-                       fd_end_request_cur(0);
+                       fd_end_request_cur(BLK_STS_OK);
                        redo_fd_request();
                        return;
                    }
@@ -1144,7 +1144,7 @@ static void fd_rwsec_done1(int status)
        }
        else {
                /* all sectors finished */
-               fd_end_request_cur(0);
+               fd_end_request_cur(BLK_STS_OK);
                redo_fd_request();
        }
        return;
@@ -1445,7 +1445,7 @@ repeat:
        if (!UD.connected) {
                /* drive not connected */
                printk(KERN_ERR "Unknown Device: fd%d\n", drive );
-               fd_end_request_cur(-EIO);
+               fd_end_request_cur(BLK_STS_IOERR);
                goto repeat;
        }
                
@@ -1461,12 +1461,12 @@ repeat:
                /* user supplied disk type */
                if (--type >= NUM_DISK_MINORS) {
                        printk(KERN_WARNING "fd%d: invalid disk format", drive );
-                       fd_end_request_cur(-EIO);
+                       fd_end_request_cur(BLK_STS_IOERR);
                        goto repeat;
                }
                if (minor2disktype[type].drive_types > DriveType)  {
                        printk(KERN_WARNING "fd%d: unsupported disk format", drive );
-                       fd_end_request_cur(-EIO);
+                       fd_end_request_cur(BLK_STS_IOERR);
                        goto repeat;
                }
                type = minor2disktype[type].index;
@@ -1476,7 +1476,7 @@ repeat:
        }
        
        if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
-               fd_end_request_cur(-EIO);
+               fd_end_request_cur(BLK_STS_IOERR);
                goto repeat;
        }
 
index 57b574f..6112e99 100644
@@ -418,7 +418,6 @@ static struct brd_device *brd_alloc(int i)
 
        blk_queue_make_request(brd->brd_queue, brd_make_request);
        blk_queue_max_hw_sectors(brd->brd_queue, 1024);
-       blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 
        /* This is so fdisk will align partitions on 4k, because of
         * direct_access API needing 4k alignment, returning a PFN
index cd37550..02a6119 100644
@@ -1864,7 +1864,8 @@ static void cciss_softirq_done(struct request *rq)
        /* set the residual count for pc requests */
        if (blk_rq_is_passthrough(rq))
                scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
-       blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0);
+       blk_end_request_all(rq, scsi_req(rq)->result ?
+                       BLK_STS_IOERR : BLK_STS_OK);
 
        spin_lock_irqsave(&h->lock, flags);
        cmd_free(h, c);
@@ -1956,6 +1957,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
        disk->queue->cmd_size = sizeof(struct scsi_request);
        disk->queue->request_fn = do_cciss_request;
        disk->queue->queue_lock = &h->lock;
+       queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
        if (blk_init_allocated_queue(disk->queue) < 0)
                goto cleanup_queue;
 
index 8d7bcfa..e02c45c 100644
@@ -178,7 +178,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
        else
                submit_bio(bio);
        wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
-       if (!bio->bi_error)
+       if (!bio->bi_status)
                err = device->md_io.error;
 
  out:
index a804a41..809fd24 100644
@@ -959,16 +959,16 @@ static void drbd_bm_endio(struct bio *bio)
            !bm_test_page_unchanged(b->bm_pages[idx]))
                drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                /* ctx error will hold the completed-last non-zero error code,
                 * in case error codes differ. */
-               ctx->error = bio->bi_error;
+               ctx->error = blk_status_to_errno(bio->bi_status);
                bm_set_page_io_err(b->bm_pages[idx]);
                /* Not identical to on disk version of it.
                 * Is BM_PAGE_IO_ERROR enough? */
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
-                                       bio->bi_error, idx);
+                                       bio->bi_status, idx);
        } else {
                bm_clear_page_io_err(b->bm_pages[idx]);
                dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
index d5da45b..d17b6e6 100644
@@ -1441,6 +1441,9 @@ extern struct bio_set *drbd_md_io_bio_set;
 /* to allocate from that set */
 extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
 
+/* And a bio_set for cloning */
+extern struct bio_set *drbd_io_bio_set;
+
 extern struct mutex resources_mutex;
 
 extern int conn_lowest_minor(struct drbd_connection *connection);
@@ -1627,7 +1630,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
        __release(local);
        if (!bio->bi_bdev) {
                drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
-               bio->bi_error = -ENODEV;
+               bio->bi_status = BLK_STS_IOERR;
                bio_endio(bio);
                return;
        }
index 84455c3..5fb99e0 100644
@@ -128,6 +128,7 @@ mempool_t *drbd_request_mempool;
 mempool_t *drbd_ee_mempool;
 mempool_t *drbd_md_io_page_pool;
 struct bio_set *drbd_md_io_bio_set;
+struct bio_set *drbd_io_bio_set;
 
 /* I do not use a standard mempool, because:
    1) I want to hand out the pre-allocated objects first.
@@ -2098,6 +2099,8 @@ static void drbd_destroy_mempools(void)
 
        /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
 
+       if (drbd_io_bio_set)
+               bioset_free(drbd_io_bio_set);
        if (drbd_md_io_bio_set)
                bioset_free(drbd_md_io_bio_set);
        if (drbd_md_io_page_pool)
@@ -2115,6 +2118,7 @@ static void drbd_destroy_mempools(void)
        if (drbd_al_ext_cache)
                kmem_cache_destroy(drbd_al_ext_cache);
 
+       drbd_io_bio_set      = NULL;
        drbd_md_io_bio_set   = NULL;
        drbd_md_io_page_pool = NULL;
        drbd_ee_mempool      = NULL;
@@ -2142,6 +2146,7 @@ static int drbd_create_mempools(void)
        drbd_pp_pool         = NULL;
        drbd_md_io_page_pool = NULL;
        drbd_md_io_bio_set   = NULL;
+       drbd_io_bio_set      = NULL;
 
        /* caches */
        drbd_request_cache = kmem_cache_create(
@@ -2165,7 +2170,13 @@ static int drbd_create_mempools(void)
                goto Enomem;
 
        /* mempools */
-       drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
+       drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
+       if (drbd_io_bio_set == NULL)
+               goto Enomem;
+
+       drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
+                                          BIOSET_NEED_BVECS |
+                                          BIOSET_NEED_RESCUER);
        if (drbd_md_io_bio_set == NULL)
                goto Enomem;
 
@@ -2839,7 +2850,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
        /* Setting the max_hw_sectors to an odd value of 8kibyte here
           This triggers a max_bio_size message upon first attach or connect */
        blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
-       blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
        q->queue_lock = &resource->req_lock;
 
        device->md_io.page = alloc_page(GFP_KERNEL);
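
bioset_create() grew a flags argument in this cycle, so drbd now asks explicitly for the bvec pool and rescuer workqueue it needs; the new drbd_io_bio_set exists to back the bio_clone_fast() call in drbd_req.h further down. A hedged minimal sketch of the new signature:

        struct bio_set *bs;

        bs = bioset_create(BIO_POOL_SIZE, 0,
                           BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
        if (!bs)
                return -ENOMEM;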
index 02255a0..ad0fcb4 100644
@@ -2294,7 +2294,7 @@ _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_
 static enum drbd_ret_code
 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
 {
-       static enum drbd_ret_code rv;
+       enum drbd_ret_code rv;
        struct drbd_peer_device *peer_device;
        int i;
 
index 1b0a2be..c7e95e6 100644
@@ -1229,9 +1229,9 @@ void one_flush_endio(struct bio *bio)
        struct drbd_device *device = octx->device;
        struct issue_flush_context *ctx = octx->ctx;
 
-       if (bio->bi_error) {
-               ctx->error = bio->bi_error;
-               drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
+       if (bio->bi_status) {
+               ctx->error = blk_status_to_errno(bio->bi_status);
+               drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
        }
        kfree(octx);
        bio_put(bio);
index 6566243..f6e865b 100644
@@ -203,7 +203,7 @@ void start_new_tl_epoch(struct drbd_connection *connection)
 void complete_master_bio(struct drbd_device *device,
                struct bio_and_error *m)
 {
-       m->bio->bi_error = m->error;
+       m->bio->bi_status = errno_to_blk_status(m->error);
        bio_endio(m->bio);
        dec_ap_bio(device);
 }
@@ -1157,7 +1157,7 @@ static void drbd_process_discard_req(struct drbd_request *req)
 
        if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
                        GFP_NOIO, 0))
-               req->private_bio->bi_error = -EIO;
+               req->private_bio->bi_status = BLK_STS_IOERR;
        bio_endio(req->private_bio);
 }
 
@@ -1225,7 +1225,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
                /* only pass the error to the upper layers.
                 * if user cannot handle io errors, that's not our business. */
                drbd_err(device, "could not kmalloc() req\n");
-               bio->bi_error = -ENOMEM;
+               bio->bi_status = BLK_STS_RESOURCE;
                bio_endio(bio);
                return ERR_PTR(-ENOMEM);
        }
@@ -1560,7 +1560,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
        struct drbd_device *device = (struct drbd_device *) q->queuedata;
        unsigned long start_jif;
 
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        start_jif = jiffies;
 
index eb49e7f..9e1866a 100644
@@ -263,7 +263,7 @@ enum drbd_req_state_bits {
 static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
 {
        struct bio *bio;
-       bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+       bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
 
        req->private_bio = bio;
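
bio_clone_fast() shares the source bio's bvec table rather than copying it (unlike the removed bio_clone()), which is why it is cheaper but needs a dedicated bio_set, the drbd_io_bio_set created above. A hedged fragment of the call:

        struct bio *clone = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
        /* GFP_NOIO can reclaim, so the mempool-backed allocation should not
         * fail here, consistent with the old "cannot fail??" comment. */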
 
index 1afcb4e..1d8726a 100644
@@ -63,7 +63,7 @@ void drbd_md_endio(struct bio *bio)
        struct drbd_device *device;
 
        device = bio->bi_private;
-       device->md_io.error = bio->bi_error;
+       device->md_io.error = blk_status_to_errno(bio->bi_status);
 
        /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
         * to timeout on the lower level device, and eventually detach from it.
@@ -177,13 +177,13 @@ void drbd_peer_request_endio(struct bio *bio)
        bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
                          bio_op(bio) == REQ_OP_DISCARD;
 
-       if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
+       if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
                drbd_warn(device, "%s: error=%d s=%llus\n",
                                is_write ? (is_discard ? "discard" : "write")
-                                       : "read", bio->bi_error,
+                                       : "read", bio->bi_status,
                                (unsigned long long)peer_req->i.sector);
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                set_bit(__EE_WAS_ERROR, &peer_req->flags);
 
        bio_put(bio); /* no need for the bio anymore */
@@ -243,16 +243,16 @@ void drbd_request_endio(struct bio *bio)
                if (__ratelimit(&drbd_ratelimit_state))
                        drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
 
-               if (!bio->bi_error)
+               if (!bio->bi_status)
                        drbd_panic_after_delayed_completion_of_aborted_request(device);
        }
 
        /* to avoid recursion in __req_mod */
-       if (unlikely(bio->bi_error)) {
+       if (unlikely(bio->bi_status)) {
                switch (bio_op(bio)) {
                case REQ_OP_WRITE_ZEROES:
                case REQ_OP_DISCARD:
-                       if (bio->bi_error == -EOPNOTSUPP)
+                       if (bio->bi_status == BLK_STS_NOTSUPP)
                                what = DISCARD_COMPLETED_NOTSUPP;
                        else
                                what = DISCARD_COMPLETED_WITH_ERROR;
@@ -272,7 +272,7 @@ void drbd_request_endio(struct bio *bio)
        }
 
        bio_put(req->private_bio);
-       req->private_bio = ERR_PTR(bio->bi_error);
+       req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
 
        /* not req_mod(), we need irqsave here! */
        spin_lock_irqsave(&device->resource->req_lock, flags);
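
drbd keeps errno-typed fields internally, so the conversion uses the two translation helpers in both directions: blk_status_to_errno() when harvesting bio->bi_status, errno_to_blk_status() when completing the master bio. A hedged sketch (struct my_ctx and record_error are hypothetical):

        struct my_ctx { int error; };

        static void record_error(struct my_ctx *ctx, struct bio *bio)
        {
                if (bio->bi_status)     /* e.g. BLK_STS_IOERR <-> -EIO */
                        ctx->error = blk_status_to_errno(bio->bi_status);
        }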
index 60d4c76..ce82364 100644
@@ -2202,7 +2202,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
  * =============================
  */
 
-static void floppy_end_request(struct request *req, int error)
+static void floppy_end_request(struct request *req, blk_status_t error)
 {
        unsigned int nr_sectors = current_count_sectors;
        unsigned int drive = (unsigned long)req->rq_disk->private_data;
@@ -2263,7 +2263,7 @@ static void request_done(int uptodate)
                        DRWE->last_error_generation = DRS->generation;
                }
                spin_lock_irqsave(q->queue_lock, flags);
-               floppy_end_request(req, -EIO);
+               floppy_end_request(req, BLK_STS_IOERR);
                spin_unlock_irqrestore(q->queue_lock, flags);
        }
 }
@@ -3780,9 +3780,9 @@ static void floppy_rb0_cb(struct bio *bio)
        struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
        int drive = cbdata->drive;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                pr_info("floppy: error %d while reading block 0\n",
-                       bio->bi_error);
+                       bio->bi_status);
                set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
        }
        complete(&cbdata->complete);
@@ -4203,6 +4203,7 @@ static int __init do_floppy_init(void)
                        goto out_put_disk;
                }
 
+               blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH);
                blk_queue_max_hw_sectors(disks[drive]->queue, 64);
                disks[drive]->major = FLOPPY_MAJOR;
                disks[drive]->first_minor = TOMINOR(drive);
index ebbd0c3..0de1144 100644
@@ -221,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
 }
 
 static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
+                loff_t logical_blocksize)
 {
        loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
        sector_t x = (sector_t)size;
@@ -233,6 +234,12 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
                lo->lo_offset = offset;
        if (lo->lo_sizelimit != sizelimit)
                lo->lo_sizelimit = sizelimit;
+       if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
+               lo->lo_logical_blocksize = logical_blocksize;
+               blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
+               blk_queue_logical_block_size(lo->lo_queue,
+                                            lo->lo_logical_blocksize);
+       }
        set_capacity(lo->lo_disk, x);
        bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
        /* let user-space know about the new size */
@@ -457,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
                zero_fill_bio(bio);
        }
 
-       blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0);
+       blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
 }
 
 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -813,6 +820,7 @@ static void loop_config_discard(struct loop_device *lo)
        struct file *file = lo->lo_backing_file;
        struct inode *inode = file->f_mapping->host;
        struct request_queue *q = lo->lo_queue;
+       int lo_bits = 9;
 
        /*
         * We use punch hole to reclaim the free space used by the
@@ -832,8 +840,11 @@ static void loop_config_discard(struct loop_device *lo)
 
        q->limits.discard_granularity = inode->i_sb->s_blocksize;
        q->limits.discard_alignment = 0;
-       blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
-       blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
+       if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
+               lo_bits = blksize_bits(lo->lo_logical_blocksize);
+
+       blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
+       blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 }
 
@@ -843,10 +854,16 @@ static void loop_unprepare_queue(struct loop_device *lo)
        kthread_stop(lo->worker_task);
 }
 
+static int loop_kthread_worker_fn(void *worker_ptr)
+{
+       current->flags |= PF_LESS_THROTTLE;
+       return kthread_worker_fn(worker_ptr);
+}
+
 static int loop_prepare_queue(struct loop_device *lo)
 {
        kthread_init_worker(&lo->worker);
-       lo->worker_task = kthread_run(kthread_worker_fn,
+       lo->worker_task = kthread_run(loop_kthread_worker_fn,
                        &lo->worker, "loop%d", lo->lo_number);
        if (IS_ERR(lo->worker_task))
                return -ENOMEM;
@@ -921,6 +938,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 
        lo->use_dio = false;
        lo->lo_blocksize = lo_blocksize;
+       lo->lo_logical_blocksize = 512;
        lo->lo_device = bdev;
        lo->lo_flags = lo_flags;
        lo->lo_backing_file = file;
@@ -1086,6 +1104,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
        int err;
        struct loop_func_table *xfer;
        kuid_t uid = current_uid();
+       int lo_flags = lo->lo_flags;
 
        if (lo->lo_encrypt_key_size &&
            !uid_eq(lo->lo_key_owner, uid) &&
@@ -1118,12 +1137,30 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
        if (err)
                goto exit;
 
+       if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
+               if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
+                       lo->lo_logical_blocksize = 512;
+               lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
+               if (LO_INFO_BLOCKSIZE(info) != 512 &&
+                   LO_INFO_BLOCKSIZE(info) != 1024 &&
+                   LO_INFO_BLOCKSIZE(info) != 2048 &&
+                   LO_INFO_BLOCKSIZE(info) != 4096)
+                       return -EINVAL;
+               if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
+                       return -EINVAL;
+       }
+
        if (lo->lo_offset != info->lo_offset ||
-           lo->lo_sizelimit != info->lo_sizelimit)
-               if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
+           lo->lo_sizelimit != info->lo_sizelimit ||
+           lo->lo_flags != lo_flags ||
+           ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
+            lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
+               if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
+                                    LO_INFO_BLOCKSIZE(info))) {
                        err = -EFBIG;
                        goto exit;
                }
+       }
 
        loop_config_discard(lo);
 
@@ -1306,12 +1343,13 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
        return err;
 }
 
-static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
+static int loop_set_capacity(struct loop_device *lo)
 {
        if (unlikely(lo->lo_state != Lo_bound))
                return -ENXIO;
 
-       return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+       return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
+                               lo->lo_logical_blocksize);
 }
 
 static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1369,7 +1407,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
        case LOOP_SET_CAPACITY:
                err = -EPERM;
                if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
-                       err = loop_set_capacity(lo, bdev);
+                       err = loop_set_capacity(lo);
                break;
        case LOOP_SET_DIRECT_IO:
                err = -EPERM;
@@ -1645,7 +1683,7 @@ int loop_unregister_transfer(int number)
 EXPORT_SYMBOL(loop_register_transfer);
 EXPORT_SYMBOL(loop_unregister_transfer);
 
-static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
                const struct blk_mq_queue_data *bd)
 {
        struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -1654,7 +1692,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
        blk_mq_start_request(bd->rq);
 
        if (lo->lo_state != Lo_bound)
-               return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_IOERR;
 
        switch (req_op(cmd->rq)) {
        case REQ_OP_FLUSH:
@@ -1669,7 +1707,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        kthread_queue_work(&lo->worker, &cmd->work);
 
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 }
 
 static void loop_handle_cmd(struct loop_cmd *cmd)
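
loop's ->queue_rq() now returns a blk_status_t: BLK_MQ_RQ_QUEUE_OK, _BUSY and _ERROR map to BLK_STS_OK, BLK_STS_RESOURCE and BLK_STS_IOERR respectively (BLK_STS_RESOURCE still tells blk-mq to requeue and retry later, as in the mtip hunks below). A hedged sketch of the converted shape; my_queue_rq and my_ready are hypothetical:

        static bool my_ready(void *driver_data)
        {
                return true;    /* placeholder for real driver state */
        }

        static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                        const struct blk_mq_queue_data *bd)
        {
                blk_mq_start_request(bd->rq);
                if (!my_ready(hctx->queue->queuedata))
                        return BLK_STS_IOERR;   /* was BLK_MQ_RQ_QUEUE_ERROR */
                /* ... dispatch bd->rq ... */
                return BLK_STS_OK;              /* was BLK_MQ_RQ_QUEUE_OK */
        }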
index fecd3f9..2c096b9 100644
@@ -49,6 +49,7 @@ struct loop_device {
        struct file *   lo_backing_file;
        struct block_device *lo_device;
        unsigned        lo_blocksize;
+       unsigned        lo_logical_blocksize;
        void            *key_data; 
 
        gfp_t           old_gfp_mask;
index 3a779a4..61b046f 100644
@@ -532,7 +532,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
                                                struct smart_attr *attrib);
 
-static void mtip_complete_command(struct mtip_cmd *cmd, int status)
+static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
 {
        struct request *req = blk_mq_rq_from_pdu(cmd);
 
@@ -568,7 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
        if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
                cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
                dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
-               mtip_complete_command(cmd, -EIO);
+               mtip_complete_command(cmd, BLK_STS_IOERR);
                return;
        }
 
@@ -667,7 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
                                        tag,
                                        fail_reason != NULL ?
                                                fail_reason : "unknown");
-                                       mtip_complete_command(cmd, -ENODATA);
+                                       mtip_complete_command(cmd, BLK_STS_MEDIUM);
                                        continue;
                                }
                        }
@@ -690,7 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
                        dev_warn(&port->dd->pdev->dev,
                                "retiring tag %d\n", tag);
 
-                       mtip_complete_command(cmd, -EIO);
+                       mtip_complete_command(cmd, BLK_STS_IOERR);
                }
        }
        print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
@@ -1063,23 +1063,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
        /* insert request and run queue */
        blk_execute_rq(rq->q, NULL, rq, true);
 
-       rv = int_cmd->status;
-       if (rv < 0) {
-               if (rv == -ERESTARTSYS) { /* interrupted */
-                       dev_err(&dd->pdev->dev,
-                               "Internal command [%02X] was interrupted after %u ms\n",
-                               fis->command,
-                               jiffies_to_msecs(jiffies - start));
-                       rv = -EINTR;
-                       goto exec_ic_exit;
-               } else if (rv == 0) /* timeout */
-                       dev_err(&dd->pdev->dev,
-                               "Internal command did not complete [%02X] within timeout of  %lu ms\n",
-                               fis->command, timeout);
-               else
-                       dev_err(&dd->pdev->dev,
-                               "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
-                               fis->command, rv, timeout);
+       if (int_cmd->status) {
+               dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
+                               fis->command, int_cmd->status);
+               rv = -EIO;
 
                if (mtip_check_surprise_removal(dd->pdev) ||
                        test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
@@ -2753,7 +2740,7 @@ static void mtip_abort_cmd(struct request *req, void *data,
        dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
        clear_bit(req->tag, dd->port->cmds_to_issue);
-       cmd->status = -EIO;
+       cmd->status = BLK_STS_IOERR;
        mtip_softirq_done_fn(req);
 }
 
@@ -3597,7 +3584,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
                int err;
 
                err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
-               blk_mq_end_request(rq, err);
+               blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
                return 0;
        }
 
@@ -3633,8 +3620,8 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
        return false;
 }
 
-static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
-                                  struct request *rq)
+static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
+               struct request *rq)
 {
        struct driver_data *dd = hctx->queue->queuedata;
        struct mtip_int_cmd *icmd = rq->special;
@@ -3642,7 +3629,7 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
        struct mtip_cmd_sg *command_sg;
 
        if (mtip_commands_active(dd->port))
-               return BLK_MQ_RQ_QUEUE_BUSY;
+               return BLK_STS_RESOURCE;
 
        /* Populate the SG list */
        cmd->command_header->opts =
@@ -3666,10 +3653,10 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
 
        blk_mq_start_request(rq);
        mtip_issue_non_ncq_command(dd->port, rq->tag);
-       return BLK_MQ_RQ_QUEUE_OK;
+       return 0;
 }
 
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
                         const struct blk_mq_queue_data *bd)
 {
        struct request *rq = bd->rq;
@@ -3681,15 +3668,14 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
                return mtip_issue_reserved_cmd(hctx, rq);
 
        if (unlikely(mtip_check_unal_depth(hctx, rq)))
-               return BLK_MQ_RQ_QUEUE_BUSY;
+               return BLK_STS_RESOURCE;
 
        blk_mq_start_request(rq);
 
        ret = mtip_submit_request(hctx, rq);
        if (likely(!ret))
-               return BLK_MQ_RQ_QUEUE_OK;
-
-       return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_OK;
+       return BLK_STS_IOERR;
 }
 
 static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
@@ -3730,7 +3716,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
        if (reserved) {
                struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 
-               cmd->status = -ETIME;
+               cmd->status = BLK_STS_TIMEOUT;
                return BLK_EH_HANDLED;
        }
 
@@ -3961,7 +3947,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
 {
        struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-       cmd->status = -ENODEV;
+       cmd->status = BLK_STS_IOERR;
        blk_mq_complete_request(rq);
 }
 
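mtip32xx now keeps a blk_status_t in its per-command area instead of a negative errno: timeouts store BLK_STS_TIMEOUT and return BLK_EH_HANDLED so the core completes the request, and the completion callback forwards cmd->status. A sketch of that split, with my_ names standing in for the driver's own:

#include <linux/blk-mq.h>

struct my_cmd {
	blk_status_t status;			/* hypothetical per-request pdu */
};

static enum blk_eh_timer_return my_timeout(struct request *req, bool reserved)
{
	struct my_cmd *cmd = blk_mq_rq_to_pdu(req);

	cmd->status = BLK_STS_TIMEOUT;		/* read back in ->complete */
	return BLK_EH_HANDLED;			/* core completes the request */
}

static void my_complete(struct request *req)	/* ->complete callback */
{
	struct my_cmd *cmd = blk_mq_rq_to_pdu(req);

	blk_mq_end_request(req, cmd->status);	/* propagate to the bios */
}
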
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 37b8e3e..e8286af 100644
@@ -342,7 +342,7 @@ struct mtip_cmd {
        int retries; /* The number of retries left for this command. */
 
        int direction; /* Data transfer direction */
-       int status;
+       blk_status_t status;
 };
 
 /* Structure used to describe a port. */
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f3f191b..977ec96 100644
@@ -116,7 +116,7 @@ struct nbd_cmd {
        int index;
        int cookie;
        struct completion send_complete;
-       int status;
+       blk_status_t status;
 };
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -286,7 +286,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
        struct nbd_config *config;
 
        if (!refcount_inc_not_zero(&nbd->config_refs)) {
-               cmd->status = -EIO;
+               cmd->status = BLK_STS_TIMEOUT;
                return BLK_EH_HANDLED;
        }
 
@@ -331,7 +331,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
                                    "Connection timed out\n");
        }
        set_bit(NBD_TIMEDOUT, &config->runtime_flags);
-       cmd->status = -EIO;
+       cmd->status = BLK_STS_IOERR;
        sock_shutdown(nbd);
        nbd_config_put(nbd);
 
@@ -400,6 +400,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
        unsigned long size = blk_rq_bytes(req);
        struct bio *bio;
        u32 type;
+       u32 nbd_cmd_flags = 0;
        u32 tag = blk_mq_unique_tag(req);
        int sent = nsock->sent, skip = 0;
 
@@ -429,6 +430,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                return -EIO;
        }
 
+       if (req->cmd_flags & REQ_FUA)
+               nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
+
        /* We did a partial send previously, and we at least sent the whole
         * request struct, so just go and send the rest of the pages in the
         * request.
@@ -442,7 +446,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
        }
        cmd->index = index;
        cmd->cookie = nsock->cookie;
-       request.type = htonl(type);
+       request.type = htonl(type | nbd_cmd_flags);
        if (type != NBD_CMD_FLUSH) {
                request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
                request.len = htonl(size);
@@ -465,7 +469,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                                nsock->pending = req;
                                nsock->sent = sent;
                        }
-                       return BLK_MQ_RQ_QUEUE_BUSY;
+                       return BLK_STS_RESOURCE;
                }
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                        "Send control failed (result %d)\n", result);
@@ -506,7 +510,7 @@ send_pages:
                                         */
                                        nsock->pending = req;
                                        nsock->sent = sent;
-                                       return BLK_MQ_RQ_QUEUE_BUSY;
+                                       return BLK_STS_RESOURCE;
                                }
                                dev_err(disk_to_dev(nbd->disk),
                                        "Send data failed (result %d)\n",
@@ -574,7 +578,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
        if (ntohl(reply.error)) {
                dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
                        ntohl(reply.error));
-               cmd->status = -EIO;
+               cmd->status = BLK_STS_IOERR;
                return cmd;
        }
 
@@ -599,7 +603,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
                                 */
                                if (nbd_disconnected(config) ||
                                    config->num_connections <= 1) {
-                                       cmd->status = -EIO;
+                                       cmd->status = BLK_STS_IOERR;
                                        return cmd;
                                }
                                return ERR_PTR(-EIO);
@@ -651,7 +655,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
        if (!blk_mq_request_started(req))
                return;
        cmd = blk_mq_rq_to_pdu(req);
-       cmd->status = -EIO;
+       cmd->status = BLK_STS_IOERR;
        blk_mq_complete_request(req);
 }
 
@@ -740,7 +744,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
                nbd_config_put(nbd);
                return -EINVAL;
        }
-       cmd->status = 0;
+       cmd->status = BLK_STS_OK;
 again:
        nsock = config->socks[index];
        mutex_lock(&nsock->tx_lock);
@@ -794,7 +798,7 @@ out:
        return ret;
 }
 
-static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
                        const struct blk_mq_queue_data *bd)
 {
        struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -818,13 +822,9 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
         * appropriate.
         */
        ret = nbd_handle_cmd(cmd, hctx->queue_num);
-       if (ret < 0)
-               ret = BLK_MQ_RQ_QUEUE_ERROR;
-       if (!ret)
-               ret = BLK_MQ_RQ_QUEUE_OK;
        complete(&cmd->send_complete);
 
-       return ret;
+       return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK;
 }
 
 static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
@@ -910,6 +910,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
                        continue;
                }
                sk_set_memalloc(sock->sk);
+               sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
                atomic_inc(&config->recv_threads);
                refcount_inc(&nbd->config_refs);
                old = nsock->sock;
@@ -957,8 +958,12 @@ static void nbd_parse_flags(struct nbd_device *nbd)
                set_disk_ro(nbd->disk, false);
        if (config->flags & NBD_FLAG_SEND_TRIM)
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
-       if (config->flags & NBD_FLAG_SEND_FLUSH)
-               blk_queue_write_cache(nbd->disk->queue, true, false);
+       if (config->flags & NBD_FLAG_SEND_FLUSH) {
+               if (config->flags & NBD_FLAG_SEND_FUA)
+                       blk_queue_write_cache(nbd->disk->queue, true, true);
+               else
+                       blk_queue_write_cache(nbd->disk->queue, true, false);
+       }
        else
                blk_queue_write_cache(nbd->disk->queue, false, false);
 }
@@ -1071,6 +1076,7 @@ static int nbd_start_device(struct nbd_device *nbd)
                        return -ENOMEM;
                }
                sk_set_memalloc(config->socks[i]->sock->sk);
+               config->socks[i]->sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
                atomic_inc(&config->recv_threads);
                refcount_inc(&nbd->config_refs);
                INIT_WORK(&args->work, recv_work);
@@ -1305,6 +1311,8 @@ static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
                seq_puts(s, "NBD_FLAG_READ_ONLY\n");
        if (flags & NBD_FLAG_SEND_FLUSH)
                seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
+       if (flags & NBD_FLAG_SEND_FUA)
+               seq_puts(s, "NBD_FLAG_SEND_FUA\n");
        if (flags & NBD_FLAG_SEND_TRIM)
                seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
 
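Beyond the status conversion, the nbd hunks add FUA support: NBD_FLAG_SEND_FUA from the server turns on REQ_FUA via blk_queue_write_cache(), and a request's REQ_FUA becomes NBD_CMD_FLAG_FUA on the wire. The nested if/else above collapses to the sketch below (helper names hypothetical); note that FUA is only advertised when flush is too:

#include <linux/blkdev.h>
#include <linux/nbd.h>

static void my_apply_cache_flags(struct request_queue *q, u32 flags)
{
	bool flush = flags & NBD_FLAG_SEND_FLUSH;
	bool fua = flush && (flags & NBD_FLAG_SEND_FUA);

	blk_queue_write_cache(q, flush, fua);	/* REQ_PREFLUSH/REQ_FUA support */
}

static u32 my_wire_flags(struct request *req)
{
	/* forward a FUA write to the server */
	return (req->cmd_flags & REQ_FUA) ? NBD_CMD_FLAG_FUA : 0;
}
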
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index d946e1e..71f4422 100644
@@ -35,7 +35,8 @@ struct nullb {
        struct request_queue *q;
        struct gendisk *disk;
        struct nvm_dev *ndev;
-       struct blk_mq_tag_set tag_set;
+       struct blk_mq_tag_set *tag_set;
+       struct blk_mq_tag_set __tag_set;
        struct hrtimer timer;
        unsigned int queue_depth;
        spinlock_t lock;
@@ -50,6 +51,7 @@ static struct mutex lock;
 static int null_major;
 static int nullb_indexes;
 static struct kmem_cache *ppa_cache;
+static struct blk_mq_tag_set tag_set;
 
 enum {
        NULL_IRQ_NONE           = 0,
@@ -109,7 +111,7 @@ static int bs = 512;
 module_param(bs, int, S_IRUGO);
 MODULE_PARM_DESC(bs, "Block size (in bytes)");
 
-static int nr_devices = 2;
+static int nr_devices = 1;
 module_param(nr_devices, int, S_IRUGO);
 MODULE_PARM_DESC(nr_devices, "Number of devices to register");
 
@@ -121,6 +123,10 @@ static bool blocking;
 module_param(blocking, bool, S_IRUGO);
 MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
 
+static bool shared_tags;
+module_param(shared_tags, bool, S_IRUGO);
+MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
+
 static int irqmode = NULL_IRQ_SOFTIRQ;
 
 static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -229,11 +235,11 @@ static void end_cmd(struct nullb_cmd *cmd)
 
        switch (queue_mode)  {
        case NULL_Q_MQ:
-               blk_mq_end_request(cmd->rq, 0);
+               blk_mq_end_request(cmd->rq, BLK_STS_OK);
                return;
        case NULL_Q_RQ:
                INIT_LIST_HEAD(&cmd->rq->queuelist);
-               blk_end_request_all(cmd->rq, 0);
+               blk_end_request_all(cmd->rq, BLK_STS_OK);
                break;
        case NULL_Q_BIO:
                bio_endio(cmd->bio);
@@ -356,7 +362,7 @@ static void null_request_fn(struct request_queue *q)
        }
 }
 
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
                         const struct blk_mq_queue_data *bd)
 {
        struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -373,34 +379,11 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
        blk_mq_start_request(bd->rq);
 
        null_handle_cmd(cmd);
-       return BLK_MQ_RQ_QUEUE_OK;
-}
-
-static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
-{
-       BUG_ON(!nullb);
-       BUG_ON(!nq);
-
-       init_waitqueue_head(&nq->wait);
-       nq->queue_depth = nullb->queue_depth;
-}
-
-static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
-                         unsigned int index)
-{
-       struct nullb *nullb = data;
-       struct nullb_queue *nq = &nullb->queues[index];
-
-       hctx->driver_data = nq;
-       null_init_queue(nullb, nq);
-       nullb->nr_queues++;
-
-       return 0;
+       return BLK_STS_OK;
 }
 
 static const struct blk_mq_ops null_mq_ops = {
        .queue_rq       = null_queue_rq,
-       .init_hctx      = null_init_hctx,
        .complete       = null_softirq_done_fn,
 };
 
@@ -422,11 +405,12 @@ static void cleanup_queues(struct nullb *nullb)
 
 #ifdef CONFIG_NVM
 
-static void null_lnvm_end_io(struct request *rq, int error)
+static void null_lnvm_end_io(struct request *rq, blk_status_t status)
 {
        struct nvm_rq *rqd = rq->end_io_data;
 
-       rqd->error = error;
+       /* XXX: lightnvm core seems to expect NVM_RSP_* values here.. */
+       rqd->error = status ? -EIO : 0;
        nvm_end_io(rqd);
 
        blk_put_request(rq);
@@ -591,8 +575,8 @@ static void null_del_dev(struct nullb *nullb)
        else
                del_gendisk(nullb->disk);
        blk_cleanup_queue(nullb->q);
-       if (queue_mode == NULL_Q_MQ)
-               blk_mq_free_tag_set(&nullb->tag_set);
+       if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+               blk_mq_free_tag_set(nullb->tag_set);
        if (!use_lightnvm)
                put_disk(nullb->disk);
        cleanup_queues(nullb);
@@ -614,6 +598,32 @@ static const struct block_device_operations null_fops = {
        .release =      null_release,
 };
 
+static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+{
+       BUG_ON(!nullb);
+       BUG_ON(!nq);
+
+       init_waitqueue_head(&nq->wait);
+       nq->queue_depth = nullb->queue_depth;
+}
+
+static void null_init_queues(struct nullb *nullb)
+{
+       struct request_queue *q = nullb->q;
+       struct blk_mq_hw_ctx *hctx;
+       struct nullb_queue *nq;
+       int i;
+
+       queue_for_each_hw_ctx(q, hctx, i) {
+               if (!hctx->nr_ctx || !hctx->tags)
+                       continue;
+               nq = &nullb->queues[i];
+               hctx->driver_data = nq;
+               null_init_queue(nullb, nq);
+               nullb->nr_queues++;
+       }
+}
+
 static int setup_commands(struct nullb_queue *nq)
 {
        struct nullb_cmd *cmd;
@@ -694,6 +704,22 @@ static int null_gendisk_register(struct nullb *nullb)
        return 0;
 }
 
+static int null_init_tag_set(struct blk_mq_tag_set *set)
+{
+       set->ops = &null_mq_ops;
+       set->nr_hw_queues = submit_queues;
+       set->queue_depth = hw_queue_depth;
+       set->numa_node = home_node;
+       set->cmd_size   = sizeof(struct nullb_cmd);
+       set->flags = BLK_MQ_F_SHOULD_MERGE;
+       set->driver_data = NULL;
+
+       if (blocking)
+               set->flags |= BLK_MQ_F_BLOCKING;
+
+       return blk_mq_alloc_tag_set(set);
+}
+
 static int null_add_dev(void)
 {
        struct nullb *nullb;
@@ -715,26 +741,23 @@ static int null_add_dev(void)
                goto out_free_nullb;
 
        if (queue_mode == NULL_Q_MQ) {
-               nullb->tag_set.ops = &null_mq_ops;
-               nullb->tag_set.nr_hw_queues = submit_queues;
-               nullb->tag_set.queue_depth = hw_queue_depth;
-               nullb->tag_set.numa_node = home_node;
-               nullb->tag_set.cmd_size = sizeof(struct nullb_cmd);
-               nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
-               nullb->tag_set.driver_data = nullb;
-
-               if (blocking)
-                       nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
-
-               rv = blk_mq_alloc_tag_set(&nullb->tag_set);
+               if (shared_tags) {
+                       nullb->tag_set = &tag_set;
+                       rv = 0;
+               } else {
+                       nullb->tag_set = &nullb->__tag_set;
+                       rv = null_init_tag_set(nullb->tag_set);
+               }
+
                if (rv)
                        goto out_cleanup_queues;
 
-               nullb->q = blk_mq_init_queue(&nullb->tag_set);
+               nullb->q = blk_mq_init_queue(nullb->tag_set);
                if (IS_ERR(nullb->q)) {
                        rv = -ENOMEM;
                        goto out_cleanup_tags;
                }
+               null_init_queues(nullb);
        } else if (queue_mode == NULL_Q_BIO) {
                nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
                if (!nullb->q) {
@@ -787,8 +810,8 @@ static int null_add_dev(void)
 out_cleanup_blk_queue:
        blk_cleanup_queue(nullb->q);
 out_cleanup_tags:
-       if (queue_mode == NULL_Q_MQ)
-               blk_mq_free_tag_set(&nullb->tag_set);
+       if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+               blk_mq_free_tag_set(nullb->tag_set);
 out_cleanup_queues:
        cleanup_queues(nullb);
 out_free_nullb:
@@ -821,6 +844,9 @@ static int __init null_init(void)
                queue_mode = NULL_Q_MQ;
        }
 
+       if (queue_mode == NULL_Q_MQ && shared_tags)
+               null_init_tag_set(&tag_set);
+
        if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
                if (submit_queues < nr_online_nodes) {
                        pr_warn("null_blk: submit_queues param is set to %u.",
@@ -881,6 +907,9 @@ static void __exit null_exit(void)
        }
        mutex_unlock(&lock);
 
+       if (queue_mode == NULL_Q_MQ && shared_tags)
+               blk_mq_free_tag_set(&tag_set);
+
        kmem_cache_destroy(ppa_cache);
 }
 
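null_blk's new shared_tags mode points every device at a single static blk_mq_tag_set, so tags and hardware queues are shared across gendisks; without it, each device keeps its own set in __tag_set. The pattern in rough outline; my_mq_ops is a placeholder that would need a real ->queue_rq:

#include <linux/blk-mq.h>
#include <linux/numa.h>

static const struct blk_mq_ops my_mq_ops;	/* placeholder ops table */
static struct blk_mq_tag_set my_shared_set;	/* one set, many queues */

static int my_init_set(struct blk_mq_tag_set *set)
{
	set->ops = &my_mq_ops;
	set->nr_hw_queues = 1;
	set->queue_depth = 64;
	set->numa_node = NUMA_NO_NODE;
	set->cmd_size = 0;			/* per-request pdu size, if any */
	set->flags = BLK_MQ_F_SHOULD_MERGE;
	return blk_mq_alloc_tag_set(set);	/* once, at module init */
}

static struct request_queue *my_add_device_queue(void)
{
	/* each device attaches its queue to the same shared set */
	return blk_mq_init_queue(&my_shared_set);
}
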
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index b1267ef..7b8c636 100644
@@ -305,6 +305,7 @@ static void pcd_init_units(void)
                        put_disk(disk);
                        continue;
                }
+               blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
                cd->disk = disk;
                cd->pi = &cd->pia;
                cd->present = 0;
@@ -783,7 +784,7 @@ static void pcd_request(void)
                        ps_set_intr(do_pcd_read, NULL, 0, nice);
                        return;
                } else {
-                       __blk_end_request_all(pcd_req, -EIO);
+                       __blk_end_request_all(pcd_req, BLK_STS_IOERR);
                        pcd_req = NULL;
                }
        }
@@ -794,7 +795,7 @@ static void do_pcd_request(struct request_queue *q)
        pcd_request();
 }
 
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
 {
        unsigned long saved_flags;
 
@@ -837,7 +838,7 @@ static void pcd_start(void)
 
        if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
                pcd_bufblk = -1;
-               next_request(-EIO);
+               next_request(BLK_STS_IOERR);
                return;
        }
 
@@ -871,7 +872,7 @@ static void do_pcd_read_drq(void)
                        return;
                }
                pcd_bufblk = -1;
-               next_request(-EIO);
+               next_request(BLK_STS_IOERR);
                return;
        }
 
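pcd (and pd/pf just below) gain an explicit bounce limit because the block core no longer applies a highmem bounce limit by default as of this series; PIO drivers that touch bio data through kernel virtual addresses now opt in at queue-setup time. In sketch form:

#include <linux/blkdev.h>

static void my_setup_queue(struct request_queue *q)
{
	/* have the core copy highmem bios through lowmem bounce pages */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}
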
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 7d2402f..27a44b9 100644
@@ -438,7 +438,7 @@ static void run_fsm(void)
                                phase = NULL;
                                spin_lock_irqsave(&pd_lock, saved_flags);
                                if (!__blk_end_request_cur(pd_req,
-                                               res == Ok ? 0 : -EIO)) {
+                                               res == Ok ? 0 : BLK_STS_IOERR)) {
                                        if (!set_next_request())
                                                stop = 1;
                                }
@@ -863,6 +863,7 @@ static void pd_probe_drive(struct pd_unit *disk)
                return;
        }
        blk_queue_max_hw_sectors(p->queue, cluster);
+       blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
 
        if (disk->drive == -1) {
                for (disk->drive = 0; disk->drive <= 1; disk->drive++)
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f24ca73..eef7a91 100644
@@ -293,6 +293,7 @@ static void __init pf_init_units(void)
                        return;
                }
                blk_queue_max_segments(disk->queue, cluster);
+               blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
                pf->disk = disk;
                pf->pi = &pf->pia;
                pf->media_status = PF_NM;
@@ -801,7 +802,7 @@ static int set_next_request(void)
        return pf_req != NULL;
 }
 
-static void pf_end_request(int err)
+static void pf_end_request(blk_status_t err)
 {
        if (pf_req && !__blk_end_request_cur(pf_req, err))
                pf_req = NULL;
@@ -821,7 +822,7 @@ repeat:
        pf_count = blk_rq_cur_sectors(pf_req);
 
        if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
-               pf_end_request(-EIO);
+               pf_end_request(BLK_STS_IOERR);
                goto repeat;
        }
 
@@ -836,7 +837,7 @@ repeat:
                pi_do_claimed(pf_current->pi, do_pf_write);
        else {
                pf_busy = 0;
-               pf_end_request(-EIO);
+               pf_end_request(BLK_STS_IOERR);
                goto repeat;
        }
 }
@@ -868,7 +869,7 @@ static int pf_next_buf(void)
        return 0;
 }
 
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
 {
        unsigned long saved_flags;
 
@@ -896,7 +897,7 @@ static void do_pf_read_start(void)
                        pi_do_claimed(pf_current->pi, do_pf_read_start);
                        return;
                }
-               next_request(-EIO);
+               next_request(BLK_STS_IOERR);
                return;
        }
        pf_mask = STAT_DRQ;
@@ -915,7 +916,7 @@ static void do_pf_read_drq(void)
                                pi_do_claimed(pf_current->pi, do_pf_read_start);
                                return;
                        }
-                       next_request(-EIO);
+                       next_request(BLK_STS_IOERR);
                        return;
                }
                pi_read_block(pf_current->pi, pf_buf, 512);
@@ -942,7 +943,7 @@ static void do_pf_write_start(void)
                        pi_do_claimed(pf_current->pi, do_pf_write_start);
                        return;
                }
-               next_request(-EIO);
+               next_request(BLK_STS_IOERR);
                return;
        }
 
@@ -955,7 +956,7 @@ static void do_pf_write_start(void)
                                pi_do_claimed(pf_current->pi, do_pf_write_start);
                                return;
                        }
-                       next_request(-EIO);
+                       next_request(BLK_STS_IOERR);
                        return;
                }
                pi_write_block(pf_current->pi, pf_buf, 512);
@@ -975,7 +976,7 @@ static void do_pf_write_done(void)
                        pi_do_claimed(pf_current->pi, do_pf_write_start);
                        return;
                }
-               next_request(-EIO);
+               next_request(BLK_STS_IOERR);
                return;
        }
        pi_disconnect(pf_current->pi);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 205b865..467beca 100644
@@ -98,6 +98,7 @@ static int write_congestion_on  = PKT_WRITE_CONGESTION_ON;
 static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
 static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
 static mempool_t *psd_pool;
+static struct bio_set *pkt_bio_set;
 
 static struct class    *class_pktcdvd = NULL;    /* /sys/class/pktcdvd */
 static struct dentry   *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -707,7 +708,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
                             REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
        if (IS_ERR(rq))
                return PTR_ERR(rq);
-       scsi_req_init(rq);
 
        if (cgc->buflen) {
                ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -952,9 +952,9 @@ static void pkt_end_io_read(struct bio *bio)
 
        pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
                bio, (unsigned long long)pkt->sector,
-               (unsigned long long)bio->bi_iter.bi_sector, bio->bi_error);
+               (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                atomic_inc(&pkt->io_errors);
        if (atomic_dec_and_test(&pkt->io_wait)) {
                atomic_inc(&pkt->run_sm);
@@ -969,7 +969,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
        struct pktcdvd_device *pd = pkt->pd;
        BUG_ON(!pd);
 
-       pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error);
+       pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
 
        pd->stats.pkt_ended++;
 
@@ -1305,16 +1305,16 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
        pkt_queue_bio(pd, pkt->w_bio);
 }
 
-static void pkt_finish_packet(struct packet_data *pkt, int error)
+static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
 {
        struct bio *bio;
 
-       if (error)
+       if (status)
                pkt->cache_valid = 0;
 
        /* Finish all bios corresponding to this packet */
        while ((bio = bio_list_pop(&pkt->orig_bios))) {
-               bio->bi_error = error;
+               bio->bi_status = status;
                bio_endio(bio);
        }
 }
@@ -1349,7 +1349,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
                        if (atomic_read(&pkt->io_wait) > 0)
                                return;
 
-                       if (!pkt->w_bio->bi_error) {
+                       if (!pkt->w_bio->bi_status) {
                                pkt_set_state(pkt, PACKET_FINISHED_STATE);
                        } else {
                                pkt_set_state(pkt, PACKET_RECOVERY_STATE);
@@ -1366,7 +1366,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
                        break;
 
                case PACKET_FINISHED_STATE:
-                       pkt_finish_packet(pkt, pkt->w_bio->bi_error);
+                       pkt_finish_packet(pkt, pkt->w_bio->bi_status);
                        return;
 
                default:
@@ -2301,7 +2301,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
        struct packet_stacked_data *psd = bio->bi_private;
        struct pktcdvd_device *pd = psd->pd;
 
-       psd->bio->bi_error = bio->bi_error;
+       psd->bio->bi_status = bio->bi_status;
        bio_put(bio);
        bio_endio(psd->bio);
        mempool_free(psd, psd_pool);
@@ -2310,7 +2310,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
 
 static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-       struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+       struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
        struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
 
        psd->pd = pd;
@@ -2412,9 +2412,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
        char b[BDEVNAME_SIZE];
        struct bio *split;
 
-       blk_queue_bounce(q, &bio);
-
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        pd = q->queuedata;
        if (!pd) {
@@ -2455,7 +2453,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
 
                        split = bio_split(bio, last_zone -
                                          bio->bi_iter.bi_sector,
-                                         GFP_NOIO, fs_bio_set);
+                                         GFP_NOIO, pkt_bio_set);
                        bio_chain(split, bio);
                } else {
                        split = bio;
@@ -2583,6 +2581,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
        bdev = bdget(dev);
        if (!bdev)
                return -ENOMEM;
+       if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
+               WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+               bdput(bdev);
+               return -EINVAL;
+       }
        ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
        if (ret)
                return ret;
@@ -2919,6 +2922,11 @@ static int __init pkt_init(void)
                                        sizeof(struct packet_stacked_data));
        if (!psd_pool)
                return -ENOMEM;
+       pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+       if (!pkt_bio_set) {
+               mempool_destroy(psd_pool);
+               return -ENOMEM;
+       }
 
        ret = register_blkdev(pktdev_major, DRIVER_NAME);
        if (ret < 0) {
@@ -2951,6 +2959,7 @@ out:
        unregister_blkdev(pktdev_major, DRIVER_NAME);
 out2:
        mempool_destroy(psd_pool);
+       bioset_free(pkt_bio_set);
        return ret;
 }
 
@@ -2964,6 +2973,7 @@ static void __exit pkt_exit(void)
 
        unregister_blkdev(pktdev_major, DRIVER_NAME);
        mempool_destroy(psd_pool);
+       bioset_free(pkt_bio_set);
 }
 
 MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index a809e3e..075662f 100644
@@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
        if (res) {
                dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
                        __LINE__, op, res);
-               __blk_end_request_all(req, -EIO);
+               __blk_end_request_all(req, BLK_STS_IOERR);
                return 0;
        }
 
@@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
        if (res) {
                dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
                        __func__, __LINE__, res);
-               __blk_end_request_all(req, -EIO);
+               __blk_end_request_all(req, BLK_STS_IOERR);
                return 0;
        }
 
@@ -208,7 +208,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
                        break;
                default:
                        blk_dump_rq_flags(req, DEVICE_NAME " bad request");
-                       __blk_end_request_all(req, -EIO);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
                }
        }
 }
@@ -231,7 +231,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
        struct ps3_storage_device *dev = data;
        struct ps3disk_private *priv;
        struct request *req;
-       int res, read, error;
+       int res, read;
+       blk_status_t error;
        u64 tag, status;
        const char *op;
 
@@ -269,7 +270,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
        if (status) {
                dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
                        __LINE__, op, status);
-               error = -EIO;
+               error = BLK_STS_IOERR;
        } else {
                dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
                        __LINE__, op);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 456b4fe..e0e81ca 100644
@@ -428,7 +428,7 @@ static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
        kfree(priv->cache.tags);
 }
 
-static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
+static blk_status_t ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
                        size_t len, size_t *retlen, u_char *buf)
 {
        struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -438,7 +438,7 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
                (unsigned int)from, len);
 
        if (from >= priv->size)
-               return -EIO;
+               return BLK_STS_IOERR;
 
        if (len > priv->size - from)
                len = priv->size - from;
@@ -472,14 +472,14 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
        return 0;
 }
 
-static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
+static blk_status_t ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
                         size_t len, size_t *retlen, const u_char *buf)
 {
        struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
        unsigned int cached, count;
 
        if (to >= priv->size)
-               return -EIO;
+               return BLK_STS_IOERR;
 
        if (len > priv->size - to)
                len = priv->size - to;
@@ -554,7 +554,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
        int write = bio_data_dir(bio) == WRITE;
        const char *op = write ? "write" : "read";
        loff_t offset = bio->bi_iter.bi_sector << 9;
-       int error = 0;
+       blk_status_t error = 0;
        struct bio_vec bvec;
        struct bvec_iter iter;
        struct bio *next;
@@ -578,7 +578,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
 
                if (retlen != len) {
                        dev_err(&dev->core, "Short %s\n", op);
-                       error = -EIO;
+                       error = BLK_STS_IOERR;
                        goto out;
                }
 
@@ -593,7 +593,7 @@ out:
        next = bio_list_peek(&priv->list);
        spin_unlock_irq(&priv->lock);
 
-       bio->bi_error = error;
+       bio->bi_status = error;
        bio_endio(bio);
        return next;
 }
@@ -606,7 +606,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
 
        dev_dbg(&dev->core, "%s\n", __func__);
 
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        spin_lock_irq(&priv->lock);
        busy = !bio_list_empty(&priv->list);
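
Like pktcdvd, ps3vram moves from bio->bi_error (a negative errno) to bio->bi_status (a blk_status_t); zero still means success, so truth-value checks carry over unchanged. Completion under the new field, as a sketch:

#include <linux/bio.h>

static void my_end_bio(struct bio *bio, bool failed)
{
	bio->bi_status = failed ? BLK_STS_IOERR : BLK_STS_OK;
	bio_endio(bio);		/* runs bi_end_io with bi_status in place */
}
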
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c16f745..b008b6a 100644
@@ -442,6 +442,8 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
 static struct kmem_cache       *rbd_img_request_cache;
 static struct kmem_cache       *rbd_obj_request_cache;
 
+static struct bio_set          *rbd_bio_clone;
+
 static int rbd_major;
 static DEFINE_IDA(rbd_dev_id_ida);
 
@@ -1363,7 +1365,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
 {
        struct bio *bio;
 
-       bio = bio_clone(bio_src, gfpmask);
+       bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
        if (!bio)
                return NULL;    /* ENOMEM */
 
@@ -2293,11 +2295,13 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
                rbd_assert(img_request->obj_request != NULL);
                more = obj_request->which < img_request->obj_request_count - 1;
        } else {
+               blk_status_t status = errno_to_blk_status(result);
+
                rbd_assert(img_request->rq != NULL);
 
-               more = blk_update_request(img_request->rq, result, xferred);
+               more = blk_update_request(img_request->rq, status, xferred);
                if (!more)
-                       __blk_mq_end_request(img_request->rq, result);
+                       __blk_mq_end_request(img_request->rq, status);
        }
 
        return more;
@@ -4150,17 +4154,17 @@ err_rq:
                         obj_op_name(op_type), length, offset, result);
        ceph_put_snap_context(snapc);
 err:
-       blk_mq_end_request(rq, result);
+       blk_mq_end_request(rq, errno_to_blk_status(result));
 }
 
-static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
                const struct blk_mq_queue_data *bd)
 {
        struct request *rq = bd->rq;
        struct work_struct *work = blk_mq_rq_to_pdu(rq);
 
        queue_work(rbd_wq, work);
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 }
 
 static void rbd_free_disk(struct rbd_device *rbd_dev)
@@ -6414,8 +6418,16 @@ static int rbd_slab_init(void)
        if (!rbd_obj_request_cache)
                goto out_err;
 
+       rbd_assert(!rbd_bio_clone);
+       rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
+       if (!rbd_bio_clone)
+               goto out_err_clone;
+
        return 0;
 
+out_err_clone:
+       kmem_cache_destroy(rbd_obj_request_cache);
+       rbd_obj_request_cache = NULL;
 out_err:
        kmem_cache_destroy(rbd_img_request_cache);
        rbd_img_request_cache = NULL;
@@ -6431,6 +6443,10 @@ static void rbd_slab_exit(void)
        rbd_assert(rbd_img_request_cache);
        kmem_cache_destroy(rbd_img_request_cache);
        rbd_img_request_cache = NULL;
+
+       rbd_assert(rbd_bio_clone);
+       bioset_free(rbd_bio_clone);
+       rbd_bio_clone = NULL;
 }
 
 static int __init rbd_init(void)
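
rbd keeps computing negative errnos internally and converts only at the blk-mq boundary with errno_to_blk_status(); blk_status_to_errno() covers the reverse direction. That boundary, in sketch form:

#include <linux/blk-mq.h>
#include <linux/blk_types.h>

static void my_end_rq(struct request *rq, int result)	/* result: 0 or -Exxx */
{
	blk_mq_end_request(rq, errno_to_blk_status(result));
}
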
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 9c56636..7f4aceb 100644
@@ -149,9 +149,9 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 {
        struct rsxx_cardinfo *card = q->queuedata;
        struct rsxx_bio_meta *bio_meta;
-       int st = -EINVAL;
+       blk_status_t st = BLK_STS_IOERR;
 
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        might_sleep();
 
@@ -161,15 +161,11 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
        if (bio_end_sector(bio) > get_capacity(card->gendisk))
                goto req_err;
 
-       if (unlikely(card->halt)) {
-               st = -EFAULT;
+       if (unlikely(card->halt))
                goto req_err;
-       }
 
-       if (unlikely(card->dma_fault)) {
-               st = (-EFAULT);
+       if (unlikely(card->dma_fault))
                goto req_err;
-       }
 
        if (bio->bi_iter.bi_size == 0) {
                dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
@@ -178,7 +174,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
 
        bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
        if (!bio_meta) {
-               st = -ENOMEM;
+               st = BLK_STS_RESOURCE;
                goto req_err;
        }
 
@@ -205,7 +201,7 @@ queue_err:
        kmem_cache_free(bio_meta_pool, bio_meta);
 req_err:
        if (st)
-               bio->bi_error = st;
+               bio->bi_status = st;
        bio_endio(bio);
        return BLK_QC_T_NONE;
 }
@@ -288,7 +284,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
        }
 
        blk_queue_make_request(card->queue, rsxx_make_request);
-       blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
        blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
        blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
 
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 5a20385..6a1b217 100644
@@ -611,7 +611,7 @@ static void rsxx_schedule_done(struct work_struct *work)
        mutex_unlock(&ctrl->work_lock);
 }
 
-static int rsxx_queue_discard(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card,
                                  struct list_head *q,
                                  unsigned int laddr,
                                  rsxx_dma_cb cb,
@@ -621,7 +621,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
 
        dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
        if (!dma)
-               return -ENOMEM;
+               return BLK_STS_RESOURCE;
 
        dma->cmd          = HW_CMD_BLK_DISCARD;
        dma->laddr        = laddr;
@@ -640,7 +640,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
        return 0;
 }
 
-static int rsxx_queue_dma(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card,
                              struct list_head *q,
                              int dir,
                              unsigned int dma_off,
@@ -655,7 +655,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
 
        dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
        if (!dma)
-               return -ENOMEM;
+               return BLK_STS_RESOURCE;
 
        dma->cmd          = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
        dma->laddr        = laddr;
@@ -677,7 +677,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
        return 0;
 }
 
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
                           struct bio *bio,
                           atomic_t *n_dmas,
                           rsxx_dma_cb cb,
@@ -694,7 +694,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
        unsigned int dma_len;
        int dma_cnt[RSXX_MAX_TARGETS];
        int tgt;
-       int st;
+       blk_status_t st;
        int i;
 
        addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
@@ -769,7 +769,6 @@ bvec_err:
        for (i = 0; i < card->n_targets; i++)
                rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
                                        FREE_DMA);
-
        return st;
 }
 
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 6bbc64d..277f27e 100644
@@ -391,7 +391,7 @@ int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
 void rsxx_dma_cleanup(void);
 void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
 int rsxx_dma_configure(struct rsxx_cardinfo *card);
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
                           struct bio *bio,
                           atomic_t *n_dmas,
                           rsxx_dma_cb cb,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 27833e4..d036868 100644
@@ -451,8 +451,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
                                    struct skd_special_context *skspcl);
 static void skd_request_fn(struct request_queue *rq);
 static void skd_end_request(struct skd_device *skdev,
-                           struct skd_request_context *skreq, int error);
-static int skd_preop_sg_list(struct skd_device *skdev,
+               struct skd_request_context *skreq, blk_status_t status);
+static bool skd_preop_sg_list(struct skd_device *skdev,
                             struct skd_request_context *skreq);
 static void skd_postop_sg_list(struct skd_device *skdev,
                               struct skd_request_context *skreq);
@@ -491,7 +491,7 @@ static void skd_fail_all_pending(struct skd_device *skdev)
                if (req == NULL)
                        break;
                blk_start_request(req);
-               __blk_end_request_all(req, -EIO);
+               __blk_end_request_all(req, BLK_STS_IOERR);
        }
 }
 
@@ -545,7 +545,6 @@ static void skd_request_fn(struct request_queue *q)
        struct request *req = NULL;
        struct skd_scsi_request *scsi_req;
        unsigned long io_flags;
-       int error;
        u32 lba;
        u32 count;
        int data_dir;
@@ -716,9 +715,7 @@ static void skd_request_fn(struct request_queue *q)
                if (!req->bio)
                        goto skip_sg;
 
-               error = skd_preop_sg_list(skdev, skreq);
-
-               if (error != 0) {
+               if (!skd_preop_sg_list(skdev, skreq)) {
                        /*
                         * Complete the native request with error.
                         * Note that the request context is still at the
@@ -730,7 +727,7 @@ static void skd_request_fn(struct request_queue *q)
                         */
                        pr_debug("%s:%s:%d error Out\n",
                                 skdev->name, __func__, __LINE__);
-                       skd_end_request(skdev, skreq, error);
+                       skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
                        continue;
                }
 
@@ -805,7 +802,7 @@ skip_sg:
 }
 
 static void skd_end_request(struct skd_device *skdev,
-                           struct skd_request_context *skreq, int error)
+               struct skd_request_context *skreq, blk_status_t error)
 {
        if (unlikely(error)) {
                struct request *req = skreq->req;
@@ -822,7 +819,7 @@ static void skd_end_request(struct skd_device *skdev,
        __blk_end_request_all(skreq->req, error);
 }
 
-static int skd_preop_sg_list(struct skd_device *skdev,
+static bool skd_preop_sg_list(struct skd_device *skdev,
                             struct skd_request_context *skreq)
 {
        struct request *req = skreq->req;
@@ -839,7 +836,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
 
        n_sg = blk_rq_map_sg(skdev->queue, req, sg);
        if (n_sg <= 0)
-               return -EINVAL;
+               return false;
 
        /*
         * Map scatterlist to PCI bus addresses.
@@ -847,7 +844,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
         */
        n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
        if (n_sg <= 0)
-               return -EINVAL;
+               return false;
 
        SKD_ASSERT(n_sg <= skdev->sgs_per_request);
 
@@ -882,7 +879,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
                }
        }
 
-       return 0;
+       return true;
 }
 
 static void skd_postop_sg_list(struct skd_device *skdev,
@@ -2333,7 +2330,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
        switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
        case SKD_CHECK_STATUS_REPORT_GOOD:
        case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
-               skd_end_request(skdev, skreq, 0);
+               skd_end_request(skdev, skreq, BLK_STS_OK);
                break;
 
        case SKD_CHECK_STATUS_BUSY_IMMINENT:
@@ -2355,7 +2352,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
 
        case SKD_CHECK_STATUS_REPORT_ERROR:
        default:
-               skd_end_request(skdev, skreq, -EIO);
+               skd_end_request(skdev, skreq, BLK_STS_IOERR);
                break;
        }
 }
@@ -2748,7 +2745,7 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
                         * native request.
                         */
                        if (likely(cmp_status == SAM_STAT_GOOD))
-                               skd_end_request(skdev, skreq, 0);
+                               skd_end_request(skdev, skreq, BLK_STS_OK);
                        else
                                skd_resolve_req_exception(skdev, skreq);
                }
@@ -3190,7 +3187,7 @@ static void skd_recover_requests(struct skd_device *skdev, int requeue)
                            SKD_MAX_RETRIES)
                                blk_requeue_request(skdev->queue, skreq->req);
                        else
-                               skd_end_request(skdev, skreq, -EIO);
+                               skd_end_request(skdev, skreq, BLK_STS_IOERR);
 
                        skreq->req = NULL;
 
@@ -4276,6 +4273,7 @@ static int skd_cons_disk(struct skd_device *skdev)
                rc = -ENOMEM;
                goto err_out;
        }
+       blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
        skdev->queue = q;
        disk->queue = q;
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 3f3a3ab..6b16ead 100644
@@ -316,7 +316,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 
        rqe->req = NULL;
 
-       __blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
+       __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
 
        vdc_blk_queue_start(port);
 }
@@ -1023,7 +1023,7 @@ static void vdc_queue_drain(struct vdc_port *port)
        struct request *req;
 
        while ((req = blk_fetch_request(port->disk->queue)) != NULL)
-               __blk_end_request_all(req, -EIO);
+               __blk_end_request_all(req, BLK_STS_IOERR);
 }
 
 static void vdc_ldc_reset_timer(unsigned long _arg)
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 3064be6..84434d3 100644
@@ -493,7 +493,7 @@ static inline int swim_read_sector(struct floppy_state *fs,
        return ret;
 }
 
-static int floppy_read_sectors(struct floppy_state *fs,
+static blk_status_t floppy_read_sectors(struct floppy_state *fs,
                               int req_sector, int sectors_nb,
                               unsigned char *buffer)
 {
@@ -516,7 +516,7 @@ static int floppy_read_sectors(struct floppy_state *fs,
                        ret = swim_read_sector(fs, side, track, sector,
                                                buffer);
                        if (try-- == 0)
-                               return -EIO;
+                               return BLK_STS_IOERR;
                } while (ret != 512);
 
                buffer += ret;
@@ -553,7 +553,7 @@ static void do_fd_request(struct request_queue *q)
 
        req = swim_next_request(swd);
        while (req) {
-               int err = -EIO;
+               blk_status_t err = BLK_STS_IOERR;
 
                fs = req->rq_disk->private_data;
                if (blk_rq_pos(req) >= fs->total_secs)
@@ -864,6 +864,8 @@ static int swim_floppy_init(struct swim_priv *swd)
                        put_disk(swd->unit[drive].disk);
                        goto exit_put_disks;
                }
+               blk_queue_bounce_limit(swd->unit[drive].disk->queue,
+                               BLK_BOUNCE_HIGH);
                swd->unit[drive].disk->queue->queuedata = swd;
                swd->unit[drive].swd = swd;
        }
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ba4809c..9f931f8 100644
@@ -257,7 +257,7 @@ static unsigned int floppy_check_events(struct gendisk *disk,
                                        unsigned int clearing);
 static int floppy_revalidate(struct gendisk *disk);
 
-static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes)
+static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes)
 {
        struct request *req = fs->cur_req;
        int rc;
@@ -334,7 +334,7 @@ static void start_request(struct floppy_state *fs)
                if (fs->mdev->media_bay &&
                    check_media_bay(fs->mdev->media_bay) != MB_FD) {
                        swim3_dbg("%s", "  media bay absent, dropping req\n");
-                       swim3_end_request(fs, -ENODEV, 0);
+                       swim3_end_request(fs, BLK_STS_IOERR, 0);
                        continue;
                }
 
@@ -350,12 +350,12 @@ static void start_request(struct floppy_state *fs)
                if (blk_rq_pos(req) >= fs->total_secs) {
                        swim3_dbg("  pos out of bounds (%ld, max is %ld)\n",
                                  (long)blk_rq_pos(req), (long)fs->total_secs);
-                       swim3_end_request(fs, -EIO, 0);
+                       swim3_end_request(fs, BLK_STS_IOERR, 0);
                        continue;
                }
                if (fs->ejected) {
                        swim3_dbg("%s", "  disk ejected\n");
-                       swim3_end_request(fs, -EIO, 0);
+                       swim3_end_request(fs, BLK_STS_IOERR, 0);
                        continue;
                }
 
@@ -364,7 +364,7 @@ static void start_request(struct floppy_state *fs)
                                fs->write_prot = swim3_readbit(fs, WRITE_PROT);
                        if (fs->write_prot) {
                                swim3_dbg("%s", "  try to write, disk write protected\n");
-                               swim3_end_request(fs, -EIO, 0);
+                               swim3_end_request(fs, BLK_STS_IOERR, 0);
                                continue;
                        }
                }
@@ -548,7 +548,7 @@ static void act(struct floppy_state *fs)
                                if (fs->retries > 5) {
                                        swim3_err("Wrong cylinder in transfer, want: %d got %d\n",
                                                  fs->req_cyl, fs->cur_cyl);
-                                       swim3_end_request(fs, -EIO, 0);
+                                       swim3_end_request(fs, BLK_STS_IOERR, 0);
                                        fs->state = idle;
                                        return;
                                }
@@ -584,7 +584,7 @@ static void scan_timeout(unsigned long data)
        out_8(&sw->intr_enable, 0);
        fs->cur_cyl = -1;
        if (fs->retries > 5) {
-               swim3_end_request(fs, -EIO, 0);
+               swim3_end_request(fs, BLK_STS_IOERR, 0);
                fs->state = idle;
                start_request(fs);
        } else {
@@ -608,7 +608,7 @@ static void seek_timeout(unsigned long data)
        out_8(&sw->select, RELAX);
        out_8(&sw->intr_enable, 0);
        swim3_err("%s", "Seek timeout\n");
-       swim3_end_request(fs, -EIO, 0);
+       swim3_end_request(fs, BLK_STS_IOERR, 0);
        fs->state = idle;
        start_request(fs);
        spin_unlock_irqrestore(&swim3_lock, flags);
@@ -637,7 +637,7 @@ static void settle_timeout(unsigned long data)
                goto unlock;
        }
        swim3_err("%s", "Seek settle timeout\n");
-       swim3_end_request(fs, -EIO, 0);
+       swim3_end_request(fs, BLK_STS_IOERR, 0);
        fs->state = idle;
        start_request(fs);
  unlock:
@@ -666,7 +666,7 @@ static void xfer_timeout(unsigned long data)
        swim3_err("Timeout %sing sector %ld\n",
               (rq_data_dir(fs->cur_req)==WRITE? "writ": "read"),
               (long)blk_rq_pos(fs->cur_req));
-       swim3_end_request(fs, -EIO, 0);
+       swim3_end_request(fs, BLK_STS_IOERR, 0);
        fs->state = idle;
        start_request(fs);
        spin_unlock_irqrestore(&swim3_lock, flags);
@@ -703,7 +703,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
                                swim3_err("%s", "Seen sector but cyl=ff?\n");
                                fs->cur_cyl = -1;
                                if (fs->retries > 5) {
-                                       swim3_end_request(fs, -EIO, 0);
+                                       swim3_end_request(fs, BLK_STS_IOERR, 0);
                                        fs->state = idle;
                                        start_request(fs);
                                } else {
@@ -786,7 +786,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
                                swim3_err("Error %sing block %ld (err=%x)\n",
                                       rq_data_dir(req) == WRITE? "writ": "read",
                                       (long)blk_rq_pos(req), err);
-                               swim3_end_request(fs, -EIO, 0);
+                               swim3_end_request(fs, BLK_STS_IOERR, 0);
                                fs->state = idle;
                        }
                } else {
@@ -795,7 +795,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
                                swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid);
                                swim3_err("  state=%d, dir=%x, intr=%x, err=%x\n",
                                          fs->state, rq_data_dir(req), intr, err);
-                               swim3_end_request(fs, -EIO, 0);
+                               swim3_end_request(fs, BLK_STS_IOERR, 0);
                                fs->state = idle;
                                start_request(fs);
                                break;
@@ -1223,6 +1223,7 @@ static int swim3_attach(struct macio_dev *mdev,
                put_disk(disk);
                return -ENOMEM;
        }
+       blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
        disk->queue->queuedata = &floppy_states[index];
 
        if (index == 0) {
@@ -1245,7 +1246,7 @@ static int swim3_attach(struct macio_dev *mdev,
        return 0;
 }
 
-static struct of_device_id swim3_match[] =
+static const struct of_device_id swim3_match[] =
 {
        {
        .name           = "swim3",
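
The swim3 hunks above are part of a tree-wide switch from negative errno values to the dedicated blk_status_t type for block-layer completions, and the new blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH) call opts the driver back into highmem bouncing now that the block core no longer applies a bounce limit by default. A minimal sketch of the completion pattern, assuming the 4.13-era block API (the helper name is hypothetical, not from this patch):

    #include <linux/blkdev.h>

    /* Hypothetical wrapper illustrating the conversion: completion
     * paths now pass BLK_STS_* codes instead of raw errnos. */
    static void my_end_request(struct request *req, blk_status_t status)
    {
            /* BLK_STS_OK replaces 0 and BLK_STS_IOERR replaces -EIO;
             * errno_to_blk_status()/blk_status_to_errno() translate at
             * boundaries that still speak errno. */
            __blk_end_request_all(req, status);
    }
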
index c8e072c..08586dc 100644 (file)
@@ -745,7 +745,7 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
 
 static inline void carm_end_request_queued(struct carm_host *host,
                                           struct carm_request *crq,
-                                          int error)
+                                          blk_status_t error)
 {
        struct request *req = crq->rq;
        int rc;
@@ -791,7 +791,7 @@ static inline void carm_round_robin(struct carm_host *host)
 }
 
 static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
-                              int error)
+                              blk_status_t error)
 {
        carm_end_request_queued(host, crq, error);
        if (max_queue == 1)
@@ -869,14 +869,14 @@ queue_one_request:
        sg = &crq->sg[0];
        n_elem = blk_rq_map_sg(q, rq, sg);
        if (n_elem <= 0) {
-               carm_end_rq(host, crq, -EIO);
+               carm_end_rq(host, crq, BLK_STS_IOERR);
                return;         /* request with no s/g entries? */
        }
 
        /* map scatterlist to PCI bus addresses */
        n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
        if (n_elem <= 0) {
-               carm_end_rq(host, crq, -EIO);
+               carm_end_rq(host, crq, BLK_STS_IOERR);
                return;         /* request with no s/g entries? */
        }
        crq->n_elem = n_elem;
@@ -937,7 +937,7 @@ queue_one_request:
 
 static void carm_handle_array_info(struct carm_host *host,
                                   struct carm_request *crq, u8 *mem,
-                                  int error)
+                                  blk_status_t error)
 {
        struct carm_port *port;
        u8 *msg_data = mem + sizeof(struct carm_array_info);
@@ -997,7 +997,7 @@ out:
 
 static void carm_handle_scan_chan(struct carm_host *host,
                                  struct carm_request *crq, u8 *mem,
-                                 int error)
+                                 blk_status_t error)
 {
        u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
        unsigned int i, dev_count = 0;
@@ -1029,7 +1029,7 @@ out:
 }
 
 static void carm_handle_generic(struct carm_host *host,
-                               struct carm_request *crq, int error,
+                               struct carm_request *crq, blk_status_t error,
                                int cur_state, int next_state)
 {
        DPRINTK("ENTER\n");
@@ -1045,7 +1045,7 @@ static void carm_handle_generic(struct carm_host *host,
 }
 
 static inline void carm_handle_rw(struct carm_host *host,
-                                 struct carm_request *crq, int error)
+                                 struct carm_request *crq, blk_status_t error)
 {
        int pci_dir;
 
@@ -1067,7 +1067,7 @@ static inline void carm_handle_resp(struct carm_host *host,
        u32 handle = le32_to_cpu(ret_handle_le);
        unsigned int msg_idx;
        struct carm_request *crq;
-       int error = (status == RMSG_OK) ? 0 : -EIO;
+       blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
        u8 *mem;
 
        VPRINTK("ENTER, handle == 0x%x\n", handle);
@@ -1155,7 +1155,7 @@ static inline void carm_handle_resp(struct carm_host *host,
 err_out:
        printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
               pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
-       carm_end_rq(host, crq, -EIO);
+       carm_end_rq(host, crq, BLK_STS_IOERR);
 }
 
 static inline void carm_handle_responses(struct carm_host *host)
index c141cc3..0677d25 100644 (file)
@@ -454,7 +454,7 @@ static void process_page(unsigned long data)
                                PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
                if (control & DMASCR_HARD_ERROR) {
                        /* error */
-                       bio->bi_error = -EIO;
+                       bio->bi_status = BLK_STS_IOERR;
                        dev_printk(KERN_WARNING, &card->dev->dev,
                                "I/O error on sector %d/%d\n",
                                le32_to_cpu(desc->local_addr)>>9,
@@ -529,7 +529,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
                 (unsigned long long)bio->bi_iter.bi_sector,
                 bio->bi_iter.bi_size);
 
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        spin_lock_irq(&card->lock);
        *card->biotail = bio;
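
Two conversions meet in the umem hunks above: per-bio errors now live in bio->bi_status (a blk_status_t) rather than the errno-valued bio->bi_error, and blk_queue_split() loses its bio_set argument because the split pool moved into the block core. A hedged sketch of a make_request function after both changes (my_submit is a hypothetical helper):

    static blk_qc_t my_make_request(struct request_queue *q, struct bio *bio)
    {
            blk_queue_split(q, &bio);   /* the core owns the split bio_set now */

            if (my_submit(bio) < 0) {
                    bio->bi_status = BLK_STS_IOERR; /* was: bio->bi_error = -EIO */
                    bio_endio(bio);
            }
            return BLK_QC_T_NONE;
    }
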
index 553cc4c..0297ad7 100644 (file)
@@ -64,15 +64,15 @@ struct virtblk_req {
        struct scatterlist sg[];
 };
 
-static inline int virtblk_result(struct virtblk_req *vbr)
+static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
 {
        switch (vbr->status) {
        case VIRTIO_BLK_S_OK:
-               return 0;
+               return BLK_STS_OK;
        case VIRTIO_BLK_S_UNSUPP:
-               return -ENOTTY;
+               return BLK_STS_NOTSUPP;
        default:
-               return -EIO;
+               return BLK_STS_IOERR;
        }
 }
 
@@ -214,7 +214,7 @@ static void virtblk_done(struct virtqueue *vq)
        spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
                           const struct blk_mq_queue_data *bd)
 {
        struct virtio_blk *vblk = hctx->queue->queuedata;
@@ -246,7 +246,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
                break;
        default:
                WARN_ON_ONCE(1);
-               return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_IOERR;
        }
 
        vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
@@ -276,8 +276,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
                /* Out of mem doesn't actually happen, since we fall back
                 * to direct descriptors */
                if (err == -ENOMEM || err == -ENOSPC)
-                       return BLK_MQ_RQ_QUEUE_BUSY;
-               return BLK_MQ_RQ_QUEUE_ERROR;
+                       return BLK_STS_RESOURCE;
+               return BLK_STS_IOERR;
        }
 
        if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -286,7 +286,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        if (notify)
                virtqueue_notify(vblk->vqs[qid].vq);
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 }
 
 /* return id (s/n) string for *disk to *id_str
@@ -307,7 +307,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
                goto out;
 
        blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
-       err = virtblk_result(blk_mq_rq_to_pdu(req));
+       err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
 out:
        blk_put_request(req);
        return err;
@@ -720,9 +720,6 @@ static int virtblk_probe(struct virtio_device *vdev)
        /* We can handle whatever the host told us to handle. */
        blk_queue_max_segments(q, vblk->sg_elems-2);
 
-       /* No need to bounce any requests */
-       blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
-
        /* No real sector limit. */
        blk_queue_max_hw_sectors(q, -1U);
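
The virtio-blk hunks show the blk-mq side of the same cleanup: ->queue_rq() now returns a blk_status_t directly, retiring the parallel BLK_MQ_RQ_QUEUE_{OK,BUSY,ERROR} namespace, and code that must hand an errno back to userspace converts explicitly, as virtblk_get_id() does with blk_status_to_errno(). A sketch of the return-value mapping, assuming the 4.13-era blk-mq API (my_map_and_kick is hypothetical):

    static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                                    const struct blk_mq_queue_data *bd)
    {
            int err = my_map_and_kick(hctx, bd->rq);

            if (err == -ENOMEM || err == -ENOSPC)
                    return BLK_STS_RESOURCE;   /* was BLK_MQ_RQ_QUEUE_BUSY  */
            if (err)
                    return BLK_STS_IOERR;      /* was BLK_MQ_RQ_QUEUE_ERROR */
            return BLK_STS_OK;                 /* was BLK_MQ_RQ_QUEUE_OK    */
    }

The dropped blk_queue_bounce_limit(q, BLK_BOUNCE_ANY) call is the flip side of the same series: not bouncing is now the default, so only drivers that do need bouncing say so.
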
 
index 726c32e..fe7cd58 100644 (file)
@@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg)
        unsigned long timeout;
        int ret;
 
-       xen_blkif_get(blkif);
-
        set_freezable();
        while (!kthread_should_stop()) {
                if (try_to_freeze())
@@ -665,7 +663,6 @@ purge_gnt_list:
                print_stats(ring);
 
        ring->xenblkd = NULL;
-       xen_blkif_put(blkif);
 
        return 0;
 }
@@ -1069,20 +1066,17 @@ static void xen_blk_drain_io(struct xen_blkif_ring *ring)
        atomic_set(&blkif->drain, 0);
 }
 
-/*
- * Completion callback on the bio's. Called as bh->b_end_io()
- */
-
-static void __end_block_io_op(struct pending_req *pending_req, int error)
+static void __end_block_io_op(struct pending_req *pending_req,
+               blk_status_t error)
 {
        /* An error fails the entire request. */
-       if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
-           (error == -EOPNOTSUPP)) {
+       if (pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE &&
+           error == BLK_STS_NOTSUPP) {
                pr_debug("flush diskcache op failed, not supported\n");
                xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
                pending_req->status = BLKIF_RSP_EOPNOTSUPP;
-       } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
-                   (error == -EOPNOTSUPP)) {
+       } else if (pending_req->operation == BLKIF_OP_WRITE_BARRIER &&
+                  error == BLK_STS_NOTSUPP) {
                pr_debug("write barrier op failed, not supported\n");
                xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
                pending_req->status = BLKIF_RSP_EOPNOTSUPP;
@@ -1106,7 +1100,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
  */
 static void end_block_io_op(struct bio *bio)
 {
-       __end_block_io_op(bio->bi_private, bio->bi_error);
+       __end_block_io_op(bio->bi_private, bio->bi_status);
        bio_put(bio);
 }
 
@@ -1423,7 +1417,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
        for (i = 0; i < nbio; i++)
                bio_put(biolist[i]);
        atomic_set(&pending_req->pendcnt, 1);
-       __end_block_io_op(pending_req, -EINVAL);
+       __end_block_io_op(pending_req, BLK_STS_RESOURCE);
        msleep(1); /* back off a bit */
        return -EIO;
 }
@@ -1436,34 +1430,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 static void make_response(struct xen_blkif_ring *ring, u64 id,
                          unsigned short op, int st)
 {
-       struct blkif_response  resp;
+       struct blkif_response *resp;
        unsigned long     flags;
        union blkif_back_rings *blk_rings;
        int notify;
 
-       resp.id        = id;
-       resp.operation = op;
-       resp.status    = st;
-
        spin_lock_irqsave(&ring->blk_ring_lock, flags);
        blk_rings = &ring->blk_rings;
        /* Place on the response ring for the relevant domain. */
        switch (ring->blkif->blk_protocol) {
        case BLKIF_PROTOCOL_NATIVE:
-               memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
-                      &resp, sizeof(resp));
+               resp = RING_GET_RESPONSE(&blk_rings->native,
+                                        blk_rings->native.rsp_prod_pvt);
                break;
        case BLKIF_PROTOCOL_X86_32:
-               memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
-                      &resp, sizeof(resp));
+               resp = RING_GET_RESPONSE(&blk_rings->x86_32,
+                                        blk_rings->x86_32.rsp_prod_pvt);
                break;
        case BLKIF_PROTOCOL_X86_64:
-               memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
-                      &resp, sizeof(resp));
+               resp = RING_GET_RESPONSE(&blk_rings->x86_64,
+                                        blk_rings->x86_64.rsp_prod_pvt);
                break;
        default:
                BUG();
        }
+
+       resp->id        = id;
+       resp->operation = op;
+       resp->status    = st;
+
        blk_rings->common.rsp_prod_pvt++;
        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
        spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
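
make_response() now asks the ring macros for a pointer to the next response slot and fills it in place, rather than assembling a struct on the stack and memcpy()ing it over; with a single response layout shared by all three ABIs the copy bought nothing, and writing the fields directly also avoids copying uninitialized stack padding into the guest-visible ring. The pattern, sketched against the standard Xen ring macros:

    struct blkif_response *resp;

    resp = RING_GET_RESPONSE(&blk_rings->native,
                             blk_rings->native.rsp_prod_pvt);
    resp->id        = id;      /* fill each field in place; no stack */
    resp->operation = op;      /* copy, so no padding bytes reach    */
    resp->status    = st;      /* the shared ring                    */
    blk_rings->common.rsp_prod_pvt++;
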
index dea61f6..ecb35fe 100644 (file)
@@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues;
 struct blkif_common_request {
        char dummy;
 };
-struct blkif_common_response {
-       char dummy;
-};
+
+/* i386 protocol version */
 
 struct blkif_x86_32_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
@@ -129,14 +128,6 @@ struct blkif_x86_32_request {
        } u;
 } __attribute__((__packed__));
 
-/* i386 protocol version */
-#pragma pack(push, 4)
-struct blkif_x86_32_response {
-       uint64_t        id;              /* copied from request */
-       uint8_t         operation;       /* copied from request */
-       int16_t         status;          /* BLKIF_RSP_???       */
-};
-#pragma pack(pop)
 /* x86_64 protocol version */
 
 struct blkif_x86_64_request_rw {
@@ -193,18 +184,12 @@ struct blkif_x86_64_request {
        } u;
 } __attribute__((__packed__));
 
-struct blkif_x86_64_response {
-       uint64_t       __attribute__((__aligned__(8))) id;
-       uint8_t         operation;       /* copied from request */
-       int16_t         status;          /* BLKIF_RSP_???       */
-};
-
 DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
-                 struct blkif_common_response);
+                 struct blkif_response);
 DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
-                 struct blkif_x86_32_response);
+                 struct blkif_response __packed);
 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
-                 struct blkif_x86_64_response);
+                 struct blkif_response);
 
 union blkif_back_rings {
        struct blkif_back_ring        native;
@@ -281,6 +266,7 @@ struct xen_blkif_ring {
 
        wait_queue_head_t       wq;
        atomic_t                inflight;
+       bool                    active;
        /* One thread per blkif ring. */
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;
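
The header-side counterpart: the three per-ABI response structs collapse into the one struct blkif_response from the Xen interface headers, whose field offsets match what the old 4-byte-packed i386 layout produced; the __packed annotation on the x86_32 ring instantiation keeps that ring from picking up the 8-byte alignment padding the old ABI never had. For reference, the shared layout (as defined by the Xen headers, reproduced here as a sketch):

    struct blkif_response {
            uint64_t        id;              /* copied from request */
            uint8_t         operation;       /* copied from request */
            int16_t         status;          /* BLKIF_RSP_???       */
    };

The new bool active member below is picked up by the xenbus code in the next file to track which rings were actually initialized.
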
index 1f3dfaa..792da68 100644 (file)
@@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
                init_waitqueue_head(&ring->shutdown_wq);
                ring->blkif = blkif;
                ring->st_print = jiffies;
-               xen_blkif_get(blkif);
+               ring->active = true;
        }
 
        return 0;
@@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
                struct xen_blkif_ring *ring = &blkif->rings[r];
                unsigned int i = 0;
 
+               if (!ring->active)
+                       continue;
+
                if (ring->xenblkd) {
                        kthread_stop(ring->xenblkd);
                        wake_up(&ring->shutdown_wq);
-                       ring->xenblkd = NULL;
                }
 
                /* The above kthread_stop() guarantees that at this point we
@@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
                BUG_ON(ring->free_pages_num != 0);
                BUG_ON(ring->persistent_gnt_c != 0);
                WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
-               xen_blkif_put(blkif);
+               ring->active = false;
        }
        blkif->nr_ring_pages = 0;
        /*
@@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
-
-       xen_blkif_disconnect(blkif);
+       WARN_ON(xen_blkif_disconnect(blkif));
        xen_vbd_free(&blkif->vbd);
+       kfree(blkif->be->mode);
+       kfree(blkif->be);
 
        /* Make sure everything is drained before shutting down */
        kmem_cache_free(xen_blkif_cachep, blkif);
@@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
                xen_blkif_put(be->blkif);
        }
 
-       kfree(be->mode);
-       kfree(be);
        return 0;
 }
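
The lifecycle rework above replaces reference counting from the per-ring kthreads with an explicit active flag: rings are marked when set up, and disconnect only tears down marked rings, so a partially initialized ring array no longer confuses shutdown. Roughly:

    /* Sketch of the flag pattern used above. */
    for (r = 0; r < blkif->nr_rings; r++) {
            struct xen_blkif_ring *ring = &blkif->rings[r];

            if (!ring->active)
                    continue;       /* never initialized; nothing to undo */

            if (ring->xenblkd)
                    kthread_stop(ring->xenblkd);
            /* ... drain grants, pages, in-flight requests ... */
            ring->active = false;
    }

Freeing be->mode and be moves from xen_blkbk_remove() into xen_blkif_free() so the backend state stays valid until the last reference to the blkif is dropped.
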
 
index 3945963..c852ed3 100644 (file)
@@ -110,11 +110,6 @@ struct blk_shadow {
        unsigned long associated_id;
 };
 
-struct split_bio {
-       struct bio *bio;
-       atomic_t pending;
-};
-
 struct blkif_req {
        int     error;
 };
@@ -881,7 +876,7 @@ static inline bool blkif_request_flush_invalid(struct request *req,
                 !info->feature_fua));
 }
 
-static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
                          const struct blk_mq_queue_data *qd)
 {
        unsigned long flags;
@@ -904,16 +899,16 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        flush_requests(rinfo);
        spin_unlock_irqrestore(&rinfo->ring_lock, flags);
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 
 out_err:
        spin_unlock_irqrestore(&rinfo->ring_lock, flags);
-       return BLK_MQ_RQ_QUEUE_ERROR;
+       return BLK_STS_IOERR;
 
 out_busy:
        spin_unlock_irqrestore(&rinfo->ring_lock, flags);
        blk_mq_stop_hw_queue(hctx);
-       return BLK_MQ_RQ_QUEUE_BUSY;
+       return BLK_STS_RESOURCE;
 }
 
 static void blkif_complete_rq(struct request *rq)
@@ -958,9 +953,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
 
        /* Make sure buffer addresses are sector-aligned. */
        blk_queue_dma_alignment(rq, 511);
-
-       /* Make sure we don't use bounce buffers. */
-       blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
 }
 
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -1601,14 +1593,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                        continue;
                }
 
-               blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+               if (bret->status == BLKIF_RSP_OKAY)
+                       blkif_req(req)->error = BLK_STS_OK;
+               else
+                       blkif_req(req)->error = BLK_STS_IOERR;
+
                switch (bret->operation) {
                case BLKIF_OP_DISCARD:
                        if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
                                struct request_queue *rq = info->rq;
                                printk(KERN_WARNING "blkfront: %s: %s op failed\n",
                                           info->gd->disk_name, op_name(bret->operation));
-                               blkif_req(req)->error = -EOPNOTSUPP;
+                               blkif_req(req)->error = BLK_STS_NOTSUPP;
                                info->feature_discard = 0;
                                info->feature_secdiscard = 0;
                                queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
@@ -1626,11 +1622,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                                     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
                                printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
                                       info->gd->disk_name, op_name(bret->operation));
-                               blkif_req(req)->error = -EOPNOTSUPP;
+                               blkif_req(req)->error = BLK_STS_NOTSUPP;
                        }
                        if (unlikely(blkif_req(req)->error)) {
-                               if (blkif_req(req)->error == -EOPNOTSUPP)
-                                       blkif_req(req)->error = 0;
+                               if (blkif_req(req)->error == BLK_STS_NOTSUPP)
+                                       blkif_req(req)->error = BLK_STS_OK;
                                info->feature_fua = 0;
                                info->feature_flush = 0;
                                xlvbd_flush(info);
@@ -1996,28 +1992,13 @@ static int blkfront_probe(struct xenbus_device *dev,
        return 0;
 }
 
-static void split_bio_end(struct bio *bio)
-{
-       struct split_bio *split_bio = bio->bi_private;
-
-       if (atomic_dec_and_test(&split_bio->pending)) {
-               split_bio->bio->bi_phys_segments = 0;
-               split_bio->bio->bi_error = bio->bi_error;
-               bio_endio(split_bio->bio);
-               kfree(split_bio);
-       }
-       bio_put(bio);
-}
-
 static int blkif_recover(struct blkfront_info *info)
 {
-       unsigned int i, r_index;
+       unsigned int r_index;
        struct request *req, *n;
        int rc;
-       struct bio *bio, *cloned_bio;
-       unsigned int segs, offset;
-       int pending, size;
-       struct split_bio *split_bio;
+       struct bio *bio;
+       unsigned int segs;
 
        blkfront_gather_backend_features(info);
        /* Reset limits changed by blk_mq_update_nr_hw_queues(). */
@@ -2056,34 +2037,6 @@ static int blkif_recover(struct blkfront_info *info)
 
        while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
                /* Traverse the list of pending bios and re-queue them */
-               if (bio_segments(bio) > segs) {
-                       /*
-                        * This bio has more segments than what we can
-                        * handle, we have to split it.
-                        */
-                       pending = (bio_segments(bio) + segs - 1) / segs;
-                       split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
-                       BUG_ON(split_bio == NULL);
-                       atomic_set(&split_bio->pending, pending);
-                       split_bio->bio = bio;
-                       for (i = 0; i < pending; i++) {
-                               offset = (i * segs * XEN_PAGE_SIZE) >> 9;
-                               size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
-                                          (unsigned int)bio_sectors(bio) - offset);
-                               cloned_bio = bio_clone(bio, GFP_NOIO);
-                               BUG_ON(cloned_bio == NULL);
-                               bio_trim(cloned_bio, offset, size);
-                               cloned_bio->bi_private = split_bio;
-                               cloned_bio->bi_end_io = split_bio_end;
-                               submit_bio(cloned_bio);
-                       }
-                       /*
-                        * Now we have to wait for all those smaller bios to
-                        * end, so we can also end the "parent" bio.
-                        */
-                       continue;
-               }
-               /* We don't need to split this bio */
                submit_bio(bio);
        }
 
@@ -2137,7 +2090,7 @@ static int blkfront_resume(struct xenbus_device *dev)
                        merge_bio.tail = shadow[j].request->biotail;
                        bio_list_merge(&info->bio_list, &merge_bio);
                        shadow[j].request->bio = NULL;
-                       blk_mq_end_request(shadow[j].request, 0);
+                       blk_mq_end_request(shadow[j].request, BLK_STS_OK);
                }
        }
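
blkfront can drop its private split_bio machinery because the block core now splits every submitted bio against the queue limits (the same blk_queue_split() seen earlier in this series), so bios saved across a backend reconnect are simply resubmitted as-is:

    /* Sketch: resubmission after recovery. generic_make_request()
     * will split each bio to fit the possibly smaller new limits. */
    while ((bio = bio_list_pop(&info->bio_list)) != NULL)
            submit_bio(bio);
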
 
index 757dce2..14459d6 100644 (file)
@@ -471,7 +471,7 @@ static struct request *ace_get_next_request(struct request_queue *q)
                if (!blk_rq_is_passthrough(req))
                        break;
                blk_start_request(req);
-               __blk_end_request_all(req, -EIO);
+               __blk_end_request_all(req, BLK_STS_IOERR);
        }
        return req;
 }
@@ -499,11 +499,11 @@ static void ace_fsm_dostate(struct ace_device *ace)
 
                /* Drop all in-flight and pending requests */
                if (ace->req) {
-                       __blk_end_request_all(ace->req, -EIO);
+                       __blk_end_request_all(ace->req, BLK_STS_IOERR);
                        ace->req = NULL;
                }
                while ((req = blk_fetch_request(ace->queue)) != NULL)
-                       __blk_end_request_all(req, -EIO);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
 
                /* Drop back to IDLE state and notify waiters */
                ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -728,7 +728,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
                }
 
                /* bio finished; is there another one? */
-               if (__blk_end_request_cur(ace->req, 0)) {
+               if (__blk_end_request_cur(ace->req, BLK_STS_OK)) {
                        /* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
                         *      blk_rq_sectors(ace->req),
                         *      blk_rq_cur_sectors(ace->req));
@@ -993,6 +993,7 @@ static int ace_setup(struct ace_device *ace)
        if (ace->queue == NULL)
                goto err_blk_initq;
        blk_queue_logical_block_size(ace->queue, 512);
+       blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH);
 
        /*
         * Allocate and initialize GD structure
index 968f9e5..41c95c9 100644 (file)
@@ -74,14 +74,14 @@ static void do_z2_request(struct request_queue *q)
        while (req) {
                unsigned long start = blk_rq_pos(req) << 9;
                unsigned long len  = blk_rq_cur_bytes(req);
-               int err = 0;
+               blk_status_t err = BLK_STS_OK;
 
                if (start + len > z2ram_size) {
                        pr_err(DEVICE_NAME ": bad access: block=%llu, "
                               "count=%u\n",
                               (unsigned long long)blk_rq_pos(req),
                               blk_rq_cur_sectors(req));
-                       err = -EIO;
+                       err = BLK_STS_IOERR;
                        goto done;
                }
                while (len) {
index 76c952f..e36d160 100644 (file)
@@ -2178,6 +2178,12 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
        if (!q)
                return -ENXIO;
 
+       if (!blk_queue_scsi_passthrough(q)) {
+               WARN_ONCE(true,
+                         "Attempt to read CDDA info through a non-SCSI queue\n");
+               return -EINVAL;
+       }
+
        cdi->last_sense = 0;
 
        while (nframes) {
@@ -2195,7 +2201,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
                        break;
                }
                req = scsi_req(rq);
-               scsi_req_init(rq);
 
                ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
                if (ret) {
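
Two related changes in the cdrom hunks: the CDDA path now refuses queues that are not SCSI passthrough queues, since it is about to interpret the request PDU as a struct scsi_request, and the explicit scsi_req_init() disappears because blk_get_request() now runs the queue's initialize_rq_fn for passthrough requests. A hedged sketch of the allocation pattern under the 4.13-era API:

    struct request *rq;
    struct scsi_request *sreq;

    if (!blk_queue_scsi_passthrough(q))
            return -EINVAL;              /* PDU is not a scsi_request */

    rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
    if (IS_ERR(rq))
            return PTR_ERR(rq);
    sreq = scsi_req(rq);                 /* already initialized by the core */
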
index 1372763..6495b03 100644 (file)
@@ -583,7 +583,8 @@ static int gdrom_set_interrupt_handlers(void)
  */
 static void gdrom_readdisk_dma(struct work_struct *work)
 {
-       int err, block, block_cnt;
+       int block, block_cnt;
+       blk_status_t err;
        struct packet_command *read_command;
        struct list_head *elem, *next;
        struct request *req;
@@ -641,7 +642,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
                __raw_writeb(1, GDROM_DMA_STATUS_REG);
                wait_event_interruptible_timeout(request_queue,
                        gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
-               err = gd.transfer ? -EIO : 0;
+               err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
                gd.transfer = 0;
                gd.pending = 0;
                /* now seek to take the request spinlock
@@ -670,11 +671,11 @@ static void gdrom_request(struct request_queue *rq)
                        break;
                case REQ_OP_WRITE:
                        pr_notice("Read only device - write request ignored\n");
-                       __blk_end_request_all(req, -EIO);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
                        break;
                default:
                        printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
-                       __blk_end_request_all(req, -EIO);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
                        break;
                }
        }
@@ -812,6 +813,7 @@ static int probe_gdrom(struct platform_device *devptr)
                err = -ENOMEM;
                goto probe_fail_requestq;
        }
+       blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH);
 
        err = probe_gdrom_setupqueue();
        if (err)
index e870f32..01a260f 100644 (file)
@@ -803,13 +803,13 @@ static int crng_fast_load(const char *cp, size_t len)
                p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp;
                cp++; crng_init_cnt++; len--;
        }
+       spin_unlock_irqrestore(&primary_crng.lock, flags);
        if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
                invalidate_batched_entropy();
                crng_init = 1;
                wake_up_interruptible(&crng_init_wait);
                pr_notice("random: fast init done\n");
        }
-       spin_unlock_irqrestore(&primary_crng.lock, flags);
        return 1;
 }
 
@@ -841,6 +841,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
        }
        memzero_explicit(&buf, sizeof(buf));
        crng->init_time = jiffies;
+       spin_unlock_irqrestore(&primary_crng.lock, flags);
        if (crng == &primary_crng && crng_init < 2) {
                invalidate_batched_entropy();
                crng_init = 2;
@@ -848,7 +849,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
                wake_up_interruptible(&crng_init_wait);
                pr_notice("random: crng init done\n");
        }
-       spin_unlock_irqrestore(&primary_crng.lock, flags);
 }
 
 static inline void crng_wait_ready(void)
@@ -2041,8 +2041,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
 u64 get_random_u64(void)
 {
        u64 ret;
-       bool use_lock = crng_init < 2;
-       unsigned long flags;
+       bool use_lock = READ_ONCE(crng_init) < 2;
+       unsigned long flags = 0;
        struct batched_entropy *batch;
 
 #if BITS_PER_LONG == 64
@@ -2073,8 +2073,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
 u32 get_random_u32(void)
 {
        u32 ret;
-       bool use_lock = crng_init < 2;
-       unsigned long flags;
+       bool use_lock = READ_ONCE(crng_init) < 2;
+       unsigned long flags = 0;
        struct batched_entropy *batch;
 
        if (arch_get_random_int(&ret))
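
The random.c hunks reorder the fast-init paths so primary_crng.lock is dropped before invalidate_batched_entropy() and the wake-up are called, avoiding lock inversion against the per-CPU batched-entropy locks; the readers correspondingly sample crng_init with READ_ONCE() and initialize flags, since taking the lock is now conditional. The general shape of the fix, with hypothetical names:

    spin_lock_irqsave(&state_lock, flags);
    /* ... advance generator state under the lock ... */
    done = ++init_cnt >= INIT_THRESH;
    spin_unlock_irqrestore(&state_lock, flags);

    if (done) {
            /* Only call out (other locks, wakeups) after dropping
             * state_lock, so lock ordering cannot invert. */
            invalidate_state();
            wake_up_interruptible(&init_wait);
    }
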
index b917b9d..c378c7b 100644 (file)
 
 #define ACPI_SIG_TPM2 "TPM2"
 
-static const u8 CRB_ACPI_START_UUID[] = {
-       /* 0000 */ 0xAB, 0x6C, 0xBF, 0x6B, 0x63, 0x54, 0x14, 0x47,
-       /* 0008 */ 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4
-};
+static const guid_t crb_acpi_start_guid =
+       GUID_INIT(0x6BBF6CAB, 0x5463, 0x4714,
+                 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4);
 
 enum crb_defaults {
        CRB_ACPI_START_REVISION_ID = 1,
@@ -266,7 +265,7 @@ static int crb_do_acpi_start(struct tpm_chip *chip)
        int rc;
 
        obj = acpi_evaluate_dsm(chip->acpi_dev_handle,
-                               CRB_ACPI_START_UUID,
+                               &crb_acpi_start_guid,
                                CRB_ACPI_START_REVISION_ID,
                                CRB_ACPI_START_INDEX,
                                NULL);
index 692a2c6..86dd852 100644 (file)
 #define PPI_VS_REQ_START       128
 #define PPI_VS_REQ_END         255
 
-static const u8 tpm_ppi_uuid[] = {
-       0xA6, 0xFA, 0xDD, 0x3D,
-       0x1B, 0x36,
-       0xB4, 0x4E,
-       0xA4, 0x24,
-       0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53
-};
+static const guid_t tpm_ppi_guid =
+       GUID_INIT(0x3DDDFAA6, 0x361B, 0x4EB4,
+                 0xA4, 0x24, 0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53);
 
 static inline union acpi_object *
 tpm_eval_dsm(acpi_handle ppi_handle, int func, acpi_object_type type,
             union acpi_object *argv4)
 {
        BUG_ON(!ppi_handle);
-       return acpi_evaluate_dsm_typed(ppi_handle, tpm_ppi_uuid,
+       return acpi_evaluate_dsm_typed(ppi_handle, &tpm_ppi_guid,
                                       TPM_PPI_REVISION_ID,
                                       func, argv4, type);
 }
@@ -107,7 +103,7 @@ static ssize_t tpm_store_ppi_request(struct device *dev,
         * is updated with function index from SUBREQ to SUBREQ2 since PPI
         * version 1.1
         */
-       if (acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+       if (acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid,
                           TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_SUBREQ2))
                func = TPM_PPI_FN_SUBREQ2;
 
@@ -268,7 +264,7 @@ static ssize_t show_ppi_operations(acpi_handle dev_handle, char *buf, u32 start,
                "User not required",
        };
 
-       if (!acpi_check_dsm(dev_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID,
+       if (!acpi_check_dsm(dev_handle, &tpm_ppi_guid, TPM_PPI_REVISION_ID,
                            1 << TPM_PPI_FN_GETOPR))
                return -EPERM;
 
@@ -341,12 +337,12 @@ void tpm_add_ppi(struct tpm_chip *chip)
        if (!chip->acpi_dev_handle)
                return;
 
-       if (!acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid,
+       if (!acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid,
                            TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_VERSION))
                return;
 
        /* Cache PPI version string. */
-       obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, tpm_ppi_uuid,
+       obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, &tpm_ppi_guid,
                                      TPM_PPI_REVISION_ID, TPM_PPI_FN_VERSION,
                                      NULL, ACPI_TYPE_STRING);
        if (obj) {
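
The TPM hunks convert hand-built 16-byte UUID arrays to guid_t initialized with GUID_INIT(), which takes the fields as written in the spec and encodes the mixed-endian first three fields itself; the ACPI _DSM helpers now take a const guid_t *. A sketch with a made-up GUID:

    #include <linux/uuid.h>

    /* Hypothetical GUID, shown only for the GUID_INIT() shape. */
    static const guid_t my_dsm_guid =
            GUID_INIT(0x12345678, 0x9abc, 0xdef0,
                      0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef);

    obj = acpi_evaluate_dsm(handle, &my_dsm_guid, rev, func, NULL);
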
index 19480bc..2f29ee1 100644 (file)
@@ -14,6 +14,7 @@ config COMMON_CLK_MESON8B
 config COMMON_CLK_GXBB
        bool
        depends on COMMON_CLK_AMLOGIC
+       select RESET_CONTROLLER
        help
          Support for the clock controller on AmLogic S905 devices, aka gxbb.
          Say Y if you want peripherals and CPU frequency scaling to work.
index b0d551a..eb89c78 100644 (file)
@@ -156,6 +156,7 @@ config SUN8I_R_CCU
        bool "Support for Allwinner SoCs' PRCM CCUs"
        select SUNXI_CCU_DIV
        select SUNXI_CCU_GATE
+       select SUNXI_CCU_MP
        default MACH_SUN8I || (ARCH_SUNXI && ARM64)
 
 endif
index 9b3cd24..061b6fb 100644 (file)
@@ -31,7 +31,9 @@
 #define CLK_PLL_VIDEO0_2X              8
 #define CLK_PLL_VE                     9
 #define CLK_PLL_DDR0                   10
-#define CLK_PLL_PERIPH0                        11
+
+/* PLL_PERIPH0 exported for PRCM */
+
 #define CLK_PLL_PERIPH0_2X             12
 #define CLK_PLL_PERIPH1                        13
 #define CLK_PLL_PERIPH1_2X             14
index 5c476f9..5372bf8 100644 (file)
@@ -243,7 +243,7 @@ static SUNXI_CCU_GATE(ahb_ss_clk,   "ahb-ss",       "ahb",
 static SUNXI_CCU_GATE(ahb_dma_clk,     "ahb-dma",      "ahb",
                      0x060, BIT(6), 0);
 static SUNXI_CCU_GATE(ahb_bist_clk,    "ahb-bist",     "ahb",
-                     0x060, BIT(6), 0);
+                     0x060, BIT(7), 0);
 static SUNXI_CCU_GATE(ahb_mmc0_clk,    "ahb-mmc0",     "ahb",
                      0x060, BIT(8), 0);
 static SUNXI_CCU_GATE(ahb_mmc1_clk,    "ahb-mmc1",     "ahb",
index 89e68d2..df97e25 100644 (file)
@@ -556,7 +556,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(lcd0_ch1_clk, "lcd0-ch1", lcd_ch1_parents,
                                 0x12c, 0, 4, 24, 3, BIT(31),
                                 CLK_SET_RATE_PARENT);
 static SUNXI_CCU_M_WITH_MUX_GATE(lcd1_ch1_clk, "lcd1-ch1", lcd_ch1_parents,
-                                0x12c, 0, 4, 24, 3, BIT(31),
+                                0x130, 0, 4, 24, 3, BIT(31),
                                 CLK_SET_RATE_PARENT);
 
 static const char * const csi_sclk_parents[] = { "pll-video0", "pll-video1",
index 85973d1..1b4baea 100644 (file)
@@ -29,7 +29,9 @@
 #define CLK_PLL_VIDEO          6
 #define CLK_PLL_VE             7
 #define CLK_PLL_DDR            8
-#define CLK_PLL_PERIPH0                9
+
+/* PLL_PERIPH0 exported for PRCM */
+
 #define CLK_PLL_PERIPH0_2X     10
 #define CLK_PLL_GPU            11
 #define CLK_PLL_PERIPH1                12
index e58706b..6297add 100644 (file)
@@ -537,7 +537,7 @@ static struct ccu_reset_map sun8i_v3s_ccu_resets[] = {
        [RST_BUS_EMAC]          =  { 0x2c0, BIT(17) },
        [RST_BUS_HSTIMER]       =  { 0x2c0, BIT(19) },
        [RST_BUS_SPI0]          =  { 0x2c0, BIT(20) },
-       [RST_BUS_OTG]           =  { 0x2c0, BIT(23) },
+       [RST_BUS_OTG]           =  { 0x2c0, BIT(24) },
        [RST_BUS_EHCI0]         =  { 0x2c0, BIT(26) },
        [RST_BUS_OHCI0]         =  { 0x2c0, BIT(29) },
 
index 4bed671..8b5c300 100644 (file)
@@ -1209,9 +1209,9 @@ arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
                return 0;
        }
 
-       rate = readl_relaxed(frame + CNTFRQ);
+       rate = readl_relaxed(base + CNTFRQ);
 
-       iounmap(frame);
+       iounmap(base);
 
        return rate;
 }
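
The small-looking arch_timer hunk is a real bug fix: the CNTFRQ read and the matching iounmap() both went through frame, the frame descriptor struct, instead of the void __iomem * returned by ioremap(). The corrected shape of the function, with field names as they appear in the 4.13 timer code (treat them as assumptions here):

    static u32 frame_get_cntfrq(struct arch_timer_mem_frame *frame)
    {
            void __iomem *base;
            u32 rate;

            base = ioremap(frame->cntbase, frame->size);
            if (!base)
                    return 0;

            rate = readl_relaxed(base + CNTFRQ);  /* read via the mapping */
            iounmap(base);                        /* unmap what was mapped */
            return rate;
    }
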
index 44e5e95..8e64b84 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/slab.h>
index 2e9c830..c4656c4 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/clk.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
index 992f7c2..88220ff 100644 (file)
@@ -185,8 +185,8 @@ static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
        int ret;
        ret = sscanf(buf, "%u", &input);
 
-       /* cannot be lower than 11 otherwise freq will not fall */
-       if (ret != 1 || input < 11 || input > 100 ||
+       /* cannot be lower than 1 otherwise freq will not fall */
+       if (ret != 1 || input < 1 || input > 100 ||
                        input >= dbs_data->up_threshold)
                return -EINVAL;
 
index ffca4fc..ae8eb03 100644 (file)
@@ -180,8 +180,10 @@ int dt_init_idle_driver(struct cpuidle_driver *drv,
                if (!state_node)
                        break;
 
-               if (!of_device_is_available(state_node))
+               if (!of_device_is_available(state_node)) {
+                       of_node_put(state_node);
                        continue;
+               }
 
                if (!idle_state_valid(state_node, i, cpumask)) {
                        pr_warn("%s idle state not valid, bailing out\n",
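
The dt_init_idle_driver() change plugs a device-node refcount leak: the phandle lookup returns the node with an elevated refcount, and the early continue for unavailable states skipped the of_node_put() the rest of the loop performs. Roughly, assuming the loop walks cpu-idle-states phandles:

    for (i = 0; ; i++) {
            state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
            if (!state_node)
                    break;

            if (!of_device_is_available(state_node)) {
                    of_node_put(state_node);  /* balance of_parse_phandle() */
                    continue;
            }

            /* ... validate and register the idle state ... */
            of_node_put(state_node);
    }
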
index 5c3e7b1..f6e7956 100644 (file)
@@ -267,7 +267,11 @@ static int exynos_nocp_probe(struct platform_device *pdev)
        }
        platform_set_drvdata(pdev, nocp);
 
-       clk_prepare_enable(nocp->clk);
+       ret = clk_prepare_enable(nocp->clk);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to prepare nocp clock\n");
+               return ret;
+       }
 
        pr_info("exynos-nocp: new NoC Probe device registered: %s\n",
                        dev_name(dev));
index 9b73509..d96e3dc 100644 (file)
@@ -44,7 +44,7 @@ struct exynos_ppmu {
        { "ppmu-event2-"#name, PPMU_PMNCNT2 },  \
        { "ppmu-event3-"#name, PPMU_PMNCNT3 }
 
-struct __exynos_ppmu_events {
+static struct __exynos_ppmu_events {
        char *name;
        int id;
 } ppmu_events[] = {
@@ -648,7 +648,11 @@ static int exynos_ppmu_probe(struct platform_device *pdev)
                        dev_name(&pdev->dev), desc[i].name);
        }
 
-       clk_prepare_enable(info->ppmu.clk);
+       ret = clk_prepare_enable(info->ppmu.clk);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to prepare ppmu clock\n");
+               return ret;
+       }
 
        return 0;
 }
index 7717b09..db75d4b 100644 (file)
@@ -214,24 +214,16 @@ static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci)
 static unsigned long get_total_mem(void)
 {
        struct device_node *np = NULL;
-       const unsigned int *reg, *reg_end;
-       int len, sw, aw;
-       unsigned long start, size, total_mem = 0;
+       struct resource res;
+       int ret;
+       unsigned long total_mem = 0;
 
        for_each_node_by_type(np, "memory") {
-               aw = of_n_addr_cells(np);
-               sw = of_n_size_cells(np);
-               reg = (const unsigned int *)of_get_property(np, "reg", &len);
-               reg_end = reg + (len / sizeof(u32));
-
-               total_mem = 0;
-               do {
-                       start = of_read_number(reg, aw);
-                       reg += aw;
-                       size = of_read_number(reg, sw);
-                       reg += sw;
-                       total_mem += size;
-               } while (reg < reg_end);
+               ret = of_address_to_resource(np, 0, &res);
+               if (ret)
+                       continue;
+
+               total_mem += resource_size(&res);
        }
        edac_dbg(0, "total_mem 0x%lx\n", total_mem);
        return total_mem;
@@ -1839,7 +1831,7 @@ static int a10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq,
        return 0;
 }
 
-static struct irq_domain_ops a10_eccmgr_ic_ops = {
+static const struct irq_domain_ops a10_eccmgr_ic_ops = {
        .map = a10_eccmgr_irqdomain_map,
        .xlate = irq_domain_xlate_twocell,
 };
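
get_total_mem() sheds its hand-rolled walk of raw "reg" property cells in favor of of_address_to_resource(), which already handles #address-cells/#size-cells and any bus address translation; it also fixes the old loop's habit of resetting total_mem inside the node loop, which made only the last memory node count. The resulting idiom:

    struct device_node *np = NULL;
    struct resource res;
    u64 total = 0;

    for_each_node_by_type(np, "memory") {
            if (of_address_to_resource(np, 0, &res))
                    continue;                 /* unparseable node */
            total += resource_size(&res);
    }

Constifying a10_eccmgr_ic_ops is the usual hardening for ops tables that are never written at runtime.
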
index f683919..8f5a56e 100644 (file)
 #define                        NREC_RDWR(x)            (((x)>>11) & 1)
 #define                        NREC_RANK(x)            (((x)>>8) & 0x7)
 #define                NRECMEMB                0xC0
-#define                        NREC_CAS(x)             (((x)>>16) & 0xFFFFFF)
+#define                        NREC_CAS(x)             (((x)>>16) & 0xFFF)
 #define                        NREC_RAS(x)             ((x) & 0x7FFF)
 #define                NRECFGLOG               0xC4
 #define                NREEECFBDA              0xC8
@@ -371,7 +371,7 @@ struct i5000_error_info {
        /* These registers are input ONLY if there was a
         * Non-Recoverable Error */
        u16 nrecmema;           /* Non-Recoverable Mem log A */
-       u16 nrecmemb;           /* Non-Recoverable Mem log B */
+       u32 nrecmemb;           /* Non-Recoverable Mem log B */
 
 };
 
@@ -407,7 +407,7 @@ static void i5000_get_error_info(struct mem_ctl_info *mci,
                                NERR_FAT_FBD, &info->nerr_fat_fbd);
                pci_read_config_word(pvt->branchmap_werrors,
                                NRECMEMA, &info->nrecmema);
-               pci_read_config_word(pvt->branchmap_werrors,
+               pci_read_config_dword(pvt->branchmap_werrors,
                                NRECMEMB, &info->nrecmemb);
 
                /* Clear the error bits, by writing them back */
index 37a9ba7..cd889ed 100644 (file)
@@ -368,7 +368,7 @@ struct i5400_error_info {
 
        /* These registers are input ONLY if there was a Non-Rec Error */
        u16 nrecmema;           /* Non-Recoverable Mem log A */
-       u16 nrecmemb;           /* Non-Recoverable Mem log B */
+       u32 nrecmemb;           /* Non-Recoverable Mem log B */
 
 };
 
@@ -458,7 +458,7 @@ static void i5400_get_error_info(struct mem_ctl_info *mci,
                                NERR_FAT_FBD, &info->nerr_fat_fbd);
                pci_read_config_word(pvt->branchmap_werrors,
                                NRECMEMA, &info->nrecmema);
-               pci_read_config_word(pvt->branchmap_werrors,
+               pci_read_config_dword(pvt->branchmap_werrors,
                                NRECMEMB, &info->nrecmemb);
 
                /* Clear the error bits, by writing them back */
index 2733fb5..4260579 100644 (file)
  * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller
  * 0c08: Xeon E3-1200 v3 Processor DRAM Controller
  * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
+ * 5918: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
  *
  * Based on Intel specification:
  * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
  * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html
+ * http://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html
  *
  * According to the above datasheet (p.16):
  * "
@@ -57,6 +59,7 @@
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x5918
 
 #define IE31200_DIMMS                  4
 #define IE31200_RANKS                  8
@@ -376,7 +379,12 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
        void __iomem *window;
        struct ie31200_priv *priv;
        u32 addr_decode, mad_offset;
-       bool skl = (pdev->device == PCI_DEVICE_ID_INTEL_IE31200_HB_8);
+
+       /*
+        * Kaby Lake seems to work like Skylake. Please re-visit this logic
+        * when adding new CPU support.
+        */
+       bool skl = (pdev->device >= PCI_DEVICE_ID_INTEL_IE31200_HB_8);
 
        edac_dbg(0, "MC:\n");
 
@@ -560,6 +568,9 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
                PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
                IE31200},
        {
+               PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               IE31200},
+       {
                0,
        }            /* 0 terminated list. */
 };
index ba35b7e..9a2658a 100644 (file)
@@ -161,7 +161,7 @@ static const char * const smca_ls_mce_desc[] = {
        "Sys Read data error thread 0",
        "Sys read data error thread 1",
        "DC tag error type 2",
-       "DC data error type 1 (poison comsumption)",
+       "DC data error type 1 (poison consumption)",
        "DC data error type 2",
        "DC data error type 3",
        "DC tag error type 4",
index 14b7e7b..d3650df 100644 (file)
@@ -32,21 +32,21 @@ static void mv64x60_pci_check(struct edac_pci_ctl_info *pci)
        struct mv64x60_pci_pdata *pdata = pci->pvt_info;
        u32 cause;
 
-       cause = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+       cause = readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
        if (!cause)
                return;
 
        printk(KERN_ERR "Error in PCI %d Interface\n", pdata->pci_hose);
        printk(KERN_ERR "Cause register: 0x%08x\n", cause);
        printk(KERN_ERR "Address Low: 0x%08x\n",
-              in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_LO));
+              readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_LO));
        printk(KERN_ERR "Address High: 0x%08x\n",
-              in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_HI));
+              readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ADDR_HI));
        printk(KERN_ERR "Attribute: 0x%08x\n",
-              in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_ATTR));
+              readl(pdata->pci_vbase + MV64X60_PCI_ERROR_ATTR));
        printk(KERN_ERR "Command: 0x%08x\n",
-              in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CMD));
-       out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, ~cause);
+              readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CMD));
+       writel(~cause, pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
 
        if (cause & MV64X60_PCI_PE_MASK)
                edac_pci_handle_pe(pci, pci->ctl_name);
@@ -61,7 +61,7 @@ static irqreturn_t mv64x60_pci_isr(int irq, void *dev_id)
        struct mv64x60_pci_pdata *pdata = pci->pvt_info;
        u32 val;
 
-       val = in_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+       val = readl(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
        if (!val)
                return IRQ_NONE;
 
@@ -93,7 +93,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev)
        if (!pci_serr)
                return -ENOMEM;
 
-       out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
+       writel(readl(pci_serr) & ~0x1, pci_serr);
        iounmap(pci_serr);
 
        return 0;
@@ -116,7 +116,7 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev)
        pdata = pci->pvt_info;
 
        pdata->pci_hose = pdev->id;
-       pdata->name = "mpc85xx_pci_err";
+       pdata->name = "mv64x60_pci_err";
        platform_set_drvdata(pdev, pci);
        pci->dev = &pdev->dev;
        pci->dev_name = dev_name(&pdev->dev);
@@ -161,10 +161,10 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev)
                goto err;
        }
 
-       out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0);
-       out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0);
-       out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK,
-                MV64X60_PCIx_ERR_MASK_VAL);
+       writel(0, pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE);
+       writel(0, pdata->pci_vbase + MV64X60_PCI_ERROR_MASK);
+       writel(MV64X60_PCIx_ERR_MASK_VAL,
+                 pdata->pci_vbase + MV64X60_PCI_ERROR_MASK);
 
        if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
                edac_dbg(3, "failed edac_pci_add_device()\n");
@@ -233,23 +233,23 @@ static void mv64x60_sram_check(struct edac_device_ctl_info *edac_dev)
        struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info;
        u32 cause;
 
-       cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
+       cause = readl(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
        if (!cause)
                return;
 
        printk(KERN_ERR "Error in internal SRAM\n");
        printk(KERN_ERR "Cause register: 0x%08x\n", cause);
        printk(KERN_ERR "Address Low: 0x%08x\n",
-              in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_LO));
+              readl(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_LO));
        printk(KERN_ERR "Address High: 0x%08x\n",
-              in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_HI));
+              readl(pdata->sram_vbase + MV64X60_SRAM_ERR_ADDR_HI));
        printk(KERN_ERR "Data Low: 0x%08x\n",
-              in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_LO));
+              readl(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_LO));
        printk(KERN_ERR "Data High: 0x%08x\n",
-              in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_HI));
+              readl(pdata->sram_vbase + MV64X60_SRAM_ERR_DATA_HI));
        printk(KERN_ERR "Parity: 0x%08x\n",
-              in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_PARITY));
-       out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0);
+              readl(pdata->sram_vbase + MV64X60_SRAM_ERR_PARITY));
+       writel(0, pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
 
        edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
 }
@@ -260,7 +260,7 @@ static irqreturn_t mv64x60_sram_isr(int irq, void *dev_id)
        struct mv64x60_sram_pdata *pdata = edac_dev->pvt_info;
        u32 cause;
 
-       cause = in_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
+       cause = readl(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
        if (!cause)
                return IRQ_NONE;
 
@@ -322,7 +322,7 @@ static int mv64x60_sram_err_probe(struct platform_device *pdev)
        }
 
        /* setup SRAM err registers */
-       out_le32(pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE, 0);
+       writel(0, pdata->sram_vbase + MV64X60_SRAM_ERR_CAUSE);
 
        edac_dev->mod_name = EDAC_MOD_STR;
        edac_dev->ctl_name = pdata->name;
@@ -398,7 +398,7 @@ static void mv64x60_cpu_check(struct edac_device_ctl_info *edac_dev)
        struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info;
        u32 cause;
 
-       cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
+       cause = readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
            MV64x60_CPU_CAUSE_MASK;
        if (!cause)
                return;
@@ -406,16 +406,16 @@ static void mv64x60_cpu_check(struct edac_device_ctl_info *edac_dev)
        printk(KERN_ERR "Error on CPU interface\n");
        printk(KERN_ERR "Cause register: 0x%08x\n", cause);
        printk(KERN_ERR "Address Low: 0x%08x\n",
-              in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_LO));
+              readl(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_LO));
        printk(KERN_ERR "Address High: 0x%08x\n",
-              in_le32(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_HI));
+              readl(pdata->cpu_vbase[0] + MV64x60_CPU_ERR_ADDR_HI));
        printk(KERN_ERR "Data Low: 0x%08x\n",
-              in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_LO));
+              readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_LO));
        printk(KERN_ERR "Data High: 0x%08x\n",
-              in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_HI));
+              readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_DATA_HI));
        printk(KERN_ERR "Parity: 0x%08x\n",
-              in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_PARITY));
-       out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0);
+              readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_PARITY));
+       writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE);
 
        edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
 }
@@ -426,7 +426,7 @@ static irqreturn_t mv64x60_cpu_isr(int irq, void *dev_id)
        struct mv64x60_cpu_pdata *pdata = edac_dev->pvt_info;
        u32 cause;
 
-       cause = in_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
+       cause = readl(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE) &
            MV64x60_CPU_CAUSE_MASK;
        if (!cause)
                return IRQ_NONE;
@@ -515,9 +515,9 @@ static int mv64x60_cpu_err_probe(struct platform_device *pdev)
        }
 
        /* setup CPU err registers */
-       out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE, 0);
-       out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0);
-       out_le32(pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK, 0x000000ff);
+       writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_CAUSE);
+       writel(0, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK);
+       writel(0x000000ff, pdata->cpu_vbase[1] + MV64x60_CPU_ERR_MASK);
 
        edac_dev->mod_name = EDAC_MOD_STR;
        edac_dev->ctl_name = pdata->name;
@@ -596,13 +596,13 @@ static void mv64x60_mc_check(struct mem_ctl_info *mci)
        u32 comp_ecc;
        u32 syndrome;
 
-       reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+       reg = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
        if (!reg)
                return;
 
        err_addr = reg & ~0x3;
-       sdram_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_RCVD);
-       comp_ecc = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CALC);
+       sdram_ecc = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_RCVD);
+       comp_ecc = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CALC);
        syndrome = sdram_ecc ^ comp_ecc;
 
        /* first bit clear in ECC Err Reg, 1 bit error, correctable by HW */
@@ -620,7 +620,7 @@ static void mv64x60_mc_check(struct mem_ctl_info *mci)
                                     mci->ctl_name, "");
 
        /* clear the error */
-       out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
+       writel(0, pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
 }
 
 static irqreturn_t mv64x60_mc_isr(int irq, void *dev_id)
@@ -629,7 +629,7 @@ static irqreturn_t mv64x60_mc_isr(int irq, void *dev_id)
        struct mv64x60_mc_pdata *pdata = mci->pvt_info;
        u32 reg;
 
-       reg = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+       reg = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
        if (!reg)
                return IRQ_NONE;
 
@@ -664,7 +664,7 @@ static void mv64x60_init_csrows(struct mem_ctl_info *mci,
 
        get_total_mem(pdata);
 
-       ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
+       ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
 
        csrow = mci->csrows[0];
        dimm = csrow->channels[0]->dimm;
@@ -753,7 +753,7 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev)
                goto err;
        }
 
-       ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
+       ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_CONFIG);
        if (!(ctl & MV64X60_SDRAM_ECC)) {
                /* Non-ECC RAM? */
                printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__);
@@ -779,10 +779,10 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev)
        mv64x60_init_csrows(mci, pdata);
 
        /* setup MC registers */
-       out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR, 0);
-       ctl = in_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
+       writel(0, pdata->mc_vbase + MV64X60_SDRAM_ERR_ADDR);
+       ctl = readl(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
        ctl = (ctl & 0xff00ffff) | 0x10000;
-       out_le32(pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL, ctl);
+       writel(ctl, pdata->mc_vbase + MV64X60_SDRAM_ERR_ECC_CNTL);
 
        res = edac_mc_add_mc(mci);
        if (res) {
@@ -853,10 +853,10 @@ static struct platform_driver * const drivers[] = {
 
 static int __init mv64x60_edac_init(void)
 {
-       int ret = 0;
 
        printk(KERN_INFO "Marvell MV64x60 EDAC driver " MV64x60_REVISION "\n");
        printk(KERN_INFO "\t(C) 2006-2007 MontaVista Software\n");
+
        /* make sure error reporting method is sane */
        switch (edac_op_state) {
        case EDAC_OPSTATE_POLL:
index 1cad5a9..8e59949 100644
@@ -131,7 +131,7 @@ static struct mem_ctl_info *pnd2_mci;
 
 #ifdef CONFIG_X86_INTEL_SBI_APL
 #include "linux/platform_data/sbi_apl.h"
-int sbi_send(int port, int off, int op, u32 *data)
+static int sbi_send(int port, int off, int op, u32 *data)
 {
        struct sbi_apl_message sbi_arg;
        int ret, read = 0;
@@ -160,7 +160,7 @@ int sbi_send(int port, int off, int op, u32 *data)
        return ret;
 }
 #else
-int sbi_send(int port, int off, int op, u32 *data)
+static int sbi_send(int port, int off, int op, u32 *data)
 {
        return -EUNATCH;
 }
@@ -168,14 +168,15 @@ int sbi_send(int port, int off, int op, u32 *data)
 
 static int apl_rd_reg(int port, int off, int op, void *data, size_t sz, char *name)
 {
-       int     ret = 0;
+       int ret = 0;
 
        edac_dbg(2, "Read %s port=%x off=%x op=%x\n", name, port, off, op);
        switch (sz) {
        case 8:
                ret = sbi_send(port, off + 4, op, (u32 *)(data + 4));
+               /* fall through */
        case 4:
-               ret = sbi_send(port, off, op, (u32 *)data);
+               ret |= sbi_send(port, off, op, (u32 *)data);
                pnd2_printk(KERN_DEBUG, "%s=%x%08x ret=%d\n", name,
                                        sz == 8 ? *((u32 *)(data + 4)) : 0, *((u32 *)data), ret);
                break;
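
Two fixes in one hunk: the explicit /* fall through */ annotation documents that an 8-byte read is intentionally issued as two 4-byte sideband reads, and switching the second read to ret |= keeps the error code from the high-half read instead of overwriting it. sbi_send() returns 0 or a negative errno, and the caller only tests for nonzero, so OR-ing the two results is enough to propagate either failure (the OR of two negative errnos is not a meaningful error code, but it is still nonzero). The pattern in isolation:

	switch (sz) {
	case 8:
		ret = sbi_send(port, off + 4, op, (u32 *)(data + 4)); /* high half */
		/* fall through */
	case 4:
		ret |= sbi_send(port, off, op, (u32 *)data);          /* low half */
		break;
	}
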
@@ -423,16 +424,21 @@ static void dnv_mk_region(char *name, struct region *rp, void *asym)
 
 static int apl_get_registers(void)
 {
+       int ret = -ENODEV;
        int i;
 
        if (RD_REG(&asym_2way, b_cr_asym_2way_mem_region_mchbar))
                return -ENODEV;
 
+       /*
+        * RD_REGP() will fail for unpopulated or non-existent
+        * DIMM slots. Return success if we find at least one DIMM.
+        */
        for (i = 0; i < APL_NUM_CHANNELS; i++)
-               if (RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i]))
-                       return -ENODEV;
+               if (!RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i]))
+                       ret = 0;
 
-       return 0;
+       return ret;
 }
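
apl_get_registers() previously aborted as soon as any channel's d_cr_drp0 read failed, which broke probing on boards with unpopulated DIMM slots. The loop now starts from -ENODEV and flips to success on the first channel that answers, so only a system with no readable channel at all is rejected. The same "at least one" pattern in isolation:

	int ret = -ENODEV;	/* pessimistic default */

	for (i = 0; i < APL_NUM_CHANNELS; i++)
		if (!RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i]))
			ret = 0;	/* found one populated slot: success */
	return ret;
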
 
 static int dnv_get_registers(void)
index ea21cb6..80d860c 100644
@@ -35,7 +35,7 @@ static LIST_HEAD(sbridge_edac_list);
 /*
  * Alter this version for the module when modifications are made
  */
-#define SBRIDGE_REVISION    " Ver: 1.1.1 "
+#define SBRIDGE_REVISION    " Ver: 1.1.2 "
 #define EDAC_MOD_STR      "sbridge_edac"
 
 /*
@@ -279,7 +279,7 @@ static const u32 correrrthrsld[] = {
  * sbridge structs
  */
 
-#define NUM_CHANNELS           8       /* 2MC per socket, four chan per MC */
+#define NUM_CHANNELS           4       /* Max channels per MC */
 #define MAX_DIMMS              3       /* Max DIMMS per channel */
 #define KNL_MAX_CHAS           38      /* KNL max num. of Cache Home Agents */
 #define KNL_MAX_CHANNELS       6       /* KNL max num. of PCI channels */
@@ -294,6 +294,12 @@ enum type {
        KNIGHTS_LANDING,
 };
 
+enum domain {
+       IMC0 = 0,
+       IMC1,
+       SOCK,
+};
+
 struct sbridge_pvt;
 struct sbridge_info {
        enum type       type;
@@ -324,11 +330,14 @@ struct sbridge_channel {
 struct pci_id_descr {
        int                     dev_id;
        int                     optional;
+       enum domain             dom;
 };
 
 struct pci_id_table {
        const struct pci_id_descr       *descr;
-       int                             n_devs;
+       int                             n_devs_per_imc;
+       int                             n_devs_per_sock;
+       int                             n_imcs_per_sock;
        enum type                       type;
 };
 
@@ -337,7 +346,9 @@ struct sbridge_dev {
        u8                      bus, mc;
        u8                      node_id, source_id;
        struct pci_dev          **pdev;
+       enum domain             dom;
        int                     n_devs;
+       int                     i_devs;
        struct mem_ctl_info     *mci;
 };
 
@@ -352,11 +363,12 @@ struct knl_pvt {
 };
 
 struct sbridge_pvt {
-       struct pci_dev          *pci_ta, *pci_ddrio, *pci_ras;
+       /* Devices per socket */
+       struct pci_dev          *pci_ddrio;
        struct pci_dev          *pci_sad0, *pci_sad1;
-       struct pci_dev          *pci_ha0, *pci_ha1;
        struct pci_dev          *pci_br0, *pci_br1;
-       struct pci_dev          *pci_ha1_ta;
+       /* Devices per memory controller */
+       struct pci_dev          *pci_ha, *pci_ta, *pci_ras;
        struct pci_dev          *pci_tad[NUM_CHANNELS];
 
        struct sbridge_dev      *sbridge_dev;
@@ -373,39 +385,42 @@ struct sbridge_pvt {
        struct knl_pvt knl;
 };
 
-#define PCI_DESCR(device_id, opt)      \
+#define PCI_DESCR(device_id, opt, domain)      \
        .dev_id = (device_id),          \
-       .optional = opt
+       .optional = opt,        \
+       .dom = domain
 
 static const struct pci_id_descr pci_dev_descr_sbridge[] = {
                /* Processor Home Agent */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0, 0)     },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0,   0, IMC0) },
 
                /* Memory controller */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA, 0)      },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS, 0)     },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0, 0)    },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1, 0)    },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2, 0)    },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3, 0)    },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1)   },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA,    0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS,   0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0,  0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1,  0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2,  0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3,  0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO, 1, SOCK) },
 
                /* System Address Decoder */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0, 0)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1, 0)        },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0,      0, SOCK) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1,      0, SOCK) },
 
                /* Broadcast Registers */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0)          },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR,        0, SOCK) },
 };
 
-#define PCI_ID_TABLE_ENTRY(A, T) {     \
+#define PCI_ID_TABLE_ENTRY(A, N, M, T) {       \
        .descr = A,                     \
-       .n_devs = ARRAY_SIZE(A),        \
+       .n_devs_per_imc = N,    \
+       .n_devs_per_sock = ARRAY_SIZE(A),       \
+       .n_imcs_per_sock = M,   \
        .type = T                       \
 }
 
 static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
-       PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, SANDY_BRIDGE),
+       PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, ARRAY_SIZE(pci_dev_descr_sbridge), 1, SANDY_BRIDGE),
        {0,}                    /* 0 terminated list. */
 };
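
PCI_ID_TABLE_ENTRY() now records three counts instead of one, so generations with two home agents can size per-IMC and per-socket bookkeeping separately. For Sandy Bridge, which has a single IMC per socket, both device counts are simply ARRAY_SIZE() of the descriptor array (11 entries above). Expanded, the table entry is equivalent to:

	{
		.descr           = pci_dev_descr_sbridge,
		.n_devs_per_imc  = ARRAY_SIZE(pci_dev_descr_sbridge),
		.n_devs_per_sock = ARRAY_SIZE(pci_dev_descr_sbridge),
		.n_imcs_per_sock = 1,
		.type            = SANDY_BRIDGE,
	},
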
 
@@ -439,40 +454,39 @@ static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
 
 static const struct pci_id_descr pci_dev_descr_ibridge[] = {
                /* Processor Home Agent */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0, 0)             },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0,        0, IMC0) },
 
                /* Memory controller */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA, 0)          },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS, 0)         },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0, 0)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1, 0)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2, 0)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3, 0)        },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA,     0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS,    0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0,   0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1,   0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2,   0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3,   0, IMC0) },
+
+               /* Optional, mode 2HA */
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1,        1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA,     1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS,    1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0,   1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1,   1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2,   1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3,   1, IMC1) },
+
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1, SOCK) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1, SOCK) },
 
                /* System Address Decoder */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD, 0)                 },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_SAD,            0, SOCK) },
 
                /* Broadcast Registers */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0, 1)                 },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1, 0)                 },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR0,            1, SOCK) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_BR1,            0, SOCK) },
 
-               /* Optional, mode 2HA */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1, 1)             },
-#if 0
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA, 1)  },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS, 1) },
-#endif
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0, 1)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1, 1)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2, 1)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3, 1)        },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_1HA_DDRIO0, 1)      },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_2HA_DDRIO0, 1)      },
 };
 
 static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
-       PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, IVY_BRIDGE),
+       PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, 12, 2, IVY_BRIDGE),
        {0,}                    /* 0 terminated list. */
 };
 
@@ -498,9 +512,9 @@ static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0    0x2fa0
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1    0x2f60
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA 0x2fa8
-#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL 0x2f71
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM 0x2f71
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA 0x2f68
-#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL 0x2f79
+#define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM 0x2f79
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0 0x2ffc
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1 0x2ffd
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0 0x2faa
@@ -517,35 +531,33 @@ static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
 #define PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3 0x2fbb
 static const struct pci_id_descr pci_dev_descr_haswell[] = {
        /* first item must be the HA */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0, 0)             },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0)        },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1, 1)             },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA, 0)          },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL, 0)     },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1)        },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0, 1)          },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1, 1)          },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2, 1)          },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3, 1)          },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA, 1)          },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_THERMAL, 1)     },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1)        },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0,      0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1,      1, IMC1) },
+
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA,   0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM,   0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0, 0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1, 0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2, 1, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3, 1, IMC0) },
+
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA,   1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM,   1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0, 1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1, 1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2, 1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3, 1, IMC1) },
+
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD0, 0, SOCK) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_CBO_SAD1, 0, SOCK) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO0,   1, SOCK) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO1,   1, SOCK) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO2,   1, SOCK) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_HASWELL_IMC_DDRIO3,   1, SOCK) },
 };
 
 static const struct pci_id_table pci_dev_descr_haswell_table[] = {
-       PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, HASWELL),
+       PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, 13, 2, HASWELL),
        {0,}                    /* 0 terminated list. */
 };
 
@@ -559,7 +571,7 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = {
 /* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */
 #define PCI_DEVICE_ID_INTEL_KNL_IMC_MC       0x7840
 /* DRAM channel stuff; bank addrs, dimmmtr, etc. 2-8-2 - 2-9-4 (6 of these) */
-#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL  0x7843
+#define PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN     0x7843
 /* kdrwdbu TAD limits/offsets, MCMTR - 2-10-1, 2-11-1 (2 of these) */
 #define PCI_DEVICE_ID_INTEL_KNL_IMC_TA       0x7844
 /* CHA broadcast registers, dram rules - 1-29-0 (1 of these) */
@@ -579,17 +591,17 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = {
  */
 
 static const struct pci_id_descr pci_dev_descr_knl[] = {
-       [0]         = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0, 0) },
-       [1]         = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1, 0) },
-       [2 ... 3]   = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC, 0)},
-       [4 ... 41]  = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA, 0) },
-       [42 ... 47] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL, 0) },
-       [48]        = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA, 0) },
-       [49]        = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0) },
+       [0 ... 1]   = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_MC,    0, IMC0)},
+       [2 ... 7]   = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN,  0, IMC0) },
+       [8]         = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TA,    0, IMC0) },
+       [9]         = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_TOLHM, 0, IMC0) },
+       [10]        = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0,  0, SOCK) },
+       [11]        = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_SAD1,  0, SOCK) },
+       [12 ... 49] = { PCI_DESCR(PCI_DEVICE_ID_INTEL_KNL_IMC_CHA,   0, SOCK) },
 };
 
 static const struct pci_id_table pci_dev_descr_knl_table[] = {
-       PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, KNIGHTS_LANDING),
+       PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, ARRAY_SIZE(pci_dev_descr_knl), 1, KNIGHTS_LANDING),
        {0,}
 };
 
@@ -615,9 +627,9 @@ static const struct pci_id_table pci_dev_descr_knl_table[] = {
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0  0x6fa0
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1  0x6f60
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA       0x6fa8
-#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL 0x6f71
+#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM       0x6f71
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA       0x6f68
-#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL 0x6f79
+#define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM       0x6f79
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0 0x6ffc
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1 0x6ffd
 #define PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0 0x6faa
@@ -632,32 +644,30 @@ static const struct pci_id_table pci_dev_descr_knl_table[] = {
 
 static const struct pci_id_descr pci_dev_descr_broadwell[] = {
        /* first item must be the HA */
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0, 0)           },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0)      },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0)      },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1, 1)           },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA, 0)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL, 0)   },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0)      },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0)      },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1)      },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1)      },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0, 1)        },
-
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA, 1)        },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_THERMAL, 1)   },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1)      },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1)      },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1)      },
-       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1)      },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0,      0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1,      1, IMC1) },
+
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA,   0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM,   0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0, 0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1, 0, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2, 1, IMC0) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3, 1, IMC0) },
+
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA,   1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM,   1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0, 1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1, 1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2, 1, IMC1) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3, 1, IMC1) },
+
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD0, 0, SOCK) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_CBO_SAD1, 0, SOCK) },
+       { PCI_DESCR(PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0,   1, SOCK) },
 };
 
 static const struct pci_id_table pci_dev_descr_broadwell_table[] = {
-       PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, BROADWELL),
+       PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, 10, 2, BROADWELL),
        {0,}                    /* 0 terminated list. */
 };
 
@@ -709,7 +719,8 @@ static inline int numcol(u32 mtr)
        return 1 << cols;
 }
 
-static struct sbridge_dev *get_sbridge_dev(u8 bus, int multi_bus)
+static struct sbridge_dev *get_sbridge_dev(u8 bus, enum domain dom, int multi_bus,
+                                          struct sbridge_dev *prev)
 {
        struct sbridge_dev *sbridge_dev;
 
@@ -722,16 +733,19 @@ static struct sbridge_dev *get_sbridge_dev(u8 bus, int multi_bus)
                                struct sbridge_dev, list);
        }
 
-       list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
-               if (sbridge_dev->bus == bus)
+       sbridge_dev = list_entry(prev ? prev->list.next
+                                     : sbridge_edac_list.next, struct sbridge_dev, list);
+
+       list_for_each_entry_from(sbridge_dev, &sbridge_edac_list, list) {
+               if (sbridge_dev->bus == bus && (dom == SOCK || dom == sbridge_dev->dom))
                        return sbridge_dev;
        }
 
        return NULL;
 }
 
-static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
-                                          const struct pci_id_table *table)
+static struct sbridge_dev *alloc_sbridge_dev(u8 bus, enum domain dom,
+                                            const struct pci_id_table *table)
 {
        struct sbridge_dev *sbridge_dev;
 
@@ -739,15 +753,17 @@ static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
        if (!sbridge_dev)
                return NULL;
 
-       sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs,
-                                  GFP_KERNEL);
+       sbridge_dev->pdev = kcalloc(table->n_devs_per_imc,
+                                   sizeof(*sbridge_dev->pdev),
+                                   GFP_KERNEL);
        if (!sbridge_dev->pdev) {
                kfree(sbridge_dev);
                return NULL;
        }
 
        sbridge_dev->bus = bus;
-       sbridge_dev->n_devs = table->n_devs;
+       sbridge_dev->dom = dom;
+       sbridge_dev->n_devs = table->n_devs_per_imc;
        list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
 
        return sbridge_dev;
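
kcalloc(n, size, GFP_KERNEL) is the idiomatic replacement for kzalloc(n * size, GFP_KERNEL): both return zeroed memory, but kcalloc() also returns NULL if n * size would overflow, rather than silently allocating a short buffer. The shape of the call:

	sbridge_dev->pdev = kcalloc(table->n_devs_per_imc,
				    sizeof(*sbridge_dev->pdev), GFP_KERNEL);
	if (!sbridge_dev->pdev)
		return NULL;	/* allocation failed, or overflow was caught */
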
@@ -1044,79 +1060,6 @@ static int haswell_chan_hash(int idx, u64 addr)
        return idx;
 }
 
-/****************************************************************************
-                       Memory check routines
- ****************************************************************************/
-static struct pci_dev *get_pdev_same_bus(u8 bus, u32 id)
-{
-       struct pci_dev *pdev = NULL;
-
-       do {
-               pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, pdev);
-               if (pdev && pdev->bus->number == bus)
-                       break;
-       } while (pdev);
-
-       return pdev;
-}
-
-/**
- * check_if_ecc_is_active() - Checks if ECC is active
- * @bus:       Device bus
- * @type:      Memory controller type
- * returns: 0 in case ECC is active, -ENODEV if it can't be determined or
- *         disabled
- */
-static int check_if_ecc_is_active(const u8 bus, enum type type)
-{
-       struct pci_dev *pdev = NULL;
-       u32 mcmtr, id;
-
-       switch (type) {
-       case IVY_BRIDGE:
-               id = PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA;
-               break;
-       case HASWELL:
-               id = PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA;
-               break;
-       case SANDY_BRIDGE:
-               id = PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA;
-               break;
-       case BROADWELL:
-               id = PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA;
-               break;
-       case KNIGHTS_LANDING:
-               /*
-                * KNL doesn't group things by bus the same way
-                * SB/IB/Haswell does.
-                */
-               id = PCI_DEVICE_ID_INTEL_KNL_IMC_TA;
-               break;
-       default:
-               return -ENODEV;
-       }
-
-       if (type != KNIGHTS_LANDING)
-               pdev = get_pdev_same_bus(bus, id);
-       else
-               pdev = pci_get_device(PCI_VENDOR_ID_INTEL, id, 0);
-
-       if (!pdev) {
-               sbridge_printk(KERN_ERR, "Couldn't find PCI device "
-                                       "%04x:%04x! on bus %02d\n",
-                                       PCI_VENDOR_ID_INTEL, id, bus);
-               return -ENODEV;
-       }
-
-       pci_read_config_dword(pdev,
-                       type == KNIGHTS_LANDING ? KNL_MCMTR : MCMTR, &mcmtr);
-       if (!IS_ECC_ENABLED(mcmtr)) {
-               sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
-               return -ENODEV;
-       }
-       return 0;
-}
-
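
The deleted check_if_ecc_is_active() probed one well-known device per generation and aborted registration for the whole bus when ECC was off. Its replacement lives in __populate_dimms() later in this patch: MCMTR is cached per memory controller, and the driver only bails out when a channel that actually has DIMMs reports ECC disabled. The equivalent test, condensed into one expression:

	/* As added in __populate_dimms() below */
	if (IS_DIMM_PRESENT(mtr) && !IS_ECC_ENABLED(pvt->info.mcmtr)) {
		sbridge_printk(KERN_ERR,
			       "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n",
			       pvt->sbridge_dev->source_id,
			       pvt->sbridge_dev->dom, i);
		return -ENODEV;
	}
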
 /* Low bits of TAD limit, and some metadata. */
 static const u32 knl_tad_dram_limit_lo[] = {
        0x400, 0x500, 0x600, 0x700,
@@ -1587,25 +1530,13 @@ static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
        return 0;
 }
 
-static int get_dimm_config(struct mem_ctl_info *mci)
+static void get_source_id(struct mem_ctl_info *mci)
 {
        struct sbridge_pvt *pvt = mci->pvt_info;
-       struct dimm_info *dimm;
-       unsigned i, j, banks, ranks, rows, cols, npages;
-       u64 size;
        u32 reg;
-       enum edac_type mode;
-       enum mem_type mtype;
-       int channels = pvt->info.type == KNIGHTS_LANDING ?
-               KNL_MAX_CHANNELS : NUM_CHANNELS;
-       u64 knl_mc_sizes[KNL_MAX_CHANNELS];
 
-       if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
-               pci_read_config_dword(pvt->pci_ha0, HASWELL_HASYSDEFEATURE2, &reg);
-               pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21);
-       }
        if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL ||
-                       pvt->info.type == KNIGHTS_LANDING)
+           pvt->info.type == KNIGHTS_LANDING)
                pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, &reg);
        else
                pci_read_config_dword(pvt->pci_br0, SAD_TARGET, &reg);
@@ -1614,50 +1545,19 @@ static int get_dimm_config(struct mem_ctl_info *mci)
                pvt->sbridge_dev->source_id = SOURCE_ID_KNL(reg);
        else
                pvt->sbridge_dev->source_id = SOURCE_ID(reg);
+}
 
-       pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt);
-       edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
-                pvt->sbridge_dev->mc,
-                pvt->sbridge_dev->node_id,
-                pvt->sbridge_dev->source_id);
-
-       /* KNL doesn't support mirroring or lockstep,
-        * and is always closed page
-        */
-       if (pvt->info.type == KNIGHTS_LANDING) {
-               mode = EDAC_S4ECD4ED;
-               pvt->is_mirrored = false;
-
-               if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0)
-                       return -1;
-       } else {
-               pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
-               if (IS_MIRROR_ENABLED(reg)) {
-                       edac_dbg(0, "Memory mirror is enabled\n");
-                       pvt->is_mirrored = true;
-               } else {
-                       edac_dbg(0, "Memory mirror is disabled\n");
-                       pvt->is_mirrored = false;
-               }
-
-               pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
-               if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
-                       edac_dbg(0, "Lockstep is enabled\n");
-                       mode = EDAC_S8ECD8ED;
-                       pvt->is_lockstep = true;
-               } else {
-                       edac_dbg(0, "Lockstep is disabled\n");
-                       mode = EDAC_S4ECD4ED;
-                       pvt->is_lockstep = false;
-               }
-               if (IS_CLOSE_PG(pvt->info.mcmtr)) {
-                       edac_dbg(0, "address map is on closed page mode\n");
-                       pvt->is_close_pg = true;
-               } else {
-                       edac_dbg(0, "address map is on open page mode\n");
-                       pvt->is_close_pg = false;
-               }
-       }
+static int __populate_dimms(struct mem_ctl_info *mci,
+                           u64 knl_mc_sizes[KNL_MAX_CHANNELS],
+                           enum edac_type mode)
+{
+       struct sbridge_pvt *pvt = mci->pvt_info;
+       int channels = pvt->info.type == KNIGHTS_LANDING ? KNL_MAX_CHANNELS
+                                                        : NUM_CHANNELS;
+       unsigned int i, j, banks, ranks, rows, cols, npages;
+       struct dimm_info *dimm;
+       enum mem_type mtype;
+       u64 size;
 
        mtype = pvt->info.get_memory_type(pvt);
        if (mtype == MEM_RDDR3 || mtype == MEM_RDDR4)
@@ -1688,8 +1588,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
                }
 
                for (j = 0; j < max_dimms_per_channel; j++) {
-                       dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
-                                      i, j, 0);
+                       dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0);
                        if (pvt->info.type == KNIGHTS_LANDING) {
                                pci_read_config_dword(pvt->knl.pci_channel[i],
                                        knl_mtr_reg, &mtr);
@@ -1699,6 +1598,12 @@ static int get_dimm_config(struct mem_ctl_info *mci)
                        }
                        edac_dbg(4, "Channel #%d  MTR%d = %x\n", i, j, mtr);
                        if (IS_DIMM_PRESENT(mtr)) {
+                               if (!IS_ECC_ENABLED(pvt->info.mcmtr)) {
+                                       sbridge_printk(KERN_ERR, "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n",
+                                                      pvt->sbridge_dev->source_id,
+                                                      pvt->sbridge_dev->dom, i);
+                                       return -ENODEV;
+                               }
                                pvt->channel[i].dimms++;
 
                                ranks = numrank(pvt->info.type, mtr);
@@ -1717,7 +1622,7 @@ static int get_dimm_config(struct mem_ctl_info *mci)
                                npages = MiB_TO_PAGES(size);
 
                                edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
-                                        pvt->sbridge_dev->mc, i/4, i%4, j,
+                                        pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j,
                                         size, npages,
                                         banks, ranks, rows, cols);
 
@@ -1727,8 +1632,8 @@ static int get_dimm_config(struct mem_ctl_info *mci)
                                dimm->mtype = mtype;
                                dimm->edac_mode = mode;
                                snprintf(dimm->label, sizeof(dimm->label),
-                                        "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
-                                        pvt->sbridge_dev->source_id, i/4, i%4, j);
+                                                "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
+                                                pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom, i, j);
                        }
                }
        }
@@ -1736,6 +1641,65 @@ static int get_dimm_config(struct mem_ctl_info *mci)
        return 0;
 }
 
+static int get_dimm_config(struct mem_ctl_info *mci)
+{
+       struct sbridge_pvt *pvt = mci->pvt_info;
+       u64 knl_mc_sizes[KNL_MAX_CHANNELS];
+       enum edac_type mode;
+       u32 reg;
+
+       if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
+               pci_read_config_dword(pvt->pci_ha, HASWELL_HASYSDEFEATURE2, &reg);
+               pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21);
+       }
+       pvt->sbridge_dev->node_id = pvt->info.get_node_id(pvt);
+       edac_dbg(0, "mc#%d: Node ID: %d, source ID: %d\n",
+                pvt->sbridge_dev->mc,
+                pvt->sbridge_dev->node_id,
+                pvt->sbridge_dev->source_id);
+
+       /* KNL doesn't support mirroring or lockstep,
+        * and is always closed page
+        */
+       if (pvt->info.type == KNIGHTS_LANDING) {
+               mode = EDAC_S4ECD4ED;
+               pvt->is_mirrored = false;
+
+               if (knl_get_dimm_capacity(pvt, knl_mc_sizes) != 0)
+                       return -1;
+               pci_read_config_dword(pvt->pci_ta, KNL_MCMTR, &pvt->info.mcmtr);
+       } else {
+               pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
+               if (IS_MIRROR_ENABLED(reg)) {
+                       edac_dbg(0, "Memory mirror is enabled\n");
+                       pvt->is_mirrored = true;
+               } else {
+                       edac_dbg(0, "Memory mirror is disabled\n");
+                       pvt->is_mirrored = false;
+               }
+
+               pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
+               if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
+                       edac_dbg(0, "Lockstep is enabled\n");
+                       mode = EDAC_S8ECD8ED;
+                       pvt->is_lockstep = true;
+               } else {
+                       edac_dbg(0, "Lockstep is disabled\n");
+                       mode = EDAC_S4ECD4ED;
+                       pvt->is_lockstep = false;
+               }
+               if (IS_CLOSE_PG(pvt->info.mcmtr)) {
+                       edac_dbg(0, "address map is on closed page mode\n");
+                       pvt->is_close_pg = true;
+               } else {
+                       edac_dbg(0, "address map is on open page mode\n");
+                       pvt->is_close_pg = false;
+               }
+       }
+
+       return __populate_dimms(mci, knl_mc_sizes, mode);
+}
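
One subtlety in this reshuffle: the old code never filled pvt->info.mcmtr on Knights Landing (the deleted check_if_ecc_is_active() read the register into a local). get_dimm_config() now caches it from KNL_MCMTR as well, so the per-DIMM ECC test in __populate_dimms() works on every supported generation. Condensed from the two branches above:

	if (pvt->info.type == KNIGHTS_LANDING)
		pci_read_config_dword(pvt->pci_ta, KNL_MCMTR, &pvt->info.mcmtr);
	else
		pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
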
+
 static void get_memory_layout(const struct mem_ctl_info *mci)
 {
        struct sbridge_pvt *pvt = mci->pvt_info;
@@ -1816,8 +1780,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
         */
        prv = 0;
        for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
-               pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
-                                     &reg);
+               pci_read_config_dword(pvt->pci_ha, tad_dram_rule[n_tads], &reg);
                limit = TAD_LIMIT(reg);
                if (limit <= prv)
                        break;
@@ -1899,12 +1862,12 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
        }
 }
 
-static struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
+static struct mem_ctl_info *get_mci_for_node_id(u8 node_id, u8 ha)
 {
        struct sbridge_dev *sbridge_dev;
 
        list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
-               if (sbridge_dev->node_id == node_id)
+               if (sbridge_dev->node_id == node_id && sbridge_dev->dom == ha)
                        return sbridge_dev->mci;
        }
        return NULL;
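
With one mem_ctl_info per IMC, a node ID alone no longer identifies a controller, so lookups take the home agent as a second key. A caller that only knows the socket, such as the MCE notifier further down, passes IMC0 to reach the first controller on that node:

	mci = get_mci_for_node_id(mce->socketid, IMC0);
	if (!mci)
		return NOTIFY_DONE;
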
@@ -1925,7 +1888,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
        int                     interleave_mode, shiftup = 0;
        unsigned                sad_interleave[pvt->info.max_interleave];
        u32                     reg, dram_rule;
-       u8                      ch_way, sck_way, pkg, sad_ha = 0, ch_add = 0;
+       u8                      ch_way, sck_way, pkg, sad_ha = 0;
        u32                     tad_offset;
        u32                     rir_way;
        u32                     mb, gb;
@@ -2038,13 +2001,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
                pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
                *socket = sad_pkg_socket(pkg);
                sad_ha = sad_pkg_ha(pkg);
-               if (sad_ha)
-                       ch_add = 4;
 
                if (a7mode) {
                        /* MCChanShiftUpEnable */
-                       pci_read_config_dword(pvt->pci_ha0,
-                                             HASWELL_HASYSDEFEATURE2, &reg);
+                       pci_read_config_dword(pvt->pci_ha, HASWELL_HASYSDEFEATURE2, &reg);
                        shiftup = GET_BITFIELD(reg, 22, 22);
                }
 
@@ -2056,8 +2016,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
                pkg = sad_pkg(pvt->info.interleave_pkg, reg, idx);
                *socket = sad_pkg_socket(pkg);
                sad_ha = sad_pkg_ha(pkg);
-               if (sad_ha)
-                       ch_add = 4;
                edac_dbg(0, "SAD interleave package: %d = CPU socket %d, HA %d\n",
                         idx, *socket, sad_ha);
        }
@@ -2068,7 +2026,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
         * Move to the proper node structure, in order to access the
         * right PCI registers
         */
-       new_mci = get_mci_for_node_id(*socket);
+       new_mci = get_mci_for_node_id(*socket, sad_ha);
        if (!new_mci) {
                sprintf(msg, "Struct for socket #%u wasn't initialized",
                        *socket);
@@ -2081,14 +2039,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
         * Step 2) Get memory channel
         */
        prv = 0;
-       if (pvt->info.type == SANDY_BRIDGE)
-               pci_ha = pvt->pci_ha0;
-       else {
-               if (sad_ha)
-                       pci_ha = pvt->pci_ha1;
-               else
-                       pci_ha = pvt->pci_ha0;
-       }
+       pci_ha = pvt->pci_ha;
        for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
                pci_read_config_dword(pci_ha, tad_dram_rule[n_tads], &reg);
                limit = TAD_LIMIT(reg);
@@ -2139,9 +2090,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
        }
        *channel_mask = 1 << base_ch;
 
-       pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
-                               tad_ch_nilv_offset[n_tads],
-                               &tad_offset);
+       pci_read_config_dword(pvt->pci_tad[base_ch], tad_ch_nilv_offset[n_tads], &tad_offset);
 
        if (pvt->is_mirrored) {
                *channel_mask |= 1 << ((base_ch + 2) % 4);
@@ -2192,9 +2141,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
         * Step 3) Decode rank
         */
        for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
-               pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
-                                     rir_way_limit[n_rir],
-                                     &reg);
+               pci_read_config_dword(pvt->pci_tad[base_ch], rir_way_limit[n_rir], &reg);
 
                if (!IS_RIR_VALID(reg))
                        continue;
@@ -2222,9 +2169,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
                idx = (ch_addr >> 13);  /* FIXME: Datasheet says to shift by 15 */
        idx %= 1 << rir_way;
 
-       pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
-                             rir_offset[n_rir][idx],
-                             &reg);
+       pci_read_config_dword(pvt->pci_tad[base_ch], rir_offset[n_rir][idx], &reg);
        *rank = RIR_RNK_TGT(pvt->info.type, reg);
 
        edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
@@ -2277,10 +2222,11 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
                                 const unsigned devno,
                                 const int multi_bus)
 {
-       struct sbridge_dev *sbridge_dev;
+       struct sbridge_dev *sbridge_dev = NULL;
        const struct pci_id_descr *dev_descr = &table->descr[devno];
        struct pci_dev *pdev = NULL;
        u8 bus = 0;
+       int i = 0;
 
        sbridge_printk(KERN_DEBUG,
                "Seeking for: PCI ID %04x:%04x\n",
@@ -2311,9 +2257,14 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
        }
        bus = pdev->bus->number;
 
-       sbridge_dev = get_sbridge_dev(bus, multi_bus);
+next_imc:
+       sbridge_dev = get_sbridge_dev(bus, dev_descr->dom, multi_bus, sbridge_dev);
        if (!sbridge_dev) {
-               sbridge_dev = alloc_sbridge_dev(bus, table);
+
+               if (dev_descr->dom == SOCK)
+                       goto out_imc;
+
+               sbridge_dev = alloc_sbridge_dev(bus, dev_descr->dom, table);
                if (!sbridge_dev) {
                        pci_dev_put(pdev);
                        return -ENOMEM;
@@ -2321,7 +2272,7 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
                (*num_mc)++;
        }
 
-       if (sbridge_dev->pdev[devno]) {
+       if (sbridge_dev->pdev[sbridge_dev->i_devs]) {
                sbridge_printk(KERN_ERR,
                        "Duplicated device for %04x:%04x\n",
                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
@@ -2329,8 +2280,16 @@ static int sbridge_get_onedevice(struct pci_dev **prev,
                return -ENODEV;
        }
 
-       sbridge_dev->pdev[devno] = pdev;
+       sbridge_dev->pdev[sbridge_dev->i_devs++] = pdev;
+
+       /* pdev belongs to more than one IMC, do extra gets */
+       if (++i > 1)
+               pci_dev_get(pdev);
 
+       if (dev_descr->dom == SOCK && i < table->n_imcs_per_sock)
+               goto next_imc;
+
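
Reference counting is the subtle point here: pci_get_device() in the search loop returned exactly one reference, but a SOCK-domain device is now stored in the pdev[] array of every per-IMC sbridge_dev on the socket. Each extra store therefore takes an extra pci_dev_get(), so every pci_dev_put() at teardown stays balanced. A sketch of the effect on a 2-HA part:

	/* i counts how many sbridge_dev lists this pdev was stored in */
	sbridge_dev->pdev[sbridge_dev->i_devs++] = pdev; /* IMC0: ref from pci_get_device() */
	/* ... loop back via next_imc for the second IMC ... */
	sbridge_dev->pdev[sbridge_dev->i_devs++] = pdev; /* IMC1 */
	pci_dev_get(pdev);				 /* second ref for IMC1's copy */
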
+out_imc:
        /* Be sure that the device is enabled */
        if (unlikely(pci_enable_device(pdev) < 0)) {
                sbridge_printk(KERN_ERR,
@@ -2374,7 +2333,7 @@ static int sbridge_get_all_devices(u8 *num_mc,
        if (table->type == KNIGHTS_LANDING)
                allow_dups = multi_bus = 1;
        while (table && table->descr) {
-               for (i = 0; i < table->n_devs; i++) {
+               for (i = 0; i < table->n_devs_per_sock; i++) {
                        if (!allow_dups || i == 0 ||
                                        table->descr[i].dev_id !=
                                                table->descr[i-1].dev_id) {
@@ -2385,7 +2344,7 @@ static int sbridge_get_all_devices(u8 *num_mc,
                                                           table, i, multi_bus);
                                if (rc < 0) {
                                        if (i == 0) {
-                                               i = table->n_devs;
+                                               i = table->n_devs_per_sock;
                                                break;
                                        }
                                        sbridge_put_all_devices();
@@ -2399,6 +2358,13 @@ static int sbridge_get_all_devices(u8 *num_mc,
        return 0;
 }
 
+/*
+ * Device IDs for {SBRIDGE,IBRIDGE,HASWELL,BROADWELL}_IMC_HA0_TAD0 are in
+ * the format: XXXa. So we can convert from a device to the corresponding
+ * channel like this
+ */
+#define TAD_DEV_TO_CHAN(dev) (((dev) & 0xf) - 0xa)
+
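
The TAD device IDs across all four generations end in the nibbles 0xa..0xd (for instance, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0 above is 0x6faa), so the macro recovers the channel number from the low nibble:

	TAD_DEV_TO_CHAN(0x6faa) == (0x6faa & 0xf) - 0xa == 0	/* TAD0 -> channel 0 */
	TAD_DEV_TO_CHAN(0x2fad) == (0x2fad & 0xf) - 0xa == 3	/* TAD3 -> channel 3 */

Because the HA1 TAD IDs follow the same scheme, both home agents map onto channels 0..3 of their own per-IMC mem_ctl_info, which is what lets the old "+ 4" offsets and the ch_add bookkeeping go away.
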
 static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
                                 struct sbridge_dev *sbridge_dev)
 {
@@ -2423,7 +2389,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
                        pvt->pci_br0 = pdev;
                        break;
                case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
-                       pvt->pci_ha0 = pdev;
+                       pvt->pci_ha = pdev;
                        break;
                case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA:
                        pvt->pci_ta = pdev;
@@ -2436,7 +2402,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
                case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2:
                case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3:
                {
-                       int id = pdev->device - PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0;
+                       int id = TAD_DEV_TO_CHAN(pdev->device);
                        pvt->pci_tad[id] = pdev;
                        saw_chan_mask |= 1 << id;
                }
@@ -2455,7 +2421,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci,
        }
 
        /* Check if everything was registered */
-       if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
+       if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha ||
            !pvt->pci_ras || !pvt->pci_ta)
                goto enodev;
 
@@ -2488,19 +2454,26 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
 
                switch (pdev->device) {
                case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0:
-                       pvt->pci_ha0 = pdev;
+               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1:
+                       pvt->pci_ha = pdev;
                        break;
                case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
+               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TA:
                        pvt->pci_ta = pdev;
                case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_RAS:
+               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_RAS:
                        pvt->pci_ras = pdev;
                        break;
                case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0:
                case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD1:
                case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD2:
                case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD3:
+               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0:
+               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1:
+               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2:
+               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3:
                {
-                       int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TAD0;
+                       int id = TAD_DEV_TO_CHAN(pdev->device);
                        pvt->pci_tad[id] = pdev;
                        saw_chan_mask |= 1 << id;
                }
@@ -2520,19 +2493,6 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
                case PCI_DEVICE_ID_INTEL_IBRIDGE_BR1:
                        pvt->pci_br1 = pdev;
                        break;
-               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1:
-                       pvt->pci_ha1 = pdev;
-                       break;
-               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0:
-               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD1:
-               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD2:
-               case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD3:
-               {
-                       int id = pdev->device - PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA1_TAD0 + 4;
-                       pvt->pci_tad[id] = pdev;
-                       saw_chan_mask |= 1 << id;
-               }
-                       break;
                default:
                        goto error;
                }
@@ -2544,13 +2504,12 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci,
        }
 
        /* Check if everything was registered */
-       if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 ||
+       if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_br0 ||
            !pvt->pci_br1 || !pvt->pci_ras || !pvt->pci_ta)
                goto enodev;
 
-       if (saw_chan_mask != 0x0f && /* -EN */
-           saw_chan_mask != 0x33 && /* -EP */
-           saw_chan_mask != 0xff)   /* -EX */
+       if (saw_chan_mask != 0x0f && /* -EN/-EX */
+           saw_chan_mask != 0x03)   /* -EP */
                goto enodev;
        return 0;
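
saw_chan_mask is now a per-IMC bitmap, with bit n set once the TAD device for channel n has been found. Only two populations are legal after the split: channels 0-3 on -EN/-EX parts, or channels 0-1 on -EP parts. The old per-socket masks 0x33 and 0xff spanned both home agents and can no longer occur:

	/* 0x0f = 0b1111: channels 0..3 present (-EN/-EX)
	 * 0x03 = 0b0011: channels 0..1 present (-EP)
	 */
	if (saw_chan_mask != 0x0f && saw_chan_mask != 0x03)
		goto enodev;	/* a TAD device went missing */
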
 
@@ -2593,32 +2552,27 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci,
                        pvt->pci_sad1 = pdev;
                        break;
                case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
-                       pvt->pci_ha0 = pdev;
+               case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1:
+                       pvt->pci_ha = pdev;
                        break;
                case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TA:
+               case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA:
                        pvt->pci_ta = pdev;
                        break;
-               case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_THERMAL:
+               case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TM:
+               case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TM:
                        pvt->pci_ras = pdev;
                        break;
                case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0:
                case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD1:
                case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD2:
                case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD3:
-               {
-                       int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0_TAD0;
-
-                       pvt->pci_tad[id] = pdev;
-                       saw_chan_mask |= 1 << id;
-               }
-                       break;
                case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0:
                case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD1:
                case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD2:
                case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD3:
                {
-                       int id = pdev->device - PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TAD0 + 4;
-
+                       int id = TAD_DEV_TO_CHAN(pdev->device);
                        pvt->pci_tad[id] = pdev;
                        saw_chan_mask |= 1 << id;
                }
@@ -2630,12 +2584,6 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci,
                        if (!pvt->pci_ddrio)
                                pvt->pci_ddrio = pdev;
                        break;
-               case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1:
-                       pvt->pci_ha1 = pdev;
-                       break;
-               case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA1_TA:
-                       pvt->pci_ha1_ta = pdev;
-                       break;
                default:
                        break;
                }
@@ -2647,13 +2595,12 @@ static int haswell_mci_bind_devs(struct mem_ctl_info *mci,
        }
 
        /* Check if everything was registered */
-       if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 ||
+       if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_sad1 ||
            !pvt->pci_ras  || !pvt->pci_ta || !pvt->info.pci_vtd)
                goto enodev;
 
-       if (saw_chan_mask != 0x0f && /* -EN */
-           saw_chan_mask != 0x33 && /* -EP */
-           saw_chan_mask != 0xff)   /* -EX */
+       if (saw_chan_mask != 0x0f && /* -EN/-EX */
+           saw_chan_mask != 0x03)   /* -EP */
                goto enodev;
        return 0;
 
@@ -2690,30 +2637,27 @@ static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
                        pvt->pci_sad1 = pdev;
                        break;
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
-                       pvt->pci_ha0 = pdev;
+               case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1:
+                       pvt->pci_ha = pdev;
                        break;
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TA:
+               case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA:
                        pvt->pci_ta = pdev;
                        break;
-               case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_THERMAL:
+               case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TM:
+               case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TM:
                        pvt->pci_ras = pdev;
                        break;
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0:
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD1:
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD2:
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD3:
-               {
-                       int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0_TAD0;
-                       pvt->pci_tad[id] = pdev;
-                       saw_chan_mask |= 1 << id;
-               }
-                       break;
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0:
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD1:
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD2:
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD3:
                {
-                       int id = pdev->device - PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TAD0 + 4;
+                       int id = TAD_DEV_TO_CHAN(pdev->device);
                        pvt->pci_tad[id] = pdev;
                        saw_chan_mask |= 1 << id;
                }
@@ -2721,12 +2665,6 @@ static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
                case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_DDRIO0:
                        pvt->pci_ddrio = pdev;
                        break;
-               case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1:
-                       pvt->pci_ha1 = pdev;
-                       break;
-               case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA1_TA:
-                       pvt->pci_ha1_ta = pdev;
-                       break;
                default:
                        break;
                }
@@ -2738,13 +2676,12 @@ static int broadwell_mci_bind_devs(struct mem_ctl_info *mci,
        }
 
        /* Check if everything was registered */
-       if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_sad1 ||
+       if (!pvt->pci_sad0 || !pvt->pci_ha || !pvt->pci_sad1 ||
            !pvt->pci_ras  || !pvt->pci_ta || !pvt->info.pci_vtd)
                goto enodev;
 
-       if (saw_chan_mask != 0x0f && /* -EN */
-           saw_chan_mask != 0x33 && /* -EP */
-           saw_chan_mask != 0xff)   /* -EX */
+       if (saw_chan_mask != 0x0f && /* -EN/-EX */
+           saw_chan_mask != 0x03)   /* -EP */
                goto enodev;
        return 0;
 
@@ -2812,7 +2749,7 @@ static int knl_mci_bind_devs(struct mem_ctl_info *mci,
                        pvt->knl.pci_cha[devidx] = pdev;
                        break;
 
-               case PCI_DEVICE_ID_INTEL_KNL_IMC_CHANNEL:
+               case PCI_DEVICE_ID_INTEL_KNL_IMC_CHAN:
                        devidx = -1;
 
                        /*
@@ -3006,7 +2943,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 
        if (rc < 0)
                goto err_parsing;
-       new_mci = get_mci_for_node_id(socket);
+       new_mci = get_mci_for_node_id(socket, ha);
        if (!new_mci) {
                strcpy(msg, "Error: socket got corrupted!");
                goto err_parsing;
@@ -3053,7 +2990,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
        /* Call the helper to output message */
        edac_mc_handle_error(tp_event, mci, core_err_cnt,
                             m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
-                            4*ha+channel, dimm, -1,
+                            channel, dimm, -1,
                             optype, msg);
        return;
 err_parsing:
@@ -3078,7 +3015,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
        if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
                return NOTIFY_DONE;
 
-       mci = get_mci_for_node_id(mce->socketid);
+       mci = get_mci_for_node_id(mce->socketid, IMC0);
        if (!mci)
                return NOTIFY_DONE;
        pvt = mci->pvt_info;
@@ -3159,11 +3096,6 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
        struct pci_dev *pdev = sbridge_dev->pdev[0];
        int rc;
 
-       /* Check the number of active and not disabled channels */
-       rc = check_if_ecc_is_active(sbridge_dev->bus, type);
-       if (unlikely(rc < 0))
-               return rc;
-
        /* allocate a new MC control structure */
        layers[0].type = EDAC_MC_LAYER_CHANNEL;
        layers[0].size = type == KNIGHTS_LANDING ?
@@ -3192,7 +3124,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
                MEM_FLAG_DDR4 : MEM_FLAG_DDR3;
        mci->edac_ctl_cap = EDAC_FLAG_NONE;
        mci->edac_cap = EDAC_FLAG_NONE;
-       mci->mod_name = "sbridge_edac.c";
+       mci->mod_name = "sb_edac.c";
        mci->mod_ver = SBRIDGE_REVISION;
        mci->dev_name = pci_name(pdev);
        mci->ctl_page_to_phys = NULL;
@@ -3215,12 +3147,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
                pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
                pvt->info.interleave_pkg = ibridge_interleave_pkg;
                pvt->info.get_width = ibridge_get_width;
-               mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge Socket#%d", mci->mc_idx);
 
                /* Store pci devices at mci for faster access */
                rc = ibridge_mci_bind_devs(mci, sbridge_dev);
                if (unlikely(rc < 0))
                        goto fail0;
+               get_source_id(mci);
+               mci->ctl_name = kasprintf(GFP_KERNEL, "Ivy Bridge SrcID#%d_Ha#%d",
+                       pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
                break;
        case SANDY_BRIDGE:
                pvt->info.rankcfgr = SB_RANK_CFG_A;
@@ -3238,12 +3172,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
                pvt->info.max_interleave = ARRAY_SIZE(sbridge_interleave_list);
                pvt->info.interleave_pkg = sbridge_interleave_pkg;
                pvt->info.get_width = sbridge_get_width;
-               mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
 
                /* Store pci devices at mci for faster access */
                rc = sbridge_mci_bind_devs(mci, sbridge_dev);
                if (unlikely(rc < 0))
                        goto fail0;
+               get_source_id(mci);
+               mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge SrcID#%d_Ha#%d",
+                       pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
                break;
        case HASWELL:
                /* rankcfgr isn't used */
@@ -3261,12 +3197,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
                pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
                pvt->info.interleave_pkg = ibridge_interleave_pkg;
                pvt->info.get_width = ibridge_get_width;
-               mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell Socket#%d", mci->mc_idx);
 
                /* Store pci devices at mci for faster access */
                rc = haswell_mci_bind_devs(mci, sbridge_dev);
                if (unlikely(rc < 0))
                        goto fail0;
+               get_source_id(mci);
+               mci->ctl_name = kasprintf(GFP_KERNEL, "Haswell SrcID#%d_Ha#%d",
+                       pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
                break;
        case BROADWELL:
                /* rankcfgr isn't used */
@@ -3284,12 +3222,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
                pvt->info.max_interleave = ARRAY_SIZE(ibridge_interleave_list);
                pvt->info.interleave_pkg = ibridge_interleave_pkg;
                pvt->info.get_width = broadwell_get_width;
-               mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell Socket#%d", mci->mc_idx);
 
                /* Store pci devices at mci for faster access */
                rc = broadwell_mci_bind_devs(mci, sbridge_dev);
                if (unlikely(rc < 0))
                        goto fail0;
+               get_source_id(mci);
+               mci->ctl_name = kasprintf(GFP_KERNEL, "Broadwell SrcID#%d_Ha#%d",
+                       pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
                break;
        case KNIGHTS_LANDING:
                /* pvt->info.rankcfgr == ??? */
@@ -3307,17 +3247,22 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
                pvt->info.max_interleave = ARRAY_SIZE(knl_interleave_list);
                pvt->info.interleave_pkg = ibridge_interleave_pkg;
                pvt->info.get_width = knl_get_width;
-               mci->ctl_name = kasprintf(GFP_KERNEL,
-                       "Knights Landing Socket#%d", mci->mc_idx);
 
                rc = knl_mci_bind_devs(mci, sbridge_dev);
                if (unlikely(rc < 0))
                        goto fail0;
+               get_source_id(mci);
+               mci->ctl_name = kasprintf(GFP_KERNEL, "Knights Landing SrcID#%d_Ha#%d",
+                       pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom);
                break;
        }
 
        /* Get dimm basic config and the memory layout */
-       get_dimm_config(mci);
+       rc = get_dimm_config(mci);
+       if (rc < 0) {
+               edac_dbg(0, "MC: failed to get_dimm_config()\n");
+               goto fail;
+       }
        get_memory_layout(mci);
 
        /* record ptr to the generic device */
@@ -3327,13 +3272,14 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
        if (unlikely(edac_mc_add_mc(mci))) {
                edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
                rc = -EINVAL;
-               goto fail0;
+               goto fail;
        }
 
        return 0;
 
-fail0:
+fail:
        kfree(mci->ctl_name);
+fail0:
        edac_mc_free(mci);
        sbridge_dev->mci = NULL;
        return rc;
index 86d585c..2d352b4 100644 (file)
@@ -2080,7 +2080,7 @@ static int thunderx_l2c_probe(struct pci_dev *pdev,
        if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
                l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
 
-               thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
+               ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
                                              l2c, dfs_entries);
 
                if (ret != dfs_entries) {
index dc269cb..951b6c7 100644 (file)
@@ -47,7 +47,7 @@ DEFINE_DMI_ATTR_WITH_SHOW(product_name,               0444, DMI_PRODUCT_NAME);
 DEFINE_DMI_ATTR_WITH_SHOW(product_version,     0444, DMI_PRODUCT_VERSION);
 DEFINE_DMI_ATTR_WITH_SHOW(product_serial,      0400, DMI_PRODUCT_SERIAL);
 DEFINE_DMI_ATTR_WITH_SHOW(product_uuid,                0400, DMI_PRODUCT_UUID);
-DEFINE_DMI_ATTR_WITH_SHOW(product_family,      0400, DMI_PRODUCT_FAMILY);
+DEFINE_DMI_ATTR_WITH_SHOW(product_family,      0444, DMI_PRODUCT_FAMILY);
 DEFINE_DMI_ATTR_WITH_SHOW(board_vendor,                0444, DMI_BOARD_VENDOR);
 DEFINE_DMI_ATTR_WITH_SHOW(board_name,          0444, DMI_BOARD_NAME);
 DEFINE_DMI_ATTR_WITH_SHOW(board_version,       0444, DMI_BOARD_VERSION);
@@ -192,7 +192,7 @@ static void __init dmi_id_init_attr_table(void)
        ADD_DMI_ATTR(product_version,   DMI_PRODUCT_VERSION);
        ADD_DMI_ATTR(product_serial,    DMI_PRODUCT_SERIAL);
        ADD_DMI_ATTR(product_uuid,      DMI_PRODUCT_UUID);
-       ADD_DMI_ATTR(product_family,      DMI_PRODUCT_FAMILY);
+       ADD_DMI_ATTR(product_family,    DMI_PRODUCT_FAMILY);
        ADD_DMI_ATTR(board_vendor,      DMI_BOARD_VENDOR);
        ADD_DMI_ATTR(board_name,        DMI_BOARD_NAME);
        ADD_DMI_ATTR(board_version,     DMI_BOARD_VERSION);
index 93f7acd..7830419 100644 (file)
@@ -144,7 +144,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
 
        buf = dmi_early_remap(dmi_base, orig_dmi_len);
        if (buf == NULL)
-               return -1;
+               return -ENOMEM;
 
        dmi_decode_table(buf, decode, NULL);
 
@@ -178,7 +178,7 @@ static void __init dmi_save_ident(const struct dmi_header *dm, int slot,
        const char *d = (const char *) dm;
        const char *p;
 
-       if (dmi_ident[slot])
+       if (dmi_ident[slot] || dm->length <= string)
                return;
 
        p = dmi_string(dm, d[string]);
@@ -191,13 +191,14 @@ static void __init dmi_save_ident(const struct dmi_header *dm, int slot,
 static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
                int index)
 {
-       const u8 *d = (u8 *) dm + index;
+       const u8 *d;
        char *s;
        int is_ff = 1, is_00 = 1, i;
 
-       if (dmi_ident[slot])
+       if (dmi_ident[slot] || dm->length <= index + 16)
                return;
 
+       d = (u8 *) dm + index;
        for (i = 0; i < 16 && (is_ff || is_00); i++) {
                if (d[i] != 0x00)
                        is_00 = 0;
@@ -228,16 +229,17 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
 static void __init dmi_save_type(const struct dmi_header *dm, int slot,
                int index)
 {
-       const u8 *d = (u8 *) dm + index;
+       const u8 *d;
        char *s;
 
-       if (dmi_ident[slot])
+       if (dmi_ident[slot] || dm->length <= index)
                return;
 
        s = dmi_alloc(4);
        if (!s)
                return;
 
+       d = (u8 *) dm + index;
        sprintf(s, "%u", *d & 0x7F);
        dmi_ident[slot] = s;
 }
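The pattern applied throughout these dmi_scan.c hunks is uniform: never dereference a field at a fixed offset into an SMBIOS record without first checking that the record's self-declared length (dm->length) actually covers it, since firmware may hand back truncated structures. A minimal sketch of the idea, where dmi_field() is a hypothetical helper rather than a kernel API:

    /* Hypothetical: return 'len' bytes at 'offset' inside the record,
     * or NULL when the record is too short to contain them. */
    static const u8 *dmi_field(const struct dmi_header *dm,
                               unsigned int offset, unsigned int len)
    {
            if (dm->length < offset + len)
                    return NULL;
            return (const u8 *)dm + offset;
    }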
@@ -278,9 +280,13 @@ static void __init dmi_save_devices(const struct dmi_header *dm)
 
 static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
 {
-       int i, count = *(u8 *)(dm + 1);
+       int i, count;
        struct dmi_device *dev;
 
+       if (dm->length < 0x05)
+               return;
+
+       count = *(u8 *)(dm + 1);
        for (i = 1; i <= count; i++) {
                const char *devname = dmi_string(dm, i);
 
@@ -353,6 +359,9 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm)
        const char *name;
        const u8 *d = (u8 *)dm;
 
+       if (dm->length < 0x0B)
+               return;
+
        /* Skip disabled device */
        if ((d[0x5] & 0x80) == 0)
                return;
@@ -387,7 +396,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v)
        const char *d = (const char *)dm;
        static int nr;
 
-       if (dm->type != DMI_ENTRY_MEM_DEVICE)
+       if (dm->type != DMI_ENTRY_MEM_DEVICE || dm->length < 0x12)
                return;
        if (nr >= dmi_memdev_nr) {
                pr_warn(FW_BUG "Too many DIMM entries in SMBIOS table\n");
@@ -650,6 +659,21 @@ void __init dmi_scan_machine(void)
                        goto error;
 
                /*
+                * Same logic as above, look for a 64-bit entry point
+                * first, and if not found, fall back to 32-bit entry point.
+                */
+               memcpy_fromio(buf, p, 16);
+               for (q = p + 16; q < p + 0x10000; q += 16) {
+                       memcpy_fromio(buf + 16, q, 16);
+                       if (!dmi_smbios3_present(buf)) {
+                               dmi_available = 1;
+                               dmi_early_unmap(p, 0x10000);
+                               goto out;
+                       }
+                       memcpy(buf, buf + 16, 16);
+               }
+
+               /*
                 * Iterate over all possible DMI header addresses q.
                 * Maintain the 32 bytes around q in buf.  On the
                 * first iteration, substitute zero for the
@@ -659,7 +683,7 @@ void __init dmi_scan_machine(void)
                memset(buf, 0, 16);
                for (q = p; q < p + 0x10000; q += 16) {
                        memcpy_fromio(buf + 16, q, 16);
-                       if (!dmi_smbios3_present(buf) || !dmi_present(buf)) {
+                       if (!dmi_present(buf)) {
                                dmi_available = 1;
                                dmi_early_unmap(p, 0x10000);
                                goto out;
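Both scan loops work the same way: slide a 16-byte window through the 64 KiB legacy BIOS range and test each paragraph boundary for an entry-point anchor, "_SM3_" (64-bit, SMBIOS 3.0) in the new first pass and the 32-bit anchors in the fallback. An illustrative anchor test; the real dmi_smbios3_present() also validates checksum and version:

    /* Sketch: a 64-bit SMBIOS entry point starts with "_SM3_". */
    static bool looks_like_smbios3(const u8 *buf)
    {
            return memcmp(buf, "_SM3_", 5) == 0;
    }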
@@ -993,7 +1017,8 @@ EXPORT_SYMBOL(dmi_get_date);
  *     @decode: Callback function
  *     @private_data: Private data to be passed to the callback function
  *
- *     Returns -1 when the DMI table can't be reached, 0 on success.
+ *     Returns 0 on success, -ENXIO if DMI is not selected or not present,
+ *     or a different negative error code if DMI walking fails.
  */
 int dmi_walk(void (*decode)(const struct dmi_header *, void *),
             void *private_data)
@@ -1001,11 +1026,11 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *),
        u8 *buf;
 
        if (!dmi_available)
-               return -1;
+               return -ENXIO;
 
        buf = dmi_remap(dmi_base, dmi_len);
        if (buf == NULL)
-               return -1;
+               return -ENOMEM;
 
        dmi_decode_table(buf, decode, private_data);
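With distinct errno values, callers can now tell "this platform has no DMI" apart from a transient mapping failure. A hypothetical caller, using the DMI_ENTRY_MEM_DEVICE type seen earlier in this file:

    static void count_mem_devices(const struct dmi_header *dm, void *data)
    {
            if (dm->type == DMI_ENTRY_MEM_DEVICE)
                    (*(int *)data)++;
    }

    int n = 0;
    int err = dmi_walk(count_mem_devices, &n);

    if (err == -ENXIO)
            pr_info("platform has no DMI table\n");
    else if (err)
            pr_warn("DMI walk failed: %d\n", err);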
 
index 5104b63..c83ea68 100644 (file)
@@ -721,7 +721,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
        u32 set;
 
        if (!of_device_is_compatible(mvchip->chip.of_node,
-                                    "marvell,armada-370-xp-gpio"))
+                                    "marvell,armada-370-gpio"))
                return 0;
 
        if (IS_ERR(mvchip->clk))
@@ -852,7 +852,7 @@ static const struct of_device_id mvebu_gpio_of_match[] = {
                .data       = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP,
        },
        {
-               .compatible = "marvell,armada-370-xp-gpio",
+               .compatible = "marvell,armada-370-gpio",
                .data       = (void *) MVEBU_GPIO_SOC_VARIANT_ORION,
        },
        {
@@ -1128,7 +1128,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
                                                 mvchip);
        }
 
-       /* Armada 370/XP has simple PWM support for GPIO lines */
+       /* Some MVEBU SoCs have simple PWM support for GPIO lines */
        if (IS_ENABLED(CONFIG_PWM))
                return mvebu_pwm_probe(pdev, mvchip, id);
 
index 2185232..8fa5fcd 100644 (file)
@@ -201,7 +201,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
                        handler = acpi_gpio_irq_handler_evt;
        }
        if (!handler)
-               return AE_BAD_PARAMETER;
+               return AE_OK;
 
        pin = acpi_gpiochip_pin_to_gpio_offset(chip->gpiodev, pin);
        if (pin < 0)
index 5db4413..a42a1ee 100644 (file)
@@ -708,7 +708,8 @@ static irqreturn_t lineevent_irq_thread(int irq, void *p)
 
        ge.timestamp = ktime_get_real_ns();
 
-       if (le->eflags & GPIOEVENT_REQUEST_BOTH_EDGES) {
+       if (le->eflags & GPIOEVENT_REQUEST_RISING_EDGE
+           && le->eflags & GPIOEVENT_REQUEST_FALLING_EDGE) {
                int level = gpiod_get_value_cansleep(le->desc);
 
                if (level)
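The bug being fixed is the classic multi-bit-mask test: GPIOEVENT_REQUEST_BOTH_EDGES is defined as (RISING | FALLING), so "eflags & BOTH_EDGES" is true when either edge was requested, not only when both were. In miniature:

    u32 eflags = GPIOEVENT_REQUEST_RISING_EDGE;       /* rising only      */

    if (eflags & GPIOEVENT_REQUEST_BOTH_EDGES)        /* taken -- wrong   */
            ;
    if ((eflags & GPIOEVENT_REQUEST_RISING_EDGE) &&
        (eflags & GPIOEVENT_REQUEST_FALLING_EDGE))    /* skipped -- right */
            ;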
index 1cf78f4..1e8e112 100644 (file)
@@ -693,6 +693,10 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
                        DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
                                 adev->clock.default_dispclk / 100);
                        adev->clock.default_dispclk = 60000;
+               } else if (adev->clock.default_dispclk <= 60000) {
+                       DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n",
+                                adev->clock.default_dispclk / 100);
+                       adev->clock.default_dispclk = 62500;
                }
                adev->clock.dp_extclk =
                        le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
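For the clamp above: default_dispclk is kept in units of 10 kHz, which is why 60000 means 600 MHz, 62500 means 625 MHz, and the DRM_INFO messages divide by 100 to print MHz:

    u32 dispclk = 62500;        /* 10 kHz units       */
    u32 mhz = dispclk / 100;    /* == 625, as printed */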
index f2d705e..ab6b0d0 100644 (file)
@@ -449,6 +449,7 @@ static const struct pci_device_id pciidlist[] = {
        {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
        {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
        {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+       {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
        {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
        /* Vega 10 */
        {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
index 8c9bc75..8a0818b 100644 (file)
@@ -165,7 +165,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
        struct drm_device *dev = crtc->dev;
        struct amdgpu_device *adev = dev->dev_private;
        int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
-       ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+       ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
 
        memset(&args, 0, sizeof(args));
 
@@ -178,7 +178,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
 void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
 {
        int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
-       ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+       ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
 
        memset(&args, 0, sizeof(args));
 
index 0cdeb6a..5dffa27 100644 (file)
@@ -1207,8 +1207,11 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
        u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
        if (amdgpu_crtc->base.enabled && num_heads && mode) {
-               active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-               line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+               active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+                                           (u32)mode->clock);
+               line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+                                         (u32)mode->clock);
+               line_time = min(line_time, (u32)65535);
 
                /* watermark for high clocks */
                if (adev->pm.dpm_enabled) {
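The identical fix recurs in the DCE v11/v6/v8 hunks below. The old expression multiplied in unsigned long, which is 32 bits on 32-bit kernels, so crtc_htotal * 1000000 could overflow for large modes; widening to u64 and dividing with div_u64() (<linux/math64.h>) is safe on every architecture. Distilled:

    /* Overflow-safe line time, clamped to the 16-bit register field. */
    static u32 line_time(u32 crtc_htotal, u32 clock)
    {
            u32 t = (u32)div_u64((u64)crtc_htotal * 1000000, clock);

            return min(t, (u32)65535);
    }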
index 773654a..47bbc87 100644 (file)
@@ -1176,8 +1176,11 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
        u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
        if (amdgpu_crtc->base.enabled && num_heads && mode) {
-               active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-               line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+               active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+                                           (u32)mode->clock);
+               line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+                                         (u32)mode->clock);
+               line_time = min(line_time, (u32)65535);
 
                /* watermark for high clocks */
                if (adev->pm.dpm_enabled) {
index 1f35529..d8c9a95 100644 (file)
@@ -983,8 +983,11 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
        fixed20_12 a, b, c;
 
        if (amdgpu_crtc->base.enabled && num_heads && mode) {
-               active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-               line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+               active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+                                           (u32)mode->clock);
+               line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+                                         (u32)mode->clock);
+               line_time = min(line_time, (u32)65535);
                priority_a_cnt = 0;
                priority_b_cnt = 0;
 
index 3c558c1..db30c6b 100644 (file)
@@ -1091,8 +1091,11 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
        u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
        if (amdgpu_crtc->base.enabled && num_heads && mode) {
-               active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-               line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+               active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+                                           (u32)mode->clock);
+               line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+                                         (u32)mode->clock);
+               line_time = min(line_time, (u32)65535);
 
                /* watermark for high clocks */
                if (adev->pm.dpm_enabled) {
index 40d2827..53e78d0 100644 (file)
@@ -1,6 +1,7 @@
 config DRM_DW_HDMI
        tristate
        select DRM_KMS_HELPER
+       select REGMAP_MMIO
 
 config DRM_DW_HDMI_AHB_AUDIO
        tristate "Synopsys Designware AHB Audio interface"
index 9f84761..48ca245 100644 (file)
@@ -1229,21 +1229,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
        if (!connector)
                return -ENOENT;
 
-       drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
-       encoder = drm_connector_get_encoder(connector);
-       if (encoder)
-               out_resp->encoder_id = encoder->base.id;
-       else
-               out_resp->encoder_id = 0;
-
-       ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
-                       (uint32_t __user *)(unsigned long)(out_resp->props_ptr),
-                       (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
-                       &out_resp->count_props);
-       drm_modeset_unlock(&dev->mode_config.connection_mutex);
-       if (ret)
-               goto out_unref;
-
        for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++)
                if (connector->encoder_ids[i] != 0)
                        encoders_count++;
@@ -1256,7 +1241,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
                                if (put_user(connector->encoder_ids[i],
                                             encoder_ptr + copied)) {
                                        ret = -EFAULT;
-                                       goto out_unref;
+                                       goto out;
                                }
                                copied++;
                        }
@@ -1300,15 +1285,32 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
                        if (copy_to_user(mode_ptr + copied,
                                         &u_mode, sizeof(u_mode))) {
                                ret = -EFAULT;
+                               mutex_unlock(&dev->mode_config.mutex);
+
                                goto out;
                        }
                        copied++;
                }
        }
        out_resp->count_modes = mode_count;
-out:
        mutex_unlock(&dev->mode_config.mutex);
-out_unref:
+
+       drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+       encoder = drm_connector_get_encoder(connector);
+       if (encoder)
+               out_resp->encoder_id = encoder->base.id;
+       else
+               out_resp->encoder_id = 0;
+
+       /* Only grab properties after probing, to make sure EDID and other
+        * properties reflect the latest status. */
+       ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
+                       (uint32_t __user *)(unsigned long)(out_resp->props_ptr),
+                       (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
+                       &out_resp->count_props);
+       drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+out:
        drm_connector_put(connector);
 
        return ret;
index c4a091e..e437fba 100644 (file)
@@ -106,9 +106,10 @@ struct etnaviv_gem_submit {
        struct etnaviv_gpu *gpu;
        struct ww_acquire_ctx ticket;
        struct dma_fence *fence;
+       u32 flags;
        unsigned int nr_bos;
        struct etnaviv_gem_submit_bo bos[0];
-       u32 flags;
+       /* No new members here, the previous one is variable-length! */
 };
 
 int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
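What the reorder avoids, in miniature: a member declared after a zero-length trailing array shares storage with the array's elements, so filling bos[] clobbered flags:

    struct demo {
            unsigned int nr;
            int items[0];   /* meant to be the variable-length tail... */
            u32 flags;      /* BUG: occupies the bytes of items[0]     */
    };
    /* writing items[0] overwrites flags -- hence flags moved up. */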
index de80ee1..1013765 100644 (file)
@@ -172,7 +172,7 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
        for (i = 0; i < submit->nr_bos; i++) {
                struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
                bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
-               bool explicit = !(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
+               bool explicit = !!(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
 
                ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write,
                                                 explicit);
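Two things happen in that tiny change: the polarity flips ("explicit" is now true when the NO_IMPLICIT flag is present) and "!!" collapses the masked bit into a clean 0/1 for the bool:

    u32 flags = ETNA_SUBMIT_NO_IMPLICIT;

    bool before = !(flags & ETNA_SUBMIT_NO_IMPLICIT);    /* false         */
    bool after  = !!(flags & ETNA_SUBMIT_NO_IMPLICIT);   /* true: bit set */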
index d689e51..4bd1467 100644 (file)
@@ -292,6 +292,8 @@ static int per_file_stats(int id, void *ptr, void *data)
        struct file_stats *stats = data;
        struct i915_vma *vma;
 
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
        stats->count++;
        stats->total += obj->base.size;
        if (!obj->bind_count)
@@ -476,6 +478,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
                struct drm_i915_gem_request *request;
                struct task_struct *task;
 
+               mutex_lock(&dev->struct_mutex);
+
                memset(&stats, 0, sizeof(stats));
                stats.file_priv = file->driver_priv;
                spin_lock(&file->table_lock);
@@ -487,7 +491,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
                 * still alive (e.g. get_pid(current) => fork() => exit()).
                 * Therefore, we need to protect this ->comm access using RCU.
                 */
-               mutex_lock(&dev->struct_mutex);
                request = list_first_entry_or_null(&file_priv->mm.request_list,
                                                   struct drm_i915_gem_request,
                                                   client_link);
@@ -497,6 +500,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
                                PIDTYPE_PID);
                print_file_stats(m, task ? task->comm : "<unknown>", stats);
                rcu_read_unlock();
+
                mutex_unlock(&dev->struct_mutex);
        }
        mutex_unlock(&dev->filelist_mutex);
index 462031c..615f0a8 100644 (file)
@@ -2285,8 +2285,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
        struct page *page;
        unsigned long last_pfn = 0;     /* suppress gcc warning */
        unsigned int max_segment;
+       gfp_t noreclaim;
        int ret;
-       gfp_t gfp;
 
        /* Assert that the object is not currently in any GPU domain. As it
         * wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2315,22 +2315,31 @@ rebuild_st:
         * Fail silently without starting the shrinker
         */
        mapping = obj->base.filp->f_mapping;
-       gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
-       gfp |= __GFP_NORETRY | __GFP_NOWARN;
+       noreclaim = mapping_gfp_constraint(mapping,
+                                          ~(__GFP_IO | __GFP_RECLAIM));
+       noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
        sg = st->sgl;
        st->nents = 0;
        for (i = 0; i < page_count; i++) {
-               page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-               if (unlikely(IS_ERR(page))) {
-                       i915_gem_shrink(dev_priv,
-                                       page_count,
-                                       I915_SHRINK_BOUND |
-                                       I915_SHRINK_UNBOUND |
-                                       I915_SHRINK_PURGEABLE);
+               const unsigned int shrink[] = {
+                       I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
+                       0,
+               }, *s = shrink;
+               gfp_t gfp = noreclaim;
+
+               do {
                        page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-               }
-               if (unlikely(IS_ERR(page))) {
-                       gfp_t reclaim;
+                       if (likely(!IS_ERR(page)))
+                               break;
+
+                       if (!*s) {
+                               ret = PTR_ERR(page);
+                               goto err_sg;
+                       }
+
+                       i915_gem_shrink(dev_priv, 2 * page_count, *s++);
+                       cond_resched();
 
                        /* We've tried hard to allocate the memory by reaping
                         * our own buffer, now let the real VM do its job and
@@ -2340,15 +2349,26 @@ rebuild_st:
                         * defer the oom here by reporting the ENOMEM back
                         * to userspace.
                         */
-                       reclaim = mapping_gfp_mask(mapping);
-                       reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
-
-                       page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
-                       if (IS_ERR(page)) {
-                               ret = PTR_ERR(page);
-                               goto err_sg;
+                       if (!*s) {
+                               /* reclaim and warn, but no oom */
+                               gfp = mapping_gfp_mask(mapping);
+
+                               /* Our bo are always dirty and so we require
+                                * kswapd to reclaim our pages (direct reclaim
+                                * does not effectively begin pageout of our
+                                * buffers on its own). However, direct reclaim
+                                * only waits for kswapd when under allocation
+                                * congestion. So as a result __GFP_RECLAIM is
+                                * unreliable and fails to actually reclaim our
+                                * dirty pages -- unless you try over and over
+                                * again with !__GFP_NORETRY. However, we still
+                                * want to fail this allocation rather than
+                                * trigger the out-of-memory killer and for
+                                * this we want the future __GFP_MAYFAIL.
+                                */
                        }
-               }
+               } while (1);
+
                if (!i ||
                    sg->length >= max_segment ||
                    page_to_pfn(page) != last_pfn + 1) {
@@ -4222,6 +4242,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 
        mapping = obj->base.filp->f_mapping;
        mapping_set_gfp_mask(mapping, mask);
+       GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
 
        i915_gem_object_init(obj, &i915_gem_object_ops);
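The rewritten loop above is a retry-escalation pattern: try the allocation with reclaim disabled, and on failure walk a zero-terminated table of progressively harsher i915_gem_shrink() passes before finally permitting full reclaim. Its shape, distilled with hypothetical try_alloc()/reclaim() stand-ins and phase values:

    static void *alloc_with_escalation(void)
    {
            static const unsigned int phases[] = { PHASE_LIGHT, PHASE_HARD, 0 };
            const unsigned int *s = phases;
            void *p;

            for (;;) {
                    p = try_alloc();        /* hypothetical             */
                    if (p)
                            return p;
                    if (!*s)
                            return NULL;    /* fallback table exhausted */
                    reclaim(*s++);          /* hypothetical             */
                    cond_resched();         /* yield, as the hunk does  */
            }
    }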
 
index a3e59c8..9ad13ee 100644 (file)
@@ -546,11 +546,12 @@ repeat:
 }
 
 static int
-i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
+i915_gem_execbuffer_relocate_entry(struct i915_vma *vma,
                                   struct eb_vmas *eb,
                                   struct drm_i915_gem_relocation_entry *reloc,
                                   struct reloc_cache *cache)
 {
+       struct drm_i915_gem_object *obj = vma->obj;
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        struct drm_gem_object *target_obj;
        struct drm_i915_gem_object *target_i915_obj;
@@ -628,6 +629,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                return -EINVAL;
        }
 
+       /*
+        * If we write into the object, we need to force the synchronisation
+        * barrier, either with an asynchronous clflush or if we executed the
+        * patching using the GPU (though that should be serialised by the
+        * timeline). To be completely sure, and since we are required to
+        * do relocations we are already stalling, disable the user's opt
+        * out of our synchronisation.
+        */
+       vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+
        ret = relocate_entry(obj, reloc, cache, target_offset);
        if (ret)
                return ret;
@@ -678,7 +689,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
                do {
                        u64 offset = r->presumed_offset;
 
-                       ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
+                       ret = i915_gem_execbuffer_relocate_entry(vma, eb, r, &cache);
                        if (ret)
                                goto out;
 
@@ -726,7 +737,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 
        reloc_cache_init(&cache, eb->i915);
        for (i = 0; i < entry->relocation_count; i++) {
-               ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
+               ret = i915_gem_execbuffer_relocate_entry(vma, eb, &relocs[i], &cache);
                if (ret)
                        break;
        }
index 5ddbc94..a74d0ac 100644 (file)
@@ -623,7 +623,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         * GPU processing the request, we never over-estimate the
         * position of the head.
         */
-       req->head = req->ring->tail;
+       req->head = req->ring->emit;
 
        /* Check that we didn't interrupt ourselves with a new request */
        GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
index 1642fff..ab5140b 100644 (file)
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
        GEM_BUG_ON(freespace < wqi_size);
 
        /* The GuC firmware wants the tail index in QWords, not bytes */
-       tail = rq->tail;
-       assert_ring_tail_valid(rq->ring, rq->tail);
-       tail >>= 3;
+       tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
        GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 
        /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
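The ">> 3" turns the byte offset returned by intel_ring_set_tail() into the QWord index the firmware expects, 8 bytes per QWord:

    u32 tail_bytes  = 96;
    u32 tail_qwords = tail_bytes >> 3;    /* 96 / 8 == 12 */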
index c0cb297..2cfe96d 100644 (file)
 #define VGT_VERSION_MAJOR 1
 #define VGT_VERSION_MINOR 0
 
-#define INTEL_VGT_IF_VERSION_ENCODE(major, minor) ((major) << 16 | (minor))
-#define INTEL_VGT_IF_VERSION \
-       INTEL_VGT_IF_VERSION_ENCODE(VGT_VERSION_MAJOR, VGT_VERSION_MINOR)
-
 /*
  * notifications from guest to vgpu device model
  */
@@ -55,8 +51,8 @@ enum vgt_g2v_type {
 
 struct vgt_if {
        u64 magic;              /* VGT_MAGIC */
-       uint16_t version_major;
-       uint16_t version_minor;
+       u16 version_major;
+       u16 version_minor;
        u32 vgt_id;             /* ID of vGT instance */
        u32 rsv1[12];           /* pad to offset 0x40 */
        /*
index 4ab8a97..2e73901 100644 (file)
@@ -60,8 +60,8 @@
  */
 void i915_check_vgpu(struct drm_i915_private *dev_priv)
 {
-       uint64_t magic;
-       uint32_t version;
+       u64 magic;
+       u16 version_major;
 
        BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 
@@ -69,10 +69,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
        if (magic != VGT_MAGIC)
                return;
 
-       version = INTEL_VGT_IF_VERSION_ENCODE(
-               __raw_i915_read16(dev_priv, vgtif_reg(version_major)),
-               __raw_i915_read16(dev_priv, vgtif_reg(version_minor)));
-       if (version != INTEL_VGT_IF_VERSION) {
+       version_major = __raw_i915_read16(dev_priv, vgtif_reg(version_major));
+       if (version_major < VGT_VERSION_MAJOR) {
                DRM_INFO("VGT interface version mismatch!\n");
                return;
        }
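The effect of dropping the ENCODE macro: the old test compared the packed (major << 16 | minor) value for exact equality, rejecting any host minor-version bump; the new test only requires a sufficiently new major version. In numbers:

    u32 host = (1 << 16) | 1;       /* host reports interface 1.1   */
    u32 want = (1 << 16) | 0;       /* driver was built against 1.0 */
    /* old: host != want        -> mismatch, vGPU support refused   */
    /* new: (host >> 16) >= 1   -> accepted                         */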
index 1aba470..f066e2d 100644 (file)
@@ -650,6 +650,11 @@ int i915_vma_unbind(struct i915_vma *vma)
                                break;
                }
 
+               if (!ret) {
+                       ret = i915_gem_active_retire(&vma->last_fence,
+                                                    &vma->vm->i915->drm.struct_mutex);
+               }
+
                __i915_vma_unpin(vma);
                if (ret)
                        return ret;
index eb638a1..42fb436 100644 (file)
@@ -15,13 +15,9 @@ static struct intel_dsm_priv {
        acpi_handle dhandle;
 } intel_dsm_priv;
 
-static const u8 intel_dsm_guid[] = {
-       0xd3, 0x73, 0xd8, 0x7e,
-       0xd0, 0xc2,
-       0x4f, 0x4e,
-       0xa8, 0x54,
-       0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c
-};
+static const guid_t intel_dsm_guid =
+       GUID_INIT(0x7ed873d3, 0xc2d0, 0x4e4f,
+                 0xa8, 0x54, 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c);
 
 static char *intel_dsm_port_name(u8 id)
 {
@@ -80,7 +76,7 @@ static void intel_dsm_platform_mux_info(void)
        int i;
        union acpi_object *pkg, *connector_count;
 
-       pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, intel_dsm_guid,
+       pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, &intel_dsm_guid,
                        INTEL_DSM_REVISION_ID, INTEL_DSM_FN_PLATFORM_MUX_INFO,
                        NULL, ACPI_TYPE_PACKAGE);
        if (!pkg) {
@@ -118,7 +114,7 @@ static bool intel_dsm_pci_probe(struct pci_dev *pdev)
        if (!dhandle)
                return false;
 
-       if (!acpi_check_dsm(dhandle, intel_dsm_guid, INTEL_DSM_REVISION_ID,
+       if (!acpi_check_dsm(dhandle, &intel_dsm_guid, INTEL_DSM_REVISION_ID,
                            1 << INTEL_DSM_FN_PLATFORM_MUX_INFO)) {
                DRM_DEBUG_KMS("no _DSM method for intel device\n");
                return false;
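The conversion rule used here and in the nouveau hunk further down: the first three GUID fields sit little-endian in the raw byte array, so they read reversed when written as integers for GUID_INIT(), while the trailing eight bytes keep their order:

    /* bytes:  d3 73 d8 7e | d0 c2 | 4f 4e | a8 54 0f 13 17 b0 1c 2c */
    /* fields: 0x7ed873d3    0xc2d0  0x4e4f   (last 8 bytes as-is)   */
    static const guid_t example =
            GUID_INIT(0x7ed873d3, 0xc2d0, 0x4e4f,
                      0xa8, 0x54, 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c);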
index 569717a..9106ea3 100644 (file)
@@ -120,7 +120,8 @@ static void intel_crtc_init_scalers(struct intel_crtc *crtc,
 static void skylake_pfit_enable(struct intel_crtc *crtc);
 static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force);
 static void ironlake_pfit_enable(struct intel_crtc *crtc);
-static void intel_modeset_setup_hw_state(struct drm_device *dev);
+static void intel_modeset_setup_hw_state(struct drm_device *dev,
+                                        struct drm_modeset_acquire_ctx *ctx);
 static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
 
 struct intel_limit {
@@ -3449,7 +3450,7 @@ __intel_display_resume(struct drm_device *dev,
        struct drm_crtc *crtc;
        int i, ret;
 
-       intel_modeset_setup_hw_state(dev);
+       intel_modeset_setup_hw_state(dev, ctx);
        i915_redisable_vga(to_i915(dev));
 
        if (!state)
@@ -4598,7 +4599,7 @@ static void cpt_verify_modeset(struct drm_device *dev, int pipe)
 
 static int
 skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
-                 unsigned scaler_user, int *scaler_id, unsigned int rotation,
+                 unsigned int scaler_user, int *scaler_id,
                  int src_w, int src_h, int dst_w, int dst_h)
 {
        struct intel_crtc_scaler_state *scaler_state =
@@ -4607,9 +4608,12 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
                to_intel_crtc(crtc_state->base.crtc);
        int need_scaling;
 
-       need_scaling = drm_rotation_90_or_270(rotation) ?
-               (src_h != dst_w || src_w != dst_h):
-               (src_w != dst_w || src_h != dst_h);
+       /*
+        * Src coordinates are already rotated by 270 degrees for
+        * the 90/270 degree plane rotation cases (to match the
+        * GTT mapping), hence no need to account for rotation here.
+        */
+       need_scaling = src_w != dst_w || src_h != dst_h;
 
        /*
         * if plane is being disabled or scaler is no more required or force detach
@@ -4671,7 +4675,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state)
        const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode;
 
        return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
-               &state->scaler_state.scaler_id, DRM_ROTATE_0,
+               &state->scaler_state.scaler_id,
                state->pipe_src_w, state->pipe_src_h,
                adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
 }
@@ -4700,7 +4704,6 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state,
        ret = skl_update_scaler(crtc_state, force_detach,
                                drm_plane_index(&intel_plane->base),
                                &plane_state->scaler_id,
-                               plane_state->base.rotation,
                                drm_rect_width(&plane_state->base.src) >> 16,
                                drm_rect_height(&plane_state->base.src) >> 16,
                                drm_rect_width(&plane_state->base.dst),
@@ -5823,7 +5826,8 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
                intel_update_watermarks(intel_crtc);
 }
 
-static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
+static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
+                                       struct drm_modeset_acquire_ctx *ctx)
 {
        struct intel_encoder *encoder;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -5853,7 +5857,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
                return;
        }
 
-       state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
+       state->acquire_ctx = ctx;
 
        /* Everything's already locked, -EDEADLK can't happen. */
        crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
@@ -15028,7 +15032,7 @@ int intel_modeset_init(struct drm_device *dev)
        intel_setup_outputs(dev_priv);
 
        drm_modeset_lock_all(dev);
-       intel_modeset_setup_hw_state(dev);
+       intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx);
        drm_modeset_unlock_all(dev);
 
        for_each_intel_crtc(dev, crtc) {
@@ -15065,13 +15069,13 @@ int intel_modeset_init(struct drm_device *dev)
        return 0;
 }
 
-static void intel_enable_pipe_a(struct drm_device *dev)
+static void intel_enable_pipe_a(struct drm_device *dev,
+                               struct drm_modeset_acquire_ctx *ctx)
 {
        struct intel_connector *connector;
        struct drm_connector_list_iter conn_iter;
        struct drm_connector *crt = NULL;
        struct intel_load_detect_pipe load_detect_temp;
-       struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx;
        int ret;
 
        /* We can't just switch on the pipe A, we need to set things up with a
@@ -15143,7 +15147,8 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
                (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
 }
 
-static void intel_sanitize_crtc(struct intel_crtc *crtc)
+static void intel_sanitize_crtc(struct intel_crtc *crtc,
+                               struct drm_modeset_acquire_ctx *ctx)
 {
        struct drm_device *dev = crtc->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
@@ -15189,7 +15194,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
                plane = crtc->plane;
                crtc->base.primary->state->visible = true;
                crtc->plane = !plane;
-               intel_crtc_disable_noatomic(&crtc->base);
+               intel_crtc_disable_noatomic(&crtc->base, ctx);
                crtc->plane = plane;
        }
 
@@ -15199,13 +15204,13 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
                 * resume. Force-enable the pipe to fix this, the update_dpms
                 * call below we restore the pipe to the right state, but leave
                 * the required bits on. */
-               intel_enable_pipe_a(dev);
+               intel_enable_pipe_a(dev, ctx);
        }
 
        /* Adjust the state of the output pipe according to whether we
         * have active connectors/encoders. */
        if (crtc->active && !intel_crtc_has_encoders(crtc))
-               intel_crtc_disable_noatomic(&crtc->base);
+               intel_crtc_disable_noatomic(&crtc->base, ctx);
 
        if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) {
                /*
@@ -15503,7 +15508,8 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv)
  * and sanitizes it to the current state
  */
 static void
-intel_modeset_setup_hw_state(struct drm_device *dev)
+intel_modeset_setup_hw_state(struct drm_device *dev,
+                            struct drm_modeset_acquire_ctx *ctx)
 {
        struct drm_i915_private *dev_priv = to_i915(dev);
        enum pipe pipe;
@@ -15523,7 +15529,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
        for_each_pipe(dev_priv, pipe) {
                crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
 
-               intel_sanitize_crtc(crtc);
+               intel_sanitize_crtc(crtc, ctx);
                intel_dump_pipe_config(crtc, crtc->config,
                                       "[setup_hw_state]");
        }
index 6532e22..40ba313 100644 (file)
@@ -119,8 +119,6 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
        struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
        struct intel_panel *panel = &connector->panel;
 
-       intel_dp_aux_enable_backlight(connector);
-
        if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT)
                panel->backlight.max = 0xFFFF;
        else
index dac4e00..62f44d3 100644 (file)
@@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
                rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
        u32 *reg_state = ce->lrc_reg_state;
 
-       assert_ring_tail_valid(rq->ring, rq->tail);
-       reg_state[CTX_RING_TAIL+1] = rq->tail;
+       reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 
        /* True 32b PPGTT with dynamic page allocation: update PDP
         * registers and point the unallocated PDPs to scratch page.
@@ -2036,8 +2035,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
                        ce->state->obj->mm.dirty = true;
                        i915_gem_object_unpin_map(ce->state->obj);
 
-                       ce->ring->head = ce->ring->tail = 0;
-                       intel_ring_update_space(ce->ring);
+                       intel_ring_reset(ce->ring, 0);
                }
        }
 }
index 2ca481b..078fd1b 100644 (file)
@@ -3373,20 +3373,26 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
 
        /* n.b., src is 16.16 fixed point, dst is whole integer */
        if (plane->id == PLANE_CURSOR) {
+               /*
+                * Cursors only support 0/180 degree rotation,
+                * hence no need to account for rotation here.
+                */
                src_w = pstate->base.src_w;
                src_h = pstate->base.src_h;
                dst_w = pstate->base.crtc_w;
                dst_h = pstate->base.crtc_h;
        } else {
+               /*
+                * Src coordinates are already rotated by 270 degrees for
+                * the 90/270 degree plane rotation cases (to match the
+                * GTT mapping), hence no need to account for rotation here.
+                */
                src_w = drm_rect_width(&pstate->base.src);
                src_h = drm_rect_height(&pstate->base.src);
                dst_w = drm_rect_width(&pstate->base.dst);
                dst_h = drm_rect_height(&pstate->base.dst);
        }
 
-       if (drm_rotation_90_or_270(pstate->base.rotation))
-               swap(dst_w, dst_h);
-
        downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
        downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
 
@@ -3417,12 +3423,14 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
        if (y && format != DRM_FORMAT_NV12)
                return 0;
 
+       /*
+        * Src coordinates are already rotated by 270 degrees for
+        * the 90/270 degree plane rotation cases (to match the
+        * GTT mapping), hence no need to account for rotation here.
+        */
        width = drm_rect_width(&intel_pstate->base.src) >> 16;
        height = drm_rect_height(&intel_pstate->base.src) >> 16;
 
-       if (drm_rotation_90_or_270(pstate->rotation))
-               swap(width, height);
-
        /* for planar format */
        if (format == DRM_FORMAT_NV12) {
                if (y)  /* y-plane data rate */
@@ -3505,12 +3513,14 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate,
            fb->modifier != I915_FORMAT_MOD_Yf_TILED)
                return 8;
 
+       /*
+        * Src coordinates are already rotated by 270 degrees for
+        * the 90/270 degree plane rotation cases (to match the
+        * GTT mapping), hence no need to account for rotation here.
+        */
        src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
        src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
 
-       if (drm_rotation_90_or_270(pstate->rotation))
-               swap(src_w, src_h);
-
        /* Halve UV plane width and height for NV12 */
        if (fb->format->format == DRM_FORMAT_NV12 && !y) {
                src_w /= 2;
@@ -3794,13 +3804,15 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
                width = intel_pstate->base.crtc_w;
                height = intel_pstate->base.crtc_h;
        } else {
+               /*
+                * Src coordinates are already rotated by 270 degrees for
+                * the 90/270 degree plane rotation cases (to match the
+                * GTT mapping), hence no need to account for rotation here.
+                */
                width = drm_rect_width(&intel_pstate->base.src) >> 16;
                height = drm_rect_height(&intel_pstate->base.src) >> 16;
        }
 
-       if (drm_rotation_90_or_270(pstate->rotation))
-               swap(width, height);
-
        cpp = fb->format->cpp[0];
        plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
 
index 66a2b8b..513a0f4 100644 (file)
@@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
 
 void intel_ring_update_space(struct intel_ring *ring)
 {
-       ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+       ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
 }
 
 static int
@@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 
        i915_gem_request_submit(request);
 
-       assert_ring_tail_valid(request->ring, request->tail);
-       I915_WRITE_TAIL(request->engine, request->tail);
+       I915_WRITE_TAIL(request->engine,
+                       intel_ring_set_tail(request->ring, request->tail));
 }
 
 static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1316,11 +1316,23 @@ err:
        return PTR_ERR(addr);
 }
 
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+       GEM_BUG_ON(!list_empty(&ring->request_list));
+       ring->tail = tail;
+       ring->head = tail;
+       ring->emit = tail;
+       intel_ring_update_space(ring);
+}
+
 void intel_ring_unpin(struct intel_ring *ring)
 {
        GEM_BUG_ON(!ring->vma);
        GEM_BUG_ON(!ring->vaddr);
 
+       /* Discard any unused bytes beyond that submitted to hw. */
+       intel_ring_reset(ring, ring->tail);
+
        if (i915_vma_is_map_and_fenceable(ring->vma))
                i915_vma_unpin_iomap(ring->vma);
        else
@@ -1562,8 +1574,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
+       /* Restart from the beginning of the rings for convenience */
        for_each_engine(engine, dev_priv, id)
-               engine->buffer->head = engine->buffer->tail;
+               intel_ring_reset(engine->buffer, 0);
 }
 
 static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1616,7 +1629,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
                unsigned space;
 
                /* Would completion of this request free enough space? */
-               space = __intel_ring_space(target->postfix, ring->tail,
+               space = __intel_ring_space(target->postfix, ring->emit,
                                           ring->size);
                if (space >= bytes)
                        break;
@@ -1641,8 +1654,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 {
        struct intel_ring *ring = req->ring;
-       int remain_actual = ring->size - ring->tail;
-       int remain_usable = ring->effective_size - ring->tail;
+       int remain_actual = ring->size - ring->emit;
+       int remain_usable = ring->effective_size - ring->emit;
        int bytes = num_dwords * sizeof(u32);
        int total_bytes, wait_bytes;
        bool need_wrap = false;
@@ -1678,17 +1691,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 
        if (unlikely(need_wrap)) {
                GEM_BUG_ON(remain_actual > ring->space);
-               GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+               GEM_BUG_ON(ring->emit + remain_actual > ring->size);
 
                /* Fill the tail with MI_NOOP */
-               memset(ring->vaddr + ring->tail, 0, remain_actual);
-               ring->tail = 0;
+               memset(ring->vaddr + ring->emit, 0, remain_actual);
+               ring->emit = 0;
                ring->space -= remain_actual;
        }
 
-       GEM_BUG_ON(ring->tail > ring->size - bytes);
-       cs = ring->vaddr + ring->tail;
-       ring->tail += bytes;
+       GEM_BUG_ON(ring->emit > ring->size - bytes);
+       cs = ring->vaddr + ring->emit;
+       ring->emit += bytes;
        ring->space -= bytes;
        GEM_BUG_ON(ring->space < 0);
 
@@ -1699,7 +1712,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
 {
        int num_dwords =
-               (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+               (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
        u32 *cs;
 
        if (num_dwords == 0)
index a82a080..f7144fe 100644 (file)
@@ -145,6 +145,7 @@ struct intel_ring {
 
        u32 head;
        u32 tail;
+       u32 emit;
 
        int space;
        int size;
@@ -488,6 +489,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size);
 int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);
 
@@ -511,7 +514,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
         * reserved for the command packet (i.e. the value passed to
         * intel_ring_begin()).
         */
-       GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+       GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
 }
 
 static inline u32
@@ -540,7 +543,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
        GEM_BUG_ON(tail >= ring->size);
 }
 
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+       /* Whilst writes to the tail are strictly ordered, there is no
+        * serialisation between readers and the writers. The tail may be
+        * read by i915_gem_request_retire() just as it is being updated
+        * by execlists, as although the breadcrumb is complete, the context
+        * switch hasn't been seen.
+        */
+       assert_ring_tail_valid(ring, tail);
+       ring->tail = tail;
+       return tail;
+}
 
 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
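Across these ring changes one cursor becomes three: "emit" is where intel_ring_begin() writes new commands, "tail" is what has actually been handed to the hardware (updated only through intel_ring_set_tail()), and "head" is the hardware's consumption point; the free-space math now runs against "emit". My summary of the split, not driver code:

    struct toy_ring {
            u32 head;       /* consumed by the GPU                        */
            u32 tail;       /* submitted to the GPU (intel_ring_set_tail) */
            u32 emit;       /* written by the CPU (intel_ring_begin)      */
    };
    /* invariant: head <= tail <= emit, modulo ring wrap */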
 
index adb411a..f4b5358 100644 (file)
@@ -1173,7 +1173,10 @@ static int mga_crtc_mode_set(struct drm_crtc *crtc,
 
 
        if (IS_G200_SE(mdev)) {
-               if (mdev->unique_rev_id >= 0x02) {
+               if (mdev->unique_rev_id >= 0x04) {
+                       WREG8(MGAREG_CRTCEXT_INDEX, 0x06);
+                       WREG8(MGAREG_CRTCEXT_DATA, 0);
+               } else if (mdev->unique_rev_id >= 0x02) {
                        u8 hi_pri_lvl;
                        u32 bpp;
                        u32 mb;
@@ -1639,6 +1642,10 @@ static int mga_vga_mode_valid(struct drm_connector *connector,
                        if (mga_vga_calculate_mode_bandwidth(mode, bpp)
                                > (30100 * 1024))
                                return MODE_BANDWIDTH;
+               } else {
+                       if (mga_vga_calculate_mode_bandwidth(mode, bpp)
+                               > (55000 * 1024))
+                               return MODE_BANDWIDTH;
                }
        } else if (mdev->type == G200_WB) {
                if (mode->hdisplay > 1280)
index 1144e0c..0abe776 100644 (file)
 #include "mxsfb_drv.h"
 #include "mxsfb_regs.h"
 
+#define MXS_SET_ADDR           0x4
+#define MXS_CLR_ADDR           0x8
+#define MODULE_CLKGATE         BIT(30)
+#define MODULE_SFTRST          BIT(31)
+/* a 1 second timeout should be plenty of time for block reset */
+#define RESET_TIMEOUT          1000000
+
 static u32 set_hsync_pulse_width(struct mxsfb_drm_private *mxsfb, u32 val)
 {
        return (val & mxsfb->devdata->hs_wdth_mask) <<
@@ -159,6 +166,36 @@ static void mxsfb_disable_controller(struct mxsfb_drm_private *mxsfb)
                clk_disable_unprepare(mxsfb->clk_disp_axi);
 }
 
+/*
+ * Clear the bit and poll until it reads back as cleared.  This is
+ * usually called with a reset address and a mask of either SFTRST
+ * (bit 31) or CLKGATE (bit 30).
+ */
+static int clear_poll_bit(void __iomem *addr, u32 mask)
+{
+       u32 reg;
+
+       writel(mask, addr + MXS_CLR_ADDR);
+       return readl_poll_timeout(addr, reg, !(reg & mask), 0, RESET_TIMEOUT);
+}
+
+static int mxsfb_reset_block(void __iomem *reset_addr)
+{
+       int ret;
+
+       ret = clear_poll_bit(reset_addr, MODULE_SFTRST);
+       if (ret)
+               return ret;
+
+       writel(MODULE_CLKGATE, reset_addr + MXS_CLR_ADDR);
+
+       ret = clear_poll_bit(reset_addr, MODULE_SFTRST);
+       if (ret)
+               return ret;
+
+       return clear_poll_bit(reset_addr, MODULE_CLKGATE);
+}
+
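The clear_poll_bit() helper above relies on the MXS "SCT" register
convention; a minimal sketch of that convention, assuming it applies to
eLCDIF as to other MXS IP blocks (mxs_set_bits/mxs_clr_bits are
illustrative names, not functions in this driver):

	#include <linux/io.h>

	/* Each control register at offset R has write-only aliases at
	 * R + MXS_SET_ADDR (0x4) and R + MXS_CLR_ADDR (0x8) that set or
	 * clear exactly the written bits without a read-modify-write. */
	static inline void mxs_set_bits(void __iomem *reg, u32 mask)
	{
		writel(mask, reg + MXS_SET_ADDR);	/* bits in mask -> 1 */
	}

	static inline void mxs_clr_bits(void __iomem *reg, u32 mask)
	{
		writel(mask, reg + MXS_CLR_ADDR);	/* bits in mask -> 0 */
	}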
 static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
 {
        struct drm_display_mode *m = &mxsfb->pipe.crtc.state->adjusted_mode;
@@ -173,6 +210,11 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
         */
        mxsfb_enable_axi_clk(mxsfb);
 
+       /* Mandatory eLCDIF reset as per the Reference Manual */
+       err = mxsfb_reset_block(mxsfb->base);
+       if (err)
+               return;
+
        /* Clear the FIFOs */
        writel(CTRL1_FIFO_CLEAR, mxsfb->base + LCDC_CTRL1 + REG_SET);
 
index 39468c2..7459ef9 100644 (file)
@@ -60,15 +60,13 @@ bool nouveau_is_v1_dsm(void) {
 }
 
 #ifdef CONFIG_VGA_SWITCHEROO
-static const char nouveau_dsm_muid[] = {
-       0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
-       0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
-};
+static const guid_t nouveau_dsm_muid =
+       GUID_INIT(0x9D95A0A0, 0x0060, 0x4D48,
+                 0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4);
 
-static const char nouveau_op_dsm_muid[] = {
-       0xF8, 0xD8, 0x86, 0xA4, 0xDA, 0x0B, 0x1B, 0x47,
-       0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0,
-};
+static const guid_t nouveau_op_dsm_muid =
+       GUID_INIT(0xA486D8F8, 0x0BDA, 0x471B,
+                 0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0);
 
 static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *result)
 {
@@ -86,7 +84,7 @@ static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *
                args_buff[i] = (arg >> i * 8) & 0xFF;
 
        *result = 0;
-       obj = acpi_evaluate_dsm_typed(handle, nouveau_op_dsm_muid, 0x00000100,
+       obj = acpi_evaluate_dsm_typed(handle, &nouveau_op_dsm_muid, 0x00000100,
                                      func, &argv4, ACPI_TYPE_BUFFER);
        if (!obj) {
                acpi_handle_info(handle, "failed to evaluate _DSM\n");
@@ -138,7 +136,7 @@ static int nouveau_dsm(acpi_handle handle, int func, int arg)
                .integer.value = arg,
        };
 
-       obj = acpi_evaluate_dsm_typed(handle, nouveau_dsm_muid, 0x00000102,
+       obj = acpi_evaluate_dsm_typed(handle, &nouveau_dsm_muid, 0x00000102,
                                      func, &argv4, ACPI_TYPE_INTEGER);
        if (!obj) {
                acpi_handle_info(handle, "failed to evaluate _DSM\n");
@@ -259,7 +257,7 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out
        if (!acpi_has_method(dhandle, "_DSM"))
                return;
 
-       supports_mux = acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102,
+       supports_mux = acpi_check_dsm(dhandle, &nouveau_dsm_muid, 0x00000102,
                                      1 << NOUVEAU_DSM_POWER);
        optimus_funcs = nouveau_dsm_get_optimus_functions(dhandle);
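The GUID conversions in this series (here, in the mxm hunk below, and in
i2c-hid further down) are byte-for-byte no-ops: GUID_INIT() stores its
first three fields little-endian and the trailing eight bytes verbatim.
A standalone userspace demonstration (plain C, not kernel code) that
reproduces the removed nouveau_dsm_muid byte array:

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative stand-in for the kernel's GUID_INIT() layout. */
	struct guid { uint8_t b[16]; };

	#define GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)	\
		{ { (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff,	\
		    ((a) >> 24) & 0xff, (b) & 0xff, ((b) >> 8) & 0xff,	\
		    (c) & 0xff, ((c) >> 8) & 0xff,			\
		    (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) } }

	int main(void)
	{
		struct guid g = GUID_INIT(0x9D95A0A0, 0x0060, 0x4D48,
					  0xB3, 0x4D, 0x7E, 0x5F,
					  0xEA, 0x12, 0x9F, 0xD4);
		int i;

		/* Prints 0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D, ...
		 * -- exactly the bytes of the old open-coded array. */
		for (i = 0; i < 16; i++)
			printf("0x%02X%s", g.b[i], i == 15 ? "\n" : ", ");
		return 0;
	}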
 
index e3e2f5e..f44682d 100644 (file)
@@ -81,10 +81,9 @@ mxm_shadow_dsm(struct nvkm_mxm *mxm, u8 version)
 {
        struct nvkm_subdev *subdev = &mxm->subdev;
        struct nvkm_device *device = subdev->device;
-       static char muid[] = {
-               0x00, 0xA4, 0x04, 0x40, 0x7D, 0x91, 0xF2, 0x4C,
-               0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65
-       };
+       static guid_t muid =
+               GUID_INIT(0x4004A400, 0x917D, 0x4CF2,
+                         0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65);
        u32 mxms_args[] = { 0x00000000 };
        union acpi_object argv4 = {
                .buffer.type = ACPI_TYPE_BUFFER,
@@ -105,7 +104,7 @@ mxm_shadow_dsm(struct nvkm_mxm *mxm, u8 version)
         * unless you pass in exactly the version it supports.
         */
        rev = (version & 0xf0) << 4 | (version & 0x0f);
-       obj = acpi_evaluate_dsm(handle, muid, rev, 0x00000010, &argv4);
+       obj = acpi_evaluate_dsm(handle, &muid, rev, 0x00000010, &argv4);
        if (!obj) {
                nvkm_debug(subdev, "DSM MXMS failed\n");
                return false;
index 008c145..ca44233 100644 (file)
@@ -9267,8 +9267,11 @@ static void dce8_program_watermarks(struct radeon_device *rdev,
        u32 tmp, wm_mask;
 
        if (radeon_crtc->base.enabled && num_heads && mode) {
-               active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-               line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+               active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+                                           (u32)mode->clock);
+               line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+                                         (u32)mode->clock);
+               line_time = min(line_time, (u32)65535);
 
                /* watermark for high clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
index 0bf1035..5346372 100644 (file)
@@ -2266,8 +2266,11 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
        fixed20_12 a, b, c;
 
        if (radeon_crtc->base.enabled && num_heads && mode) {
-               active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-               line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+               active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+                                           (u32)mode->clock);
+               line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+                                         (u32)mode->clock);
+               line_time = min(line_time, (u32)65535);
                priority_a_cnt = 0;
                priority_b_cnt = 0;
                dram_channels = evergreen_get_number_of_dram_channels(rdev);
index 432480f..3178ba0 100644 (file)
@@ -3393,6 +3393,13 @@ void radeon_combios_asic_init(struct drm_device *dev)
            rdev->pdev->subsystem_vendor == 0x103c &&
            rdev->pdev->subsystem_device == 0x280a)
                return;
+       /* quirk for the rs4xx-based Toshiba Satellite L20-183 laptop to make
+        * it resume - it hangs on resume inside the dynclk 1 table.
+        */
+       if (rdev->family == CHIP_RS400 &&
+           rdev->pdev->subsystem_vendor == 0x1179 &&
+           rdev->pdev->subsystem_device == 0xff31)
+               return;
 
        /* DYN CLK 1 */
        table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
index 6ecf427..0a6444d 100644 (file)
@@ -136,6 +136,10 @@ static struct radeon_px_quirk radeon_px_quirk_list[] = {
         * https://bugzilla.kernel.org/show_bug.cgi?id=51381
         */
        { PCI_VENDOR_ID_ATI, 0x6840, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
+       /* Asus K53TK laptop with AMD A6-3420M APU and Radeon 7670m GPU
+        * https://bugs.freedesktop.org/show_bug.cgi?id=101491
+        */
+       { PCI_VENDOR_ID_ATI, 0x6741, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
        /* macbook pro 8.2 */
        { PCI_VENDOR_ID_ATI, 0x6741, PCI_VENDOR_ID_APPLE, 0x00e2, RADEON_PX_QUIRK_LONG_WAKEUP },
        { 0, 0, 0, 0, 0 },
index 7431eb4..d34d1cf 100644 (file)
@@ -621,7 +621,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
        }
 
        /* TODO: is this still necessary on NI+ ? */
-       if ((cmd == 0 || cmd == 1 || cmd == 0x3) &&
+       if ((cmd == 0 || cmd == 0x3) &&
            (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
                DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
                          start, end);
index 76d1888..5303f25 100644 (file)
@@ -2284,8 +2284,11 @@ static void dce6_program_watermarks(struct radeon_device *rdev,
        fixed20_12 a, b, c;
 
        if (radeon_crtc->base.enabled && num_heads && mode) {
-               active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
-               line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
+               active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+                                           (u32)mode->clock);
+               line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+                                         (u32)mode->clock);
+               line_time = min(line_time, (u32)65535);
                priority_a_cnt = 0;
                priority_b_cnt = 0;
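The three *_program_watermarks hunks above (dce8, evergreen, dce6) fix
the same arithmetic bug: on 32-bit architectures 1000000UL is only 32
bits wide, so the product overflows once crtc_htotal exceeds ~4294, as
large 4K/5K modes can.  A small userspace illustration with made-up but
plausible mode numbers:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t htotal = 5500, clock_khz = 593407; /* clock in kHz */
		/* 32-bit product wraps: 5,500,000,000 mod 2^32 */
		uint32_t wrapped = (uint32_t)(1000000U * htotal) / clock_khz;
		/* div_u64()-style: widen before multiplying */
		uint32_t correct = (uint32_t)((uint64_t)htotal * 1000000 / clock_khz);

		printf("wrapped: %u ns, correct: %u ns per line\n",
		       wrapped, correct);	/* 2030 vs 9268 */
		return 0;
	}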
 
index 9a1e34e..81f86a6 100644 (file)
@@ -451,18 +451,6 @@ fail:
 
 
 #ifdef CONFIG_DRM_TEGRA_STAGING
-static struct tegra_drm_context *
-tegra_drm_file_get_context(struct tegra_drm_file *file, u32 id)
-{
-       struct tegra_drm_context *context;
-
-       mutex_lock(&file->lock);
-       context = idr_find(&file->contexts, id);
-       mutex_unlock(&file->lock);
-
-       return context;
-}
-
 static int tegra_gem_create(struct drm_device *drm, void *data,
                            struct drm_file *file)
 {
@@ -551,7 +539,7 @@ static int tegra_client_open(struct tegra_drm_file *fpriv,
        if (err < 0)
                return err;
 
-       err = idr_alloc(&fpriv->contexts, context, 0, 0, GFP_KERNEL);
+       err = idr_alloc(&fpriv->contexts, context, 1, 0, GFP_KERNEL);
        if (err < 0) {
                client->ops->close_channel(context);
                return err;
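The start-argument change is subtle: with idr_alloc(), an end of 0 means
"no upper bound", and starting at 1 keeps id 0 permanently unallocated,
so a zero context handle from userspace never names a live context.  The
tegra_drm_file_get_context() wrapper removed above became redundant for
a different reason, visible in the hunks below: every caller now takes
fpriv->lock itself before calling idr_find().  A minimal sketch, not
tegra code:

	#include <linux/idr.h>

	static int alloc_context_id(struct idr *contexts, void *context)
	{
		/* returns an id >= 1, or -ENOMEM/-ENOSPC on failure */
		return idr_alloc(contexts, context, 1, 0, GFP_KERNEL);
	}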
@@ -606,7 +594,7 @@ static int tegra_close_channel(struct drm_device *drm, void *data,
 
        mutex_lock(&fpriv->lock);
 
-       context = tegra_drm_file_get_context(fpriv, args->context);
+       context = idr_find(&fpriv->contexts, args->context);
        if (!context) {
                err = -EINVAL;
                goto unlock;
@@ -631,7 +619,7 @@ static int tegra_get_syncpt(struct drm_device *drm, void *data,
 
        mutex_lock(&fpriv->lock);
 
-       context = tegra_drm_file_get_context(fpriv, args->context);
+       context = idr_find(&fpriv->contexts, args->context);
        if (!context) {
                err = -ENODEV;
                goto unlock;
@@ -660,7 +648,7 @@ static int tegra_submit(struct drm_device *drm, void *data,
 
        mutex_lock(&fpriv->lock);
 
-       context = tegra_drm_file_get_context(fpriv, args->context);
+       context = idr_find(&fpriv->contexts, args->context);
        if (!context) {
                err = -ENODEV;
                goto unlock;
@@ -685,7 +673,7 @@ static int tegra_get_syncpt_base(struct drm_device *drm, void *data,
 
        mutex_lock(&fpriv->lock);
 
-       context = tegra_drm_file_get_context(fpriv, args->context);
+       context = idr_find(&fpriv->contexts, args->context);
        if (!context) {
                err = -ENODEV;
                goto unlock;
index 13db8a2..1f013d4 100644 (file)
@@ -321,6 +321,7 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man)
        list_for_each_entry_safe(entry, next, &man->list, head)
                vmw_cmdbuf_res_free(man, entry);
 
+       drm_ht_remove(&man->resources);
        kfree(man);
 }
 
index f05ebb1..ac65f52 100644 (file)
@@ -172,7 +172,7 @@ static int host1x_probe(struct platform_device *pdev)
 
        host->rst = devm_reset_control_get(&pdev->dev, "host1x");
        if (IS_ERR(host->rst)) {
-               err = PTR_ERR(host->clk);
+               err = PTR_ERR(host->rst);
                dev_err(&pdev->dev, "failed to get reset: %d\n", err);
                return err;
        }
index 04cee65..6e04069 100644 (file)
@@ -826,11 +826,35 @@ static int hid_scan_report(struct hid_device *hid)
                                 * hid-rmi should take care of them,
                                 * not hid-generic
                                 */
-                               if (IS_ENABLED(CONFIG_HID_RMI))
-                                       hid->group = HID_GROUP_RMI;
+                               hid->group = HID_GROUP_RMI;
                break;
        }
 
+       /* fall back to the generic driver in case the specific driver doesn't exist */
+       switch (hid->group) {
+       case HID_GROUP_MULTITOUCH_WIN_8:
+               /* fall-through */
+       case HID_GROUP_MULTITOUCH:
+               if (!IS_ENABLED(CONFIG_HID_MULTITOUCH))
+                       hid->group = HID_GROUP_GENERIC;
+               break;
+       case HID_GROUP_SENSOR_HUB:
+               if (!IS_ENABLED(CONFIG_HID_SENSOR_HUB))
+                       hid->group = HID_GROUP_GENERIC;
+               break;
+       case HID_GROUP_RMI:
+               if (!IS_ENABLED(CONFIG_HID_RMI))
+                       hid->group = HID_GROUP_GENERIC;
+               break;
+       case HID_GROUP_WACOM:
+               if (!IS_ENABLED(CONFIG_HID_WACOM))
+                       hid->group = HID_GROUP_GENERIC;
+               break;
+       case HID_GROUP_LOGITECH_DJ_DEVICE:
+               if (!IS_ENABLED(CONFIG_HID_LOGITECH_DJ))
+                       hid->group = HID_GROUP_GENERIC;
+               break;
+       }
        vfree(parser);
        return 0;
 }
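The runtime fallback works because IS_ENABLED(CONFIG_FOO) evaluates to 1
for both CONFIG_FOO=y and CONFIG_FOO=m, and to 0 only when the option is
not built at all, so a report group is downgraded to HID_GROUP_GENERIC
only when no specific driver could ever bind.  The #if wrapping of
hid_have_special_driver[] below is the compile-time half of the same
idea.  A trivial sketch (illustrative helper, not part of hid-core):

	#include <linux/hid.h>

	static bool rmi_driver_built(void)
	{
		/* true for CONFIG_HID_RMI=y and =m alike */
		return IS_ENABLED(CONFIG_HID_RMI);
	}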
@@ -1763,15 +1787,23 @@ EXPORT_SYMBOL_GPL(hid_disconnect);
  * used as a driver. See hid_scan_report().
  */
 static const struct hid_device_id hid_have_special_driver[] = {
+#if IS_ENABLED(CONFIG_HID_A4TECH)
        { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
        { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D) },
        { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ACCUTOUCH)
+       { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ACRUX)
        { HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0x0802) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ACRUX, 0xf705) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ALPS)
        { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_APPLE)
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MIGHTYMOUSE) },
-       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
-       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_ISO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI) },
@@ -1792,11 +1824,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL2) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL3) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS) },
@@ -1851,62 +1878,100 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_ANSI) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) },
+#endif
+#if IS_ENABLED(CONFIG_HID_APPLEIR)
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL2) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL3) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL4) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_IRCONTROL5) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ASUS)
        { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD) },
        { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_AUREAL)
        { HID_USB_DEVICE(USB_VENDOR_ID_AUREAL, USB_DEVICE_ID_AUREAL_W01RN) },
+#endif
+#if IS_ENABLED(CONFIG_HID_BELKIN)
        { HID_USB_DEVICE(USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_BETOP_FF)
        { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185BFM, 0x2208) },
        { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185PC, 0x5506) },
        { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2PC, 0x1850) },
        { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185V2BFM, 0x5500) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CHERRY)
        { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CHICONY)
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CMEDIA)
+       { HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CORSAIR)
        { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CP2112)
        { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) },
+#endif
+#if IS_ENABLED(CONFIG_HID_CYPRESS)
        { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_2) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_3) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_4) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_DELCOM, USB_DEVICE_ID_DELCOM_VISUAL_IND) },
+#endif
+#if IS_ENABLED(CONFIG_HID_DRAGONRISE)
        { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0006) },
        { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0011) },
-#if IS_ENABLED(CONFIG_HID_MAYFLASH)
-       { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE1) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE2) },
 #endif
-       { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) },
+#if IS_ENABLED(CONFIG_HID_ELECOM)
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ELO)
        { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0030) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) },
+#endif
+#if IS_ENABLED(CONFIG_HID_EMS_FF)
        { HID_USB_DEVICE(USB_VENDOR_ID_EMS, USB_DEVICE_ID_EMS_TRIO_LINKER_PLUS_II) },
+#endif
+#if IS_ENABLED(CONFIG_HID_EZKEY)
        { HID_USB_DEVICE(USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GEMBIRD)
        { HID_USB_DEVICE(USB_VENDOR_ID_GEMBIRD, USB_DEVICE_ID_GEMBIRD_JPD_DUALFORCE2) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GFRM)
+       { HID_BLUETOOTH_DEVICE(0x58, 0x2000) },
+       { HID_BLUETOOTH_DEVICE(0x471, 0x2210) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GREENASIA)
        { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0012) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GT683R)
+       { HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_GYRATION)
        { HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_2) },
        { HID_USB_DEVICE(USB_VENDOR_ID_GYRATION, USB_DEVICE_ID_GYRATION_REMOTE_3) },
+#endif
+#if IS_ENABLED(CONFIG_HID_HOLTEK)
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK, USB_DEVICE_ID_HOLTEK_ON_LINE_GRIP) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A04A) },
@@ -1915,12 +1980,17 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A072) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ICADE)
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KENSINGTON)
        { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KEYTOUCH)
        { HID_USB_DEVICE(USB_VENDOR_ID_KEYTOUCH, USB_DEVICE_ID_KEYTOUCH_IEC) },
+#endif
+#if IS_ENABLED(CONFIG_HID_KYE)
        { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_MANTICORE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GX_IMPERATOR) },
@@ -1930,21 +2000,29 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2) },
        { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) },
        { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LCPOWER)
        { HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LED)
+       { HID_USB_DEVICE(USB_VENDOR_ID_DELCOM, USB_DEVICE_ID_DELCOM_VISUAL_IND) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_LUXAFOR) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_RISO_KAGAKU, USB_DEVICE_ID_RI_KA_WEBMAIL) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
+#endif
 #if IS_ENABLED(CONFIG_HID_LENOVO)
        { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) },
 #endif
-       { HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) },
+#if IS_ENABLED(CONFIG_HID_LOGITECH)
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER_2) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RECEIVER) },
-       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
-       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_DESKTOP) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_EDGE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_DINOVO_MINI) },
@@ -1957,7 +2035,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2_2) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G29_WHEEL) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_F3D) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG ) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FORCE3D_PRO) },
@@ -1969,17 +2046,30 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DFGT_WHEEL) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G25_WHEEL) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G27_WHEEL) },
-#if IS_ENABLED(CONFIG_HID_LOGITECH_DJ)
-       { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2) },
-#endif
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WII_WHEEL) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACETRAVELLER) },
        { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_LUXAFOR) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LOGITECH_HIDPP)
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_LOGITECH_DJ)
+       { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MAGICMOUSE)
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICMOUSE) },
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGICTRACKPAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MAYFLASH)
+       { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_DOLPHINBAR) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE1) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_GAMECUBE2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MICROSOFT)
        { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_MOUSE_4500) },
        { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_KEYBOARD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV) },
@@ -1995,9 +2085,22 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) },
        { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1) },
        { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER) },
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MONTEREY)
        { HID_USB_DEVICE(USB_VENDOR_ID_MONTEREY, USB_DEVICE_ID_GENIUS_KB29E) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_MULTITOUCH)
+       { HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) },
+#endif
+#if IS_ENABLED(CONFIG_HID_WIIMOTE)
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) },
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) },
+#endif
+#if IS_ENABLED(CONFIG_HID_NTI)
        { HID_USB_DEVICE(USB_VENDOR_ID_NTI, USB_DEVICE_ID_USB_SUN) },
+#endif
+#if IS_ENABLED(CONFIG_HID_NTRIG)
        { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN) },
        { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_1) },
        { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_2) },
@@ -2017,13 +2120,41 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_16) },
        { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_17) },
        { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_18) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ORTEK)
        { HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_PKB1700) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_WKB2000) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PANTHERLORD)
+       { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0003) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PENMOUNT)
        { HID_USB_DEVICE(USB_VENDOR_ID_PENMOUNT, USB_DEVICE_ID_PENMOUNT_6000) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PETALYNX)
        { HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PICOLCD)
+       { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_MICROCHIP, USB_DEVICE_ID_PICOLCD_BOOTLOADER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PLANTRONICS)
        { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, HID_ANY_ID) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PRIMAX)
        { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_KEYBOARD) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_RISO_KAGAKU, USB_DEVICE_ID_RI_KA_WEBMAIL) },
+#endif
+#if IS_ENABLED(CONFIG_HID_PRODIKEYS)
+       { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
+#endif
+#if IS_ENABLED(CONFIG_HID_RMI)
+       { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14) },
+#endif
 #if IS_ENABLED(CONFIG_HID_ROCCAT)
        { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ARVO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ISKU) },
@@ -2051,9 +2182,21 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT5) },
        { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9) },
 #endif
+#if IS_ENABLED(CONFIG_HID_SAMSUNG)
        { HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SMARTJOYPLUS)
+       { HID_USB_DEVICE(USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_3_PRO) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_DUAL_BOX_PRO) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_5_PRO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SONY)
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_PS3_BDREMOTE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_BUZZ_CONTROLLER) },
        { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_WIRELESS_BUZZ_CONTROLLER) },
@@ -2072,9 +2215,17 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_SINO_LITE, USB_DEVICE_ID_SINO_LITE_CONTROLLER) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SPEEDLINK)
+       { HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_STEELSERIES)
        { HID_USB_DEVICE(USB_VENDOR_ID_STEELSERIES, USB_DEVICE_ID_STEELSERIES_SRWS1) },
+#endif
+#if IS_ENABLED(CONFIG_HID_SUNPLUS)
        { HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
+#endif
+#if IS_ENABLED(CONFIG_HID_THRUSTMASTER)
        { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
        { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
        { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb323) },
@@ -2083,12 +2234,25 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb653) },
        { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
        { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65a) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TIVO)
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_BT) },
        { HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_PRO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TOPSEED)
+       { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS) },
        { HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
        { HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_RF_COMBO) },
+#endif
+#if IS_ENABLED(CONFIG_HID_TWINHAN)
        { HID_USB_DEVICE(USB_VENDOR_ID_TWINHAN, USB_DEVICE_ID_TWINHAN_IR_REMOTE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_UCLOGIC)
+       { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_HUION_TABLET) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP5540U) },
@@ -2096,20 +2260,17 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP1062) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_WIRELESS_TABLET_TWHL850) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_YIYNOVA_TABLET) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_81) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_3_PRO) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_DUAL_BOX_PRO) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_5_PRO) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
+#endif
+#if IS_ENABLED(CONFIG_HID_UDRAW_PS3)
+       { HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) },
+#endif
+#if IS_ENABLED(CONFIG_HID_WALTOP)
        { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH) },
        { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH) },
        { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_Q_PAD) },
@@ -2117,19 +2278,18 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH) },
        { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH) },
        { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_XINMO)
        { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) },
        { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ZEROPLUS)
        { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
        { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
+#endif
+#if IS_ENABLED(CONFIG_HID_ZYDACRON)
        { HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) },
-
-       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
-       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) },
-       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
+#endif
        { }
 };
 
index 8ca1e8c..4f9a393 100644 (file)
 #define USB_VENDOR_ID_DELCOM           0x0fc5
 #define USB_DEVICE_ID_DELCOM_VISUAL_IND        0xb080
 
+#define USB_VENDOR_ID_DELL                             0x413c
+#define USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE    0x301a
+
 #define USB_VENDOR_ID_DELORME          0x1163
 #define USB_DEVICE_ID_DELORME_EARTHMATE        0x0100
 #define USB_DEVICE_ID_DELORME_EM_LT20  0x0200
index 1d6c997..20b40ad 100644 (file)
@@ -349,7 +349,6 @@ static int magicmouse_raw_event(struct hid_device *hdev,
 
        if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
                magicmouse_emit_buttons(msc, clicks & 3);
-               input_mt_report_pointer_emulation(input, true);
                input_report_rel(input, REL_X, x);
                input_report_rel(input, REL_Y, y);
        } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
@@ -389,16 +388,16 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd
                __clear_bit(BTN_RIGHT, input->keybit);
                __clear_bit(BTN_MIDDLE, input->keybit);
                __set_bit(BTN_MOUSE, input->keybit);
+               __set_bit(BTN_TOOL_FINGER, input->keybit);
+               __set_bit(BTN_TOOL_DOUBLETAP, input->keybit);
+               __set_bit(BTN_TOOL_TRIPLETAP, input->keybit);
+               __set_bit(BTN_TOOL_QUADTAP, input->keybit);
+               __set_bit(BTN_TOOL_QUINTTAP, input->keybit);
+               __set_bit(BTN_TOUCH, input->keybit);
+               __set_bit(INPUT_PROP_POINTER, input->propbit);
                __set_bit(INPUT_PROP_BUTTONPAD, input->propbit);
        }
 
-       __set_bit(BTN_TOOL_FINGER, input->keybit);
-       __set_bit(BTN_TOOL_DOUBLETAP, input->keybit);
-       __set_bit(BTN_TOOL_TRIPLETAP, input->keybit);
-       __set_bit(BTN_TOOL_QUADTAP, input->keybit);
-       __set_bit(BTN_TOOL_QUINTTAP, input->keybit);
-       __set_bit(BTN_TOUCH, input->keybit);
-       __set_bit(INPUT_PROP_POINTER, input->propbit);
 
        __set_bit(EV_ABS, input->evbit);
 
index fb55fb4..0401503 100644 (file)
@@ -872,10 +872,9 @@ static int i2c_hid_fetch_hid_descriptor(struct i2c_hid *ihid)
 static int i2c_hid_acpi_pdata(struct i2c_client *client,
                struct i2c_hid_platform_data *pdata)
 {
-       static u8 i2c_hid_guid[] = {
-               0xF7, 0xF6, 0xDF, 0x3C, 0x67, 0x42, 0x55, 0x45,
-               0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE,
-       };
+       static guid_t i2c_hid_guid =
+               GUID_INIT(0x3CDFF6F7, 0x4267, 0x4555,
+                         0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE);
        union acpi_object *obj;
        struct acpi_device *adev;
        acpi_handle handle;
@@ -884,7 +883,7 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client,
        if (!handle || acpi_bus_get_device(handle, &adev))
                return -ENODEV;
 
-       obj = acpi_evaluate_dsm_typed(handle, i2c_hid_guid, 1, 1, NULL,
+       obj = acpi_evaluate_dsm_typed(handle, &i2c_hid_guid, 1, 1, NULL,
                                      ACPI_TYPE_INTEGER);
        if (!obj) {
                dev_err(&client->dev, "device _DSM execution failed\n");
index 6316498..a88e7c7 100644 (file)
@@ -85,6 +85,7 @@ static const struct hid_blacklist {
        { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
        { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
        { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
+       { USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
        { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
        { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_PS3, HID_QUIRK_MULTI_INPUT },
index 26b0510..93d28c0 100644 (file)
@@ -1066,7 +1066,7 @@ static void ssip_pn_setup(struct net_device *dev)
        dev->addr_len           = 1;
        dev->tx_queue_len       = SSIP_TXQUEUE_LEN;
 
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
        dev->header_ops         = &phonet_header_ops;
 }
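The destructor change follows the netdev teardown rework in this merge
window: instead of pointing ->destructor at free_netdev(), a driver sets
needs_free_netdev and the core frees the device after unregistration,
with an optional priv_destructor hook running first.  A minimal sketch:

	#include <linux/netdevice.h>

	static void example_setup(struct net_device *dev)
	{
		dev->needs_free_netdev = true;
		/* dev->priv_destructor = example_uninit; -- optional,
		 * runs before the core's free_netdev() */
	}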
 
index 95ed171..54a47b4 100644 (file)
@@ -734,9 +734,9 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
                 * the first read operation, otherwise the first read costs
                 * one extra clock cycle.
                 */
-               temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+               temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
                temp |= I2CR_MTX;
-               writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+               imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
        }
        msgs->buf[msgs->len-1] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
 
@@ -857,9 +857,9 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo
                                 * the first read operation, otherwise the first read costs
                                 * one extra clock cycle.
                                 */
-                               temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+                               temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
                                temp |= I2CR_MTX;
-                               writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+                               imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
                        }
                } else if (i == (msgs->len - 2)) {
                        dev_dbg(&i2c_imx->adapter.dev,
index f573448..e98e44e 100644 (file)
@@ -584,7 +584,7 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
 
        /* unmap the data buffer */
        if (dma_size != 0)
-               dma_unmap_single(&adap->dev, dma_addr, dma_size, dma_direction);
+               dma_unmap_single(dev, dma_addr, dma_size, dma_direction);
 
        if (unlikely(!time_left)) {
                dev_err(dev, "completion wait timed out\n");
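The one-line ismt fix above restores a DMA-API invariant: a buffer must
be unmapped with the same struct device, size and direction used to map
it, and &adap->dev is a virtual device with no DMA ops behind it.  (The
rcar_i2c hunk below fixes the size half of the same rule by unmapping
sg_dma_len() bytes, i.e. the length that was actually mapped.)  A
minimal sketch of the pairing, not driver code:

	#include <linux/dma-mapping.h>

	static int dma_roundtrip(struct device *dev, void *buf, size_t len)
	{
		dma_addr_t handle = dma_map_single(dev, buf, len,
						   DMA_FROM_DEVICE);

		if (dma_mapping_error(dev, handle))
			return -ENOMEM;
		/* ... hardware DMAs into buf here ... */
		dma_unmap_single(dev, handle, len, DMA_FROM_DEVICE);
		return 0;
	}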
index 214bf28..8be3e6c 100644 (file)
@@ -319,7 +319,7 @@ static void rcar_i2c_dma_unmap(struct rcar_i2c_priv *priv)
        rcar_i2c_write(priv, ICFBSCR, TCYC06);
 
        dma_unmap_single(chan->device->dev, sg_dma_address(&priv->sg),
-                        priv->msg->len, priv->dma_direction);
+                        sg_dma_len(&priv->sg), priv->dma_direction);
 
        priv->dma_direction = DMA_NONE;
 }
index 5901937..14d1e7d 100644 (file)
@@ -93,7 +93,6 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
        int error;
 
        rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-       scsi_req_init(rq);
        ide_req(rq)->type = ATA_PRIV_MISC;
        rq->special = (char *)pc;
 
@@ -200,7 +199,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
        memset(sense, 0, sizeof(*sense));
 
        blk_rq_init(rq->q, sense_rq);
-       scsi_req_init(sense_rq);
+       scsi_req_init(req);
 
        err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
                              GFP_NOIO);
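A hedged reading of the truncated hunk above, for anyone who suspects a
typo: elsewhere in this series scsi_req_init() changes to take a struct
scsi_request * rather than a struct request *, and ide_prep_sense()
gains a local along the lines of

	struct scsi_request *req = scsi_req(sense_rq);

so scsi_req_init(req) still initialises the sense request.  Most other
scsi_req_init() callers simply disappear because blk_get_request() now
performs that initialisation implicitly.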
@@ -273,7 +272,7 @@ void ide_retry_pc(ide_drive_t *drive)
        ide_requeue_and_plug(drive, failed_rq);
        if (ide_queue_sense_rq(drive, pc)) {
                blk_start_request(failed_rq);
-               ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
+               ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
        }
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
@@ -437,7 +436,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
        /* No more interrupts */
        if ((stat & ATA_DRQ) == 0) {
-               int uptodate, error;
+               int uptodate;
+               blk_status_t error;
 
                debug_log("Packet command completed, %d bytes transferred\n",
                          blk_rq_bytes(rq));
@@ -490,7 +490,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
                if (ata_misc_request(rq)) {
                        scsi_req(rq)->result = 0;
-                       error = 0;
+                       error = BLK_STS_OK;
                } else {
 
                        if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
@@ -498,7 +498,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                                        scsi_req(rq)->result = -EIO;
                        }
 
-                       error = uptodate ? 0 : -EIO;
+                       error = uptodate ? BLK_STS_OK : BLK_STS_IOERR;
                }
 
                ide_complete_rq(drive, error, blk_rq_bytes(rq));
index 07e5ff3..81e18f9 100644 (file)
@@ -228,7 +228,7 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
                scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
                cdrom_analyze_sense_data(drive, failed);
 
-               if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed)))
+               if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed)))
                        BUG();
        } else
                cdrom_analyze_sense_data(drive, NULL);
@@ -438,7 +438,6 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 
                rq = blk_get_request(drive->queue,
                        write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN,  __GFP_RECLAIM);
-               scsi_req_init(rq);
                memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
                ide_req(rq)->type = ATA_PRIV_PC;
                rq->rq_flags |= rq_flags;
@@ -508,7 +507,7 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
                nr_bytes -= cmd->last_xfer_len;
 
        if (nr_bytes > 0) {
-               ide_complete_rq(drive, 0, nr_bytes);
+               ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
                return true;
        }
 
@@ -674,7 +673,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 out_end:
        if (blk_rq_is_scsi(rq) && rc == 0) {
                scsi_req(rq)->resid_len = 0;
-               blk_end_request_all(rq, 0);
+               blk_end_request_all(rq, BLK_STS_OK);
                hwif->rq = NULL;
        } else {
                if (sense && uptodate)
@@ -699,7 +698,7 @@ out_end:
                                scsi_req(rq)->resid_len += cmd->last_xfer_len;
                }
 
-               ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
+               ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq));
 
                if (sense && rc == 2)
                        ide_error(drive, "request sense failure", stat);
@@ -844,7 +843,7 @@ out_end:
        if (nsectors == 0)
                nsectors = 1;
 
-       ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
+       ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9);
 
        return ide_stopped;
 }
index 55cd736..9d26c97 100644 (file)
@@ -304,7 +304,6 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
        int ret;
 
        rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-       scsi_req_init(rq);
        ide_req(rq)->type = ATA_PRIV_MISC;
        rq->rq_flags = RQF_QUIET;
        blk_execute_rq(drive->queue, cd->disk, rq, 0);
index 9b69c32..ef7c8c4 100644 (file)
@@ -166,7 +166,6 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
                return setting->set(drive, arg);
 
        rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
-       scsi_req_init(rq);
        ide_req(rq)->type = ATA_PRIV_MISC;
        scsi_req(rq)->cmd_len = 5;
        scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
index 7c06237..241983d 100644 (file)
@@ -478,7 +478,6 @@ static int set_multcount(ide_drive_t *drive, int arg)
                return -EBUSY;
 
        rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-       scsi_req_init(rq);
        ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
        drive->mult_req = arg;
index 51c8122..54d4d78 100644 (file)
@@ -104,7 +104,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
                        if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
                                ide_finish_cmd(drive, cmd, stat);
                        else
-                               ide_complete_rq(drive, 0,
+                               ide_complete_rq(drive, BLK_STS_OK,
                                                blk_rq_sectors(cmd->rq) << 9);
                        return ide_stopped;
                }
index 4b7ffd7..47d5f33 100644 (file)
@@ -135,7 +135,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
                        return ide_stopped;
                }
                scsi_req(rq)->result = err;
-               ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+               ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
                return ide_stopped;
        }
 
@@ -143,7 +143,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
 }
 EXPORT_SYMBOL_GPL(ide_error);
 
-static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
+static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err)
 {
        struct request *rq = drive->hwif->rq;
 
@@ -151,7 +151,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
            scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
                if (err <= 0 && scsi_req(rq)->result == 0)
                        scsi_req(rq)->result = -EIO;
-               ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
+               ide_complete_rq(drive, err, blk_rq_bytes(rq));
        }
 }
 
@@ -191,7 +191,7 @@ static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive)
        }
        /* done polling */
        hwif->polling = 0;
-       ide_complete_drive_reset(drive, 0);
+       ide_complete_drive_reset(drive, BLK_STS_OK);
        return ide_stopped;
 }
 
@@ -225,7 +225,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
        ide_hwif_t *hwif = drive->hwif;
        const struct ide_port_ops *port_ops = hwif->port_ops;
        u8 tmp;
-       int err = 0;
+       blk_status_t err = BLK_STS_OK;
 
        if (port_ops && port_ops->reset_poll) {
                err = port_ops->reset_poll(drive);
@@ -247,7 +247,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
                printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n",
                        hwif->name, tmp);
                drive->failures++;
-               err = -EIO;
+               err = BLK_STS_IOERR;
        } else  {
                tmp = ide_read_error(drive);
 
@@ -257,7 +257,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
                } else {
                        ide_reset_report_error(hwif, tmp);
                        drive->failures++;
-                       err = -EIO;
+                       err = BLK_STS_IOERR;
                }
        }
 out:
@@ -392,7 +392,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
 
        if (io_ports->ctl_addr == 0) {
                spin_unlock_irqrestore(&hwif->lock, flags);
-               ide_complete_drive_reset(drive, -ENXIO);
+               ide_complete_drive_reset(drive, BLK_STS_IOERR);
                return ide_stopped;
        }
 
index 8ac6048..627b1f6 100644 (file)
@@ -143,7 +143,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
 
                drive->failed_pc = NULL;
                drive->pc_callback(drive, 0);
-               ide_complete_rq(drive, -EIO, done);
+               ide_complete_rq(drive, BLK_STS_IOERR, done);
                return ide_stopped;
        }
 
@@ -248,7 +248,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
 
                if (ata_misc_request(rq)) {
                        scsi_req(rq)->result = 0;
-                       ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+                       ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
                        return ide_stopped;
                } else
                        goto out_end;
@@ -303,7 +303,7 @@ out_end:
        drive->failed_pc = NULL;
        if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
                scsi_req(rq)->result = -EIO;
-       ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+       ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
        return ide_stopped;
 }
 
index 323af72..3a23470 100644 (file)
@@ -54,7 +54,7 @@
 #include <linux/uaccess.h>
 #include <asm/io.h>
 
-int ide_end_rq(ide_drive_t *drive, struct request *rq, int error,
+int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
               unsigned int nr_bytes)
 {
        /*
@@ -112,7 +112,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
        }
 }
 
-int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
+int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes)
 {
        ide_hwif_t *hwif = drive->hwif;
        struct request *rq = hwif->rq;
@@ -122,7 +122,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
         * if failfast is set on a request, override the number of sectors
         * and complete the whole request right now
         */
-       if (blk_noretry_request(rq) && error <= 0)
+       if (blk_noretry_request(rq) && error)
                nr_bytes = blk_rq_sectors(rq) << 9;
 
        rc = ide_end_rq(drive, rq, error, nr_bytes);
@@ -149,7 +149,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
                        scsi_req(rq)->result = -EIO;
        }
 
-       ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+       ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
 }
 
 static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
@@ -272,7 +272,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
        printk("%s: DRIVE_CMD (null)\n", drive->name);
 #endif
        scsi_req(rq)->result = 0;
-       ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+       ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
 
        return ide_stopped;
 }
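
A minimal sketch of the conversion rule every completion call site in these
IDE hunks applies: success becomes BLK_STS_OK and error paths become
BLK_STS_IOERR, so ide_complete_rq()/ide_end_rq() take a blk_status_t rather
than a negative errno. The helper name below is illustrative only, not part
of the patch:

    #include <linux/blk_types.h>

    /* Illustrative helper: an errno-style result collapses to the two
     * block status codes used throughout the hunks above. */
    static inline blk_status_t ide_err_to_blk_status(int err)
    {
        return err ? BLK_STS_IOERR : BLK_STS_OK;
    }
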
index 8c0d172..3661abb 100644 (file)
@@ -126,7 +126,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
                struct request *rq;
 
                rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-               scsi_req_init(rq);
                ide_req(rq)->type = ATA_PRIV_TASKFILE;
                blk_execute_rq(drive->queue, NULL, rq, 0);
                err = scsi_req(rq)->result ? -EIO : 0;
@@ -224,7 +223,6 @@ static int generic_drive_reset(ide_drive_t *drive)
        int ret = 0;
 
        rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-       scsi_req_init(rq);
        ide_req(rq)->type = ATA_PRIV_MISC;
        scsi_req(rq)->cmd_len = 1;
        scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
index 94e3107..1f264d5 100644 (file)
@@ -32,7 +32,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
        spin_unlock_irq(&hwif->lock);
 
        rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
-       scsi_req_init(rq);
        scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
        scsi_req(rq)->cmd_len = 1;
        ide_req(rq)->type = ATA_PRIV_MISC;
@@ -48,7 +47,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
         * timeout has expired, so power management will be reenabled.
         */
        rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
        if (IS_ERR(rq))
                goto out;
-       scsi_req_init(rq);
 
index 0977fc1..544f02d 100644 (file)
@@ -19,7 +19,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 
        memset(&rqpm, 0, sizeof(rqpm));
        rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-       scsi_req_init(rq);
        ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
        rq->special = &rqpm;
        rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -40,7 +39,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
        return ret;
 }
 
-static void ide_end_sync_rq(struct request *rq, int error)
+static void ide_end_sync_rq(struct request *rq, blk_status_t error)
 {
        complete(rq->end_io_data);
 }
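
Request end_io callbacks change type in the same sweep: as the
ide_end_sync_rq() hunk above shows, rq->end_io handlers now receive a
blk_status_t instead of an int. A minimal sketch of an updated handler (the
function name is hypothetical; the completion signaling is unchanged):

    #include <linux/blkdev.h>

    /* Hypothetical end_io handler using the new signature. */
    static void example_end_sync_rq(struct request *rq, blk_status_t error)
    {
        /* error is informational here; just wake the waiter */
        complete(rq->end_io_data);
    }
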
@@ -57,7 +56,7 @@ static int ide_pm_execute_rq(struct request *rq)
        if (unlikely(blk_queue_dying(q))) {
                rq->rq_flags |= RQF_QUIET;
                scsi_req(rq)->result = -ENXIO;
-               __blk_end_request_all(rq, 0);
+               __blk_end_request_all(rq, BLK_STS_OK);
                spin_unlock_irq(q->queue_lock);
                return -ENXIO;
        }
@@ -91,7 +90,6 @@ int generic_ide_resume(struct device *dev)
 
        memset(&rqpm, 0, sizeof(rqpm));
        rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-       scsi_req_init(rq);
        ide_req(rq)->type = ATA_PRIV_PM_RESUME;
        rq->rq_flags |= RQF_PREEMPT;
        rq->special = &rqpm;
@@ -235,7 +233,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 
        drive->hwif->rq = NULL;
 
-       if (blk_end_request(rq, 0, 0))
+       if (blk_end_request(rq, BLK_STS_OK, 0))
                BUG();
 }
 
index 0235625..01b2adf 100644 (file)
@@ -741,12 +741,12 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
        }
 }
 
-static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
+static void ide_initialize_rq(struct request *rq)
 {
        struct ide_request *req = blk_mq_rq_to_pdu(rq);
 
+       scsi_req_init(&req->sreq);
        req->sreq.sense = req->sense;
-       return 0;
 }
 
 /*
@@ -771,8 +771,9 @@ static int ide_init_queue(ide_drive_t *drive)
                return 1;
 
        q->request_fn = do_ide_request;
-       q->init_rq_fn = ide_init_rq;
+       q->initialize_rq_fn = ide_initialize_rq;
        q->cmd_size = sizeof(struct ide_request);
+       queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
        if (blk_init_allocated_queue(q) < 0) {
                blk_cleanup_queue(q);
                return 1;
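
Why the scattered scsi_req_init() calls can be deleted throughout this
series: once a queue registers an initialize_rq_fn, the block core runs the
hook for every request handed out by blk_get_request(), so per-call-site
initialization is redundant. A sketch of the mechanism, assuming a
simplified block core (the function name is hypothetical):

    #include <linux/blkdev.h>

    /* Hypothetical illustration: the core invokes the hook on each request
     * before returning it, which for IDE queues runs ide_initialize_rq()
     * and therefore scsi_req_init(). */
    static void example_init_hook(struct request_queue *q, struct request *rq)
    {
        if (q->initialize_rq_fn)
            q->initialize_rq_fn(rq);
    }
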
index a0651f9..fd57e8c 100644 (file)
@@ -474,7 +474,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 
                drive->failed_pc = NULL;
                drive->pc_callback(drive, 0);
-               ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+               ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
                return ide_stopped;
        }
        ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries,
@@ -855,7 +855,6 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
        BUG_ON(size < 0 || size % tape->blk_size);
 
        rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
-       scsi_req_init(rq);
        ide_req(rq)->type = ATA_PRIV_MISC;
        scsi_req(rq)->cmd[13] = cmd;
        rq->rq_disk = tape->disk;
index d71199d..4efe4c6 100644 (file)
@@ -318,7 +318,7 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
                }
 
                if (nr_bytes > 0)
-                       ide_complete_rq(drive, 0, nr_bytes);
+                       ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
        }
 }
 
@@ -336,7 +336,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
                ide_driveid_update(drive);
        }
 
-       ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+       ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
 }
 
 /*
@@ -394,7 +394,7 @@ out_end:
        if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
                ide_finish_cmd(drive, cmd, stat);
        else
-               ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9);
+               ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9);
        return ide_stopped;
 out_err:
        ide_error_cmd(drive, cmd);
@@ -433,7 +433,6 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
        rq = blk_get_request(drive->queue,
                (cmd->tf_flags & IDE_TFLAG_WRITE) ?
                        REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
-       scsi_req_init(rq);
        ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
        /*
index 6a1849b..57eea5a 100644 (file)
@@ -406,7 +406,7 @@ static int siimage_dma_test_irq(ide_drive_t *drive)
  *     yet.
  */
 
-static int sil_sata_reset_poll(ide_drive_t *drive)
+static blk_status_t sil_sata_reset_poll(ide_drive_t *drive)
 {
        ide_hwif_t *hwif = drive->hwif;
        void __iomem *sata_status_addr
@@ -419,11 +419,11 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
                if ((sata_stat & 0x03) != 0x03) {
                        printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
                                            hwif->name, sata_stat);
-                       return -ENXIO;
+                       return BLK_STS_IOERR;
                }
        }
 
-       return 0;
+       return BLK_STS_OK;
 }
 
 /**
index dd4190b..6066bbf 100644 (file)
@@ -468,13 +468,13 @@ static void meson_sar_adc_unlock(struct iio_dev *indio_dev)
 static void meson_sar_adc_clear_fifo(struct iio_dev *indio_dev)
 {
        struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
-       int count;
+       unsigned int count, tmp;
 
        for (count = 0; count < MESON_SAR_ADC_MAX_FIFO_SIZE; count++) {
                if (!meson_sar_adc_get_fifo_count(indio_dev))
                        break;
 
-               regmap_read(priv->regmap, MESON_SAR_ADC_FIFO_RD, 0);
+               regmap_read(priv->regmap, MESON_SAR_ADC_FIFO_RD, &tmp);
        }
 }
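
The bug being fixed here: regmap_read() takes a pointer as its third
argument and stores the register value through it, while the old code
passed a literal 0. A minimal usage sketch (the function name is
hypothetical; MESON_SAR_ADC_FIFO_RD is the driver's own register):

    #include <linux/regmap.h>

    /* Drain one FIFO entry, discarding the sample. */
    static int example_drain_one(struct regmap *map)
    {
        unsigned int discard;

        /* returns 0 on success or a negative errno */
        return regmap_read(map, MESON_SAR_ADC_FIFO_RD, &discard);
    }
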
 
index b0c7d8e..6888167 100644 (file)
@@ -718,9 +718,12 @@ static int mxs_lradc_adc_probe(struct platform_device *pdev)
        adc->dev = dev;
 
        iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!iores)
+               return -EINVAL;
+
        adc->base = devm_ioremap(dev, iores->start, resource_size(iores));
-       if (IS_ERR(adc->base))
-               return PTR_ERR(adc->base);
+       if (!adc->base)
+               return -ENOMEM;
 
        init_completion(&adc->completion);
        spin_lock_init(&adc->lock);
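
Two error conventions are at play in this fix: platform_get_resource()
returns NULL when the resource is missing, and devm_ioremap() also reports
failure with NULL rather than an ERR_PTR, so the old IS_ERR() check could
never trigger. A sketch of the corrected pattern (the function name is
hypothetical); devm_ioremap_resource(), which combines both steps and does
return an ERR_PTR, is the common alternative:

    #include <linux/platform_device.h>
    #include <linux/io.h>

    /* Map the first MEM resource; NULL signals failure at both steps. */
    static void __iomem *example_map(struct platform_device *pdev)
    {
        struct resource *res;

        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!res)
            return NULL;

        return devm_ioremap(&pdev->dev, res->start, resource_size(res));
    }
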
index dd99d27..ff03324 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/poll.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/buffer_impl.h>
 #include <linux/iio/buffer-dma.h>
 #include <linux/dma-mapping.h>
 #include <linux/sizes.h>
index 9fabed4..2b5a320 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <linux/iio/iio.h>
 #include <linux/iio/buffer.h>
+#include <linux/iio/buffer_impl.h>
 #include <linux/iio/buffer-dma.h>
 #include <linux/iio/buffer-dmaengine.h>
 
index 96dabbd..88a7c5d 100644 (file)
@@ -41,6 +41,7 @@ static const int accel_scale[] = {598, 1196, 2392, 4785};
 static const struct inv_mpu6050_reg_map reg_set_6500 = {
        .sample_rate_div        = INV_MPU6050_REG_SAMPLE_RATE_DIV,
        .lpf                    = INV_MPU6050_REG_CONFIG,
+       .accel_lpf              = INV_MPU6500_REG_ACCEL_CONFIG_2,
        .user_ctrl              = INV_MPU6050_REG_USER_CTRL,
        .fifo_en                = INV_MPU6050_REG_FIFO_EN,
        .gyro_config            = INV_MPU6050_REG_GYRO_CONFIG,
@@ -211,6 +212,37 @@ int inv_mpu6050_set_power_itg(struct inv_mpu6050_state *st, bool power_on)
 EXPORT_SYMBOL_GPL(inv_mpu6050_set_power_itg);
 
 /**
+ *  inv_mpu6050_set_lpf_regs() - set low pass filter registers, chip dependent
+ *
+ *  MPU60xx/MPU9150 chips use a single register for both accelerometer and
+ *  gyroscope; MPU6500 and above have a dedicated accelerometer register.
+ */
+static int inv_mpu6050_set_lpf_regs(struct inv_mpu6050_state *st,
+                                   enum inv_mpu6050_filter_e val)
+{
+       int result;
+
+       result = regmap_write(st->map, st->reg->lpf, val);
+       if (result)
+               return result;
+
+       switch (st->chip_type) {
+       case INV_MPU6050:
+       case INV_MPU6000:
+       case INV_MPU9150:
+               /* old chips, nothing to do */
+               result = 0;
+               break;
+       default:
+               /* set accel lpf */
+               result = regmap_write(st->map, st->reg->accel_lpf, val);
+               break;
+       }
+
+       return result;
+}
+
+/**
  *  inv_mpu6050_init_config() - Initialize hardware, disable FIFO.
  *
  *  Initial configuration:
@@ -233,8 +265,7 @@ static int inv_mpu6050_init_config(struct iio_dev *indio_dev)
        if (result)
                return result;
 
-       d = INV_MPU6050_FILTER_20HZ;
-       result = regmap_write(st->map, st->reg->lpf, d);
+       result = inv_mpu6050_set_lpf_regs(st, INV_MPU6050_FILTER_20HZ);
        if (result)
                return result;
 
@@ -537,6 +568,8 @@ error_write_raw:
  *                  would be aliasing. This function searches for the
  *                  correct low-pass parameters based on the FIFO rate,
  *                  i.e., the sampling frequency.
+ *
+ *  The lpf is set automatically when setting the sampling rate to avoid aliasing.
  */
 static int inv_mpu6050_set_lpf(struct inv_mpu6050_state *st, int rate)
 {
@@ -552,7 +585,7 @@ static int inv_mpu6050_set_lpf(struct inv_mpu6050_state *st, int rate)
        while ((h < hz[i]) && (i < ARRAY_SIZE(d) - 1))
                i++;
        data = d[i];
-       result = regmap_write(st->map, st->reg->lpf, data);
+       result = inv_mpu6050_set_lpf_regs(st, data);
        if (result)
                return result;
        st->chip_config.lpf = data;
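
Both low-pass-filter writers now funnel through the new helper, so
MPU6500-class chips also get their dedicated accelerometer LPF register
programmed. A usage sketch mirroring the init path above, assuming the
surrounding driver state (st, result) and the driver's own constant:

    result = inv_mpu6050_set_lpf_regs(st, INV_MPU6050_FILTER_20HZ);
    if (result)
        return result;
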
index ef13de7..953a0c0 100644 (file)
@@ -28,6 +28,7 @@
  *  struct inv_mpu6050_reg_map - Notable registers.
  *  @sample_rate_div:  Divider applied to gyro output rate.
  *  @lpf:              Configures internal low pass filter.
+ *  @accel_lpf:                Configures accelerometer low pass filter.
  *  @user_ctrl:                Enables/resets the FIFO.
  *  @fifo_en:          Determines which data will appear in FIFO.
  *  @gyro_config:      gyro config register.
@@ -47,6 +48,7 @@
 struct inv_mpu6050_reg_map {
        u8 sample_rate_div;
        u8 lpf;
+       u8 accel_lpf;
        u8 user_ctrl;
        u8 fifo_en;
        u8 gyro_config;
@@ -188,6 +190,7 @@ struct inv_mpu6050_state {
 #define INV_MPU6050_FIFO_THRESHOLD           500
 
 /* mpu6500 registers */
+#define INV_MPU6500_REG_ACCEL_CONFIG_2      0x1D
 #define INV_MPU6500_REG_ACCEL_OFFSET        0x77
 
 /* delay time in milliseconds */
index 02971e2..ece6926 100644 (file)
@@ -449,12 +449,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
                return ret;
 
        rt = (struct rt6_info *)dst;
-       if (ipv6_addr_any(&fl6.saddr)) {
-               ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
-                                        &fl6.daddr, 0, &fl6.saddr);
-               if (ret)
-                       goto put;
-
+       if (ipv6_addr_any(&src_in->sin6_addr)) {
                src_in->sin6_family = AF_INET6;
                src_in->sin6_addr = fl6.saddr;
        }
@@ -471,9 +466,6 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 
        *pdst = dst;
        return 0;
-put:
-       dst_release(dst);
-       return ret;
 }
 #else
 static int addr6_resolve(struct sockaddr_in6 *src_in,
index ebf7be8..0877283 100644 (file)
 #define BNXT_RE_MAX_SRQC_COUNT         (64 * 1024)
 #define BNXT_RE_MAX_CQ_COUNT           (64 * 1024)
 
+#define BNXT_RE_UD_QP_HW_STALL         0x400000
+
+#define BNXT_RE_RQ_WQE_THRESHOLD       32
+
 struct bnxt_re_work {
        struct work_struct      work;
        unsigned long           event;
index 7ba9e69..c7bd683 100644 (file)
 #include "ib_verbs.h"
 #include <rdma/bnxt_re-abi.h>
 
+static int __from_ib_access_flags(int iflags)
+{
+       int qflags = 0;
+
+       if (iflags & IB_ACCESS_LOCAL_WRITE)
+               qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+       if (iflags & IB_ACCESS_REMOTE_READ)
+               qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
+       if (iflags & IB_ACCESS_REMOTE_WRITE)
+               qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
+       if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+               qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
+       if (iflags & IB_ACCESS_MW_BIND)
+               qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
+       if (iflags & IB_ZERO_BASED)
+               qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
+       if (iflags & IB_ACCESS_ON_DEMAND)
+               qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
+       return qflags;
+};
+
+static enum ib_access_flags __to_ib_access_flags(int qflags)
+{
+       enum ib_access_flags iflags = 0;
+
+       if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
+               iflags |= IB_ACCESS_LOCAL_WRITE;
+       if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
+               iflags |= IB_ACCESS_REMOTE_WRITE;
+       if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
+               iflags |= IB_ACCESS_REMOTE_READ;
+       if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
+               iflags |= IB_ACCESS_REMOTE_ATOMIC;
+       if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
+               iflags |= IB_ACCESS_MW_BIND;
+       if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
+               iflags |= IB_ZERO_BASED;
+       if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
+               iflags |= IB_ACCESS_ON_DEMAND;
+       return iflags;
+};
+
 static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
                             struct bnxt_qplib_sge *sg_list, int num)
 {
@@ -149,8 +191,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
        ib_attr->max_total_mcast_qp_attach = 0;
        ib_attr->max_ah = dev_attr->max_ah;
 
-       ib_attr->max_fmr = dev_attr->max_fmr;
-       ib_attr->max_map_per_fmr = 1;   /* ? */
+       ib_attr->max_fmr = 0;
+       ib_attr->max_map_per_fmr = 0;
 
        ib_attr->max_srq = dev_attr->max_srq;
        ib_attr->max_srq_wr = dev_attr->max_srq_wqes;
@@ -410,6 +452,158 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
        return IB_LINK_LAYER_ETHERNET;
 }
 
+#define        BNXT_RE_FENCE_PBL_SIZE  DIV_ROUND_UP(BNXT_RE_FENCE_BYTES, PAGE_SIZE)
+
+static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
+{
+       struct bnxt_re_fence_data *fence = &pd->fence;
+       struct ib_mr *ib_mr = &fence->mr->ib_mr;
+       struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+
+       memset(wqe, 0, sizeof(*wqe));
+       wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
+       wqe->wr_id = BNXT_QPLIB_FENCE_WRID;
+       wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+       wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+       wqe->bind.zero_based = false;
+       wqe->bind.parent_l_key = ib_mr->lkey;
+       wqe->bind.va = (u64)(unsigned long)fence->va;
+       wqe->bind.length = fence->size;
+       wqe->bind.access_cntl = __from_ib_access_flags(IB_ACCESS_REMOTE_READ);
+       wqe->bind.mw_type = SQ_BIND_MW_TYPE_TYPE1;
+
+       /* Save the initial rkey in fence structure for now;
+        * wqe->bind.r_key will be set at (re)bind time.
+        */
+       fence->bind_rkey = ib_inc_rkey(fence->mw->rkey);
+}
+
+static int bnxt_re_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp)
+{
+       struct bnxt_re_qp *qp = container_of(qplib_qp, struct bnxt_re_qp,
+                                            qplib_qp);
+       struct ib_pd *ib_pd = qp->ib_qp.pd;
+       struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+       struct bnxt_re_fence_data *fence = &pd->fence;
+       struct bnxt_qplib_swqe *fence_wqe = &fence->bind_wqe;
+       struct bnxt_qplib_swqe wqe;
+       int rc;
+
+       memcpy(&wqe, fence_wqe, sizeof(wqe));
+       wqe.bind.r_key = fence->bind_rkey;
+       fence->bind_rkey = ib_inc_rkey(fence->bind_rkey);
+
+       dev_dbg(rdev_to_dev(qp->rdev),
+               "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
+               wqe.bind.r_key, qp->qplib_qp.id, pd);
+       rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+       if (rc) {
+               dev_err(rdev_to_dev(qp->rdev), "Failed to bind fence-WQE\n");
+               return rc;
+       }
+       bnxt_qplib_post_send_db(&qp->qplib_qp);
+
+       return rc;
+}
+
+static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
+{
+       struct bnxt_re_fence_data *fence = &pd->fence;
+       struct bnxt_re_dev *rdev = pd->rdev;
+       struct device *dev = &rdev->en_dev->pdev->dev;
+       struct bnxt_re_mr *mr = fence->mr;
+
+       if (fence->mw) {
+               bnxt_re_dealloc_mw(fence->mw);
+               fence->mw = NULL;
+       }
+       if (mr) {
+               if (mr->ib_mr.rkey)
+                       bnxt_qplib_dereg_mrw(&rdev->qplib_res, &mr->qplib_mr,
+                                            true);
+               if (mr->ib_mr.lkey)
+                       bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+               kfree(mr);
+               fence->mr = NULL;
+       }
+       if (fence->dma_addr) {
+               dma_unmap_single(dev, fence->dma_addr, BNXT_RE_FENCE_BYTES,
+                                DMA_BIDIRECTIONAL);
+               fence->dma_addr = 0;
+       }
+}
+
+static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
+{
+       int mr_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_MW_BIND;
+       struct bnxt_re_fence_data *fence = &pd->fence;
+       struct bnxt_re_dev *rdev = pd->rdev;
+       struct device *dev = &rdev->en_dev->pdev->dev;
+       struct bnxt_re_mr *mr = NULL;
+       dma_addr_t dma_addr = 0;
+       struct ib_mw *mw;
+       u64 pbl_tbl;
+       int rc;
+
+       dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
+                                 DMA_BIDIRECTIONAL);
+       rc = dma_mapping_error(dev, dma_addr);
+       if (rc) {
+               dev_err(rdev_to_dev(rdev), "Failed to dma-map fence-MR-mem\n");
+               rc = -EIO;
+               fence->dma_addr = 0;
+               goto fail;
+       }
+       fence->dma_addr = dma_addr;
+
+       /* Allocate a MR */
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr) {
+               rc = -ENOMEM;
+               goto fail;
+       }
+       fence->mr = mr;
+       mr->rdev = rdev;
+       mr->qplib_mr.pd = &pd->qplib_pd;
+       mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+       mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+       rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+       if (rc) {
+               dev_err(rdev_to_dev(rdev), "Failed to alloc fence-HW-MR\n");
+               goto fail;
+       }
+
+       /* Register MR */
+       mr->ib_mr.lkey = mr->qplib_mr.lkey;
+       mr->qplib_mr.va = (u64)(unsigned long)fence->va;
+       mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
+       pbl_tbl = dma_addr;
+       rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
+                              BNXT_RE_FENCE_PBL_SIZE, false);
+       if (rc) {
+               dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n");
+               goto fail;
+       }
+       mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+       /* Create a fence MW only for kernel consumers */
+       mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL);
+       if (!mw) {
+               dev_err(rdev_to_dev(rdev),
+                       "Failed to create fence-MW for PD: %p\n", pd);
+               rc = -EINVAL;
+               goto fail;
+       }
+       fence->mw = mw;
+
+       bnxt_re_create_fence_wqe(pd);
+       return 0;
+
+fail:
+       bnxt_re_destroy_fence_mr(pd);
+       return rc;
+}
+
 /* Protection Domains */
 int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 {
@@ -417,6 +611,7 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
        struct bnxt_re_dev *rdev = pd->rdev;
        int rc;
 
+       bnxt_re_destroy_fence_mr(pd);
        if (ib_pd->uobject && pd->dpi.dbr) {
                struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
                struct bnxt_re_ucontext *ucntx;
@@ -498,6 +693,10 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
                }
        }
 
+       if (!udata)
+               if (bnxt_re_create_fence_mr(pd))
+                       dev_warn(rdev_to_dev(rdev),
+                                "Failed to create Fence-MR\n");
        return &pd->ib_pd;
 dbfail:
        (void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
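
The fence machinery added above rebinds the memory window with a fresh rkey
on every use; ib_inc_rkey() from rdma/ib_verbs.h advances only the low
eight "variant" bits, so the memory-window index is preserved. A sketch of
the equivalent semantics (an assumption modeled on that helper):

    /* Hypothetical equivalent: bump only the low 8 key bits. */
    static inline u32 example_inc_rkey(u32 rkey)
    {
        return (rkey & ~0xffu) | ((rkey + 1) & 0xffu);
    }
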
@@ -849,12 +1048,16 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
        /* Shadow QP SQ depth should be same as QP1 RQ depth */
        qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
        qp->qplib_qp.sq.max_sge = 2;
+       /* Q full delta can be 1 since it is an internal QP */
+       qp->qplib_qp.sq.q_full_delta = 1;
 
        qp->qplib_qp.scq = qp1_qp->scq;
        qp->qplib_qp.rcq = qp1_qp->rcq;
 
        qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
        qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
+       /* Q full delta can be 1 since it is an internal QP */
+       qp->qplib_qp.rq.q_full_delta = 1;
 
        qp->qplib_qp.mtu = qp1_qp->mtu;
 
@@ -917,10 +1120,6 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
        qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
                                  IB_SIGNAL_ALL_WR) ? true : false);
 
-       entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
-       qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
-                                       dev_attr->max_qp_wqes + 1);
-
        qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge;
        if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
                qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
@@ -959,6 +1158,9 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
                qp->qplib_qp.rq.max_wqe = min_t(u32, entries,
                                                dev_attr->max_qp_wqes + 1);
 
+               qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+                                               qp_init_attr->cap.max_recv_wr;
+
                qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge;
                if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
                        qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
@@ -967,6 +1169,12 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
        qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
 
        if (qp_init_attr->qp_type == IB_QPT_GSI) {
+               /* Allocate 1 more than what's provided */
+               entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
+               qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+                                               dev_attr->max_qp_wqes + 1);
+               qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+                                               qp_init_attr->cap.max_send_wr;
                qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
                if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
                        qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
@@ -1006,6 +1214,22 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
                }
 
        } else {
+               /* Allocate 128 + 1 more than what's provided */
+               entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr +
+                                            BNXT_QPLIB_RESERVED_QP_WRS + 1);
+               qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+                                               dev_attr->max_qp_wqes +
+                                               BNXT_QPLIB_RESERVED_QP_WRS + 1);
+               qp->qplib_qp.sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+
+               /*
+                * Reserve one slot for the phantom WQE. The application can
+                * post one extra entry in this case; allowing it avoids an
+                * unexpected queue-full condition.
+                */
+
+               qp->qplib_qp.sq.q_full_delta -= 1;
+
                qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom;
                qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
                if (udata) {
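
The q_full_delta values recorded above feed the bnxt_qplib_queue_full()
test used by the post-send/post-recv hunks later in the series: the queue
is declared full when the producer index, advanced by the reserved delta,
would catch up with the consumer index. A sketch, assuming the driver's
HWQ_CMP() ring-index macro (modeled on the series, not copied from it):

    static inline bool example_queue_full(struct bnxt_qplib_q *q)
    {
        return HWQ_CMP(q->hwq.prod + q->q_full_delta, &q->hwq) ==
               HWQ_CMP(q->hwq.cons, &q->hwq);
    }
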
@@ -1025,6 +1249,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 
        qp->ib_qp.qp_num = qp->qplib_qp.id;
        spin_lock_init(&qp->sq_lock);
+       spin_lock_init(&qp->rq_lock);
 
        if (udata) {
                struct bnxt_re_qp_resp resp;
@@ -1129,48 +1354,6 @@ static enum ib_mtu __to_ib_mtu(u32 mtu)
        }
 }
 
-static int __from_ib_access_flags(int iflags)
-{
-       int qflags = 0;
-
-       if (iflags & IB_ACCESS_LOCAL_WRITE)
-               qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
-       if (iflags & IB_ACCESS_REMOTE_READ)
-               qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
-       if (iflags & IB_ACCESS_REMOTE_WRITE)
-               qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
-       if (iflags & IB_ACCESS_REMOTE_ATOMIC)
-               qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
-       if (iflags & IB_ACCESS_MW_BIND)
-               qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
-       if (iflags & IB_ZERO_BASED)
-               qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
-       if (iflags & IB_ACCESS_ON_DEMAND)
-               qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
-       return qflags;
-};
-
-static enum ib_access_flags __to_ib_access_flags(int qflags)
-{
-       enum ib_access_flags iflags = 0;
-
-       if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
-               iflags |= IB_ACCESS_LOCAL_WRITE;
-       if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
-               iflags |= IB_ACCESS_REMOTE_WRITE;
-       if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
-               iflags |= IB_ACCESS_REMOTE_READ;
-       if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
-               iflags |= IB_ACCESS_REMOTE_ATOMIC;
-       if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
-               iflags |= IB_ACCESS_MW_BIND;
-       if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
-               iflags |= IB_ZERO_BASED;
-       if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
-               iflags |= IB_ACCESS_ON_DEMAND;
-       return iflags;
-};
-
 static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
                                    struct bnxt_re_qp *qp1_qp,
                                    int qp_attr_mask)
@@ -1378,11 +1561,21 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
                entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
                qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
                                                dev_attr->max_qp_wqes + 1);
+               qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+                                               qp_attr->cap.max_send_wr;
+               /*
+                * Reserve one slot for the phantom WQE. Some applications can
+                * post one extra entry in this case; allowing it avoids an
+                * unexpected queue-full condition.
+                */
+               qp->qplib_qp.sq.q_full_delta -= 1;
                qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
                if (qp->qplib_qp.rq.max_wqe) {
                        entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr);
                        qp->qplib_qp.rq.max_wqe =
                                min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+                       qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+                                                      qp_attr->cap.max_recv_wr;
                        qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
                } else {
                        /* SRQ was used prior, just ignore the RQ caps */
@@ -1883,6 +2076,22 @@ static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
        return payload_sz;
 }
 
+static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp)
+{
+       if ((qp->ib_qp.qp_type == IB_QPT_UD ||
+            qp->ib_qp.qp_type == IB_QPT_GSI ||
+            qp->ib_qp.qp_type == IB_QPT_RAW_ETHERTYPE) &&
+            qp->qplib_qp.wqe_cnt == BNXT_RE_UD_QP_HW_STALL) {
+               int qp_attr_mask;
+               struct ib_qp_attr qp_attr;
+
+               qp_attr_mask = IB_QP_STATE;
+               qp_attr.qp_state = IB_QPS_RTS;
+               bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, qp_attr_mask, NULL);
+               qp->qplib_qp.wqe_cnt = 0;
+       }
+}
+
 static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
                                       struct bnxt_re_qp *qp,
                                struct ib_send_wr *wr)
@@ -1928,6 +2137,7 @@ bad:
                wr = wr->next;
        }
        bnxt_qplib_post_send_db(&qp->qplib_qp);
+       bnxt_ud_qp_hw_stall_workaround(qp);
        spin_unlock_irqrestore(&qp->sq_lock, flags);
        return rc;
 }
@@ -2024,6 +2234,7 @@ bad:
                wr = wr->next;
        }
        bnxt_qplib_post_send_db(&qp->qplib_qp);
+       bnxt_ud_qp_hw_stall_workaround(qp);
        spin_unlock_irqrestore(&qp->sq_lock, flags);
 
        return rc;
@@ -2071,7 +2282,10 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
        struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
        struct bnxt_qplib_swqe wqe;
        int rc = 0, payload_sz = 0;
+       unsigned long flags;
+       u32 count = 0;
 
+       spin_lock_irqsave(&qp->rq_lock, flags);
        while (wr) {
                /* House keeping */
                memset(&wqe, 0, sizeof(wqe));
@@ -2100,9 +2314,21 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
                        *bad_wr = wr;
                        break;
                }
+
+               /* Ring the DB once the number of posted RQEs reaches the threshold */
+               if (++count >= BNXT_RE_RQ_WQE_THRESHOLD) {
+                       bnxt_qplib_post_recv_db(&qp->qplib_qp);
+                       count = 0;
+               }
+
                wr = wr->next;
        }
-       bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+       if (count)
+               bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+       spin_unlock_irqrestore(&qp->rq_lock, flags);
+
        return rc;
 }
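
The receive path now batches doorbell rings instead of ringing once per
call: every BNXT_RE_RQ_WQE_THRESHOLD (32) posted RQEs trigger a ring, with
one final ring for any remainder. A condensed sketch of the pattern, where
post_one() stands in for the WQE-building body above (hypothetical name):

    u32 count = 0;

    while (wr) {
        post_one(wr);
        if (++count >= BNXT_RE_RQ_WQE_THRESHOLD) {
            bnxt_qplib_post_recv_db(&qp->qplib_qp);
            count = 0;
        }
        wr = wr->next;
    }
    if (count)
        bnxt_qplib_post_recv_db(&qp->qplib_qp);
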
 
@@ -2643,12 +2869,36 @@ static void bnxt_re_process_res_ud_wc(struct ib_wc *wc,
                wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
 }
 
+static int send_phantom_wqe(struct bnxt_re_qp *qp)
+{
+       struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp;
+       unsigned long flags;
+       int rc = 0;
+
+       spin_lock_irqsave(&qp->sq_lock, flags);
+
+       rc = bnxt_re_bind_fence_mw(lib_qp);
+       if (!rc) {
+               lib_qp->sq.phantom_wqe_cnt++;
+               dev_dbg(&lib_qp->sq.hwq.pdev->dev,
+                       "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
+                       lib_qp->id, lib_qp->sq.hwq.prod,
+                       HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
+                       lib_qp->sq.phantom_wqe_cnt);
+       }
+
+       spin_unlock_irqrestore(&qp->sq_lock, flags);
+       return rc;
+}
+
 int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
 {
        struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
        struct bnxt_re_qp *qp;
        struct bnxt_qplib_cqe *cqe;
        int i, ncqe, budget;
+       struct bnxt_qplib_q *sq;
+       struct bnxt_qplib_qp *lib_qp;
        u32 tbl_idx;
        struct bnxt_re_sqp_entries *sqp_entry = NULL;
        unsigned long flags;
@@ -2661,7 +2911,21 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
        }
        cqe = &cq->cql[0];
        while (budget) {
-               ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget);
+               lib_qp = NULL;
+               ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget, &lib_qp);
+               if (lib_qp) {
+                       sq = &lib_qp->sq;
+                       if (sq->send_phantom) {
+                               qp = container_of(lib_qp,
+                                                 struct bnxt_re_qp, qplib_qp);
+                               if (send_phantom_wqe(qp) == -ENOMEM)
+                                       dev_err(rdev_to_dev(cq->rdev),
+                                               "Phantom failed! Scheduled to send again\n");
+                               else
+                                       sq->send_phantom = false;
+                       }
+               }
+
                if (!ncqe)
                        break;
 
@@ -2822,6 +3086,12 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
        struct bnxt_re_dev *rdev = mr->rdev;
        int rc;
 
+       rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+       if (rc) {
+               dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
+               return rc;
+       }
+
        if (mr->npages && mr->pages) {
                rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
                                                        &mr->qplib_frpl);
@@ -2829,8 +3099,6 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
                mr->npages = 0;
                mr->pages = NULL;
        }
-       rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
-
        if (!IS_ERR_OR_NULL(mr->ib_umem))
                ib_umem_release(mr->ib_umem);
 
@@ -2914,97 +3182,52 @@ fail:
        return ERR_PTR(rc);
 }
 
-/* Fast Memory Regions */
-struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *ib_pd, int mr_access_flags,
-                                struct ib_fmr_attr *fmr_attr)
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+                              struct ib_udata *udata)
 {
        struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
        struct bnxt_re_dev *rdev = pd->rdev;
-       struct bnxt_re_fmr *fmr;
+       struct bnxt_re_mw *mw;
        int rc;
 
-       if (fmr_attr->max_pages > MAX_PBL_LVL_2_PGS ||
-           fmr_attr->max_maps > rdev->dev_attr.max_map_per_fmr) {
-               dev_err(rdev_to_dev(rdev), "Allocate FMR exceeded Max limit");
+       mw = kzalloc(sizeof(*mw), GFP_KERNEL);
+       if (!mw)
                return ERR_PTR(-ENOMEM);
-       }
-       fmr = kzalloc(sizeof(*fmr), GFP_KERNEL);
-       if (!fmr)
-               return ERR_PTR(-ENOMEM);
-
-       fmr->rdev = rdev;
-       fmr->qplib_fmr.pd = &pd->qplib_pd;
-       fmr->qplib_fmr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+       mw->rdev = rdev;
+       mw->qplib_mw.pd = &pd->qplib_pd;
 
-       rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
-       if (rc)
+       mw->qplib_mw.type = (type == IB_MW_TYPE_1 ?
+                              CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 :
+                              CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B);
+       rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw);
+       if (rc) {
+               dev_err(rdev_to_dev(rdev), "Allocate MW failed!");
                goto fail;
+       }
+       mw->ib_mw.rkey = mw->qplib_mw.rkey;
 
-       fmr->qplib_fmr.flags = __from_ib_access_flags(mr_access_flags);
-       fmr->ib_fmr.lkey = fmr->qplib_fmr.lkey;
-       fmr->ib_fmr.rkey = fmr->ib_fmr.lkey;
+       atomic_inc(&rdev->mw_count);
+       return &mw->ib_mw;
 
-       atomic_inc(&rdev->mr_count);
-       return &fmr->ib_fmr;
 fail:
-       kfree(fmr);
+       kfree(mw);
        return ERR_PTR(rc);
 }
 
-int bnxt_re_map_phys_fmr(struct ib_fmr *ib_fmr, u64 *page_list, int list_len,
-                        u64 iova)
+int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
 {
-       struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
-                                            ib_fmr);
-       struct bnxt_re_dev *rdev = fmr->rdev;
+       struct bnxt_re_mw *mw = container_of(ib_mw, struct bnxt_re_mw, ib_mw);
+       struct bnxt_re_dev *rdev = mw->rdev;
        int rc;
 
-       fmr->qplib_fmr.va = iova;
-       fmr->qplib_fmr.total_size = list_len * PAGE_SIZE;
-
-       rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &fmr->qplib_fmr, page_list,
-                              list_len, true);
-       if (rc)
-               dev_err(rdev_to_dev(rdev), "Failed to map FMR for lkey = 0x%x!",
-                       fmr->ib_fmr.lkey);
-       return rc;
-}
-
-int bnxt_re_unmap_fmr(struct list_head *fmr_list)
-{
-       struct bnxt_re_dev *rdev;
-       struct bnxt_re_fmr *fmr;
-       struct ib_fmr *ib_fmr;
-       int rc = 0;
-
-       /* Validate each FMRs inside the fmr_list */
-       list_for_each_entry(ib_fmr, fmr_list, list) {
-               fmr = container_of(ib_fmr, struct bnxt_re_fmr, ib_fmr);
-               rdev = fmr->rdev;
-
-               if (rdev) {
-                       rc = bnxt_qplib_dereg_mrw(&rdev->qplib_res,
-                                                 &fmr->qplib_fmr, true);
-                       if (rc)
-                               break;
-               }
+       rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw);
+       if (rc) {
+               dev_err(rdev_to_dev(rdev), "Free MW failed: %#x\n", rc);
+               return rc;
        }
-       return rc;
-}
-
-int bnxt_re_dealloc_fmr(struct ib_fmr *ib_fmr)
-{
-       struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
-                                              ib_fmr);
-       struct bnxt_re_dev *rdev = fmr->rdev;
-       int rc;
 
-       rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
-       if (rc)
-               dev_err(rdev_to_dev(rdev), "Failed to free FMR");
-
-       kfree(fmr);
-       atomic_dec(&rdev->mr_count);
+       kfree(mw);
+       atomic_dec(&rdev->mw_count);
        return rc;
 }
 
index 5c3d717..6c160f6 100644 (file)
@@ -44,11 +44,23 @@ struct bnxt_re_gid_ctx {
        u32                     refcnt;
 };
 
+#define BNXT_RE_FENCE_BYTES    64
+struct bnxt_re_fence_data {
+       u32 size;
+       u8 va[BNXT_RE_FENCE_BYTES];
+       dma_addr_t dma_addr;
+       struct bnxt_re_mr *mr;
+       struct ib_mw *mw;
+       struct bnxt_qplib_swqe bind_wqe;
+       u32 bind_rkey;
+};
+
 struct bnxt_re_pd {
        struct bnxt_re_dev      *rdev;
        struct ib_pd            ib_pd;
        struct bnxt_qplib_pd    qplib_pd;
        struct bnxt_qplib_dpi   dpi;
+       struct bnxt_re_fence_data fence;
 };
 
 struct bnxt_re_ah {
@@ -62,6 +74,7 @@ struct bnxt_re_qp {
        struct bnxt_re_dev      *rdev;
        struct ib_qp            ib_qp;
        spinlock_t              sq_lock;        /* protect sq */
+       spinlock_t              rq_lock;        /* protect rq */
        struct bnxt_qplib_qp    qplib_qp;
        struct ib_umem          *sumem;
        struct ib_umem          *rumem;
@@ -181,12 +194,9 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
 struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
                               u32 max_num_sg);
 int bnxt_re_dereg_mr(struct ib_mr *mr);
-struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-                                struct ib_fmr_attr *fmr_attr);
-int bnxt_re_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len,
-                        u64 iova);
-int bnxt_re_unmap_fmr(struct list_head *fmr_list);
-int bnxt_re_dealloc_fmr(struct ib_fmr *fmr);
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+                              struct ib_udata *udata);
+int bnxt_re_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int mr_access_flags,
                                  struct ib_udata *udata);
index 5d35540..1fce5e7 100644 (file)
@@ -507,10 +507,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
        ibdev->dereg_mr                 = bnxt_re_dereg_mr;
        ibdev->alloc_mr                 = bnxt_re_alloc_mr;
        ibdev->map_mr_sg                = bnxt_re_map_mr_sg;
-       ibdev->alloc_fmr                = bnxt_re_alloc_fmr;
-       ibdev->map_phys_fmr             = bnxt_re_map_phys_fmr;
-       ibdev->unmap_fmr                = bnxt_re_unmap_fmr;
-       ibdev->dealloc_fmr              = bnxt_re_dealloc_fmr;
 
        ibdev->reg_user_mr              = bnxt_re_reg_user_mr;
        ibdev->alloc_ucontext           = bnxt_re_alloc_ucontext;
index 43d08b5..f05500b 100644 (file)
@@ -284,7 +284,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_create_qp1 req;
-       struct creq_create_qp1_resp *resp;
+       struct creq_create_qp1_resp resp;
        struct bnxt_qplib_pbl *pbl;
        struct bnxt_qplib_q *sq = &qp->sq;
        struct bnxt_qplib_q *rq = &qp->rq;
@@ -394,31 +394,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
        req.pd_id = cpu_to_le32(qp->pd->id);
 
-       resp = (struct creq_create_qp1_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-       if (!resp) {
-               dev_err(&res->pdev->dev, "QPLIB: FP: CREATE_QP1 send failed");
-               rc = -EINVAL;
-               goto fail;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 timed out");
-               rc = -ETIMEDOUT;
-               goto fail;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               rc = -EINVAL;
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                         (void *)&resp, NULL, 0);
+       if (rc)
                goto fail;
-       }
-       qp->id = le32_to_cpu(resp->xid);
+
+       qp->id = le32_to_cpu(resp.xid);
        qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
        sq->flush_in_progress = false;
        rq->flush_in_progress = false;
@@ -442,7 +423,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
        struct cmdq_create_qp req;
-       struct creq_create_qp_resp *resp;
+       struct creq_create_qp_resp resp;
        struct bnxt_qplib_pbl *pbl;
        struct sq_psn_search **psn_search_ptr;
        unsigned long int psn_search, poff = 0;
@@ -627,31 +608,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
        }
        req.pd_id = cpu_to_le32(qp->pd->id);
 
-       resp = (struct creq_create_qp_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP send failed");
-               rc = -EINVAL;
-               goto fail;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP timed out");
-               rc = -ETIMEDOUT;
-               goto fail;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               rc = -EINVAL;
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                         (void *)&resp, NULL, 0);
+       if (rc)
                goto fail;
-       }
-       qp->id = le32_to_cpu(resp->xid);
+
+       qp->id = le32_to_cpu(resp.xid);
        qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
        sq->flush_in_progress = false;
        rq->flush_in_progress = false;
@@ -769,10 +731,11 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_modify_qp req;
-       struct creq_modify_qp_resp *resp;
+       struct creq_modify_qp_resp resp;
        u16 cmd_flags = 0, pkey;
        u32 temp32[4];
        u32 bmask;
+       int rc;
 
        RCFW_CMD_PREP(req, MODIFY_QP, cmd_flags);
 
@@ -862,27 +825,10 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
        req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
 
-       resp = (struct creq_modify_qp_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
-       }
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                         (void *)&resp, NULL, 0);
+       if (rc)
+               return rc;
        qp->cur_qp_state = qp->state;
        return 0;
 }
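
Every QPLIB verb in this file is converted to the same shape: the CREQ
response lives on the caller's stack, and bnxt_qplib_rcfw_send_message()
itself returns a negative errno on send failure, timeout, or a bad
status/cookie, replacing the open-coded polling removed above. The
skeleton, using the destroy-QP command as the example:

    struct cmdq_destroy_qp req;
    struct creq_destroy_qp_resp resp;
    u16 cmd_flags = 0;
    int rc;

    RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
    req.qp_cid = cpu_to_le32(qp->id);
    rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
                                      (void *)&resp, NULL, 0);
    if (rc)
        return rc;      /* timeout and bad status handled centrally */
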
@@ -891,37 +837,26 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_query_qp req;
-       struct creq_query_qp_resp *resp;
+       struct creq_query_qp_resp resp;
+       struct bnxt_qplib_rcfw_sbuf *sbuf;
        struct creq_query_qp_resp_sb *sb;
        u16 cmd_flags = 0;
        u32 temp32[4];
-       int i;
+       int i, rc = 0;
 
        RCFW_CMD_PREP(req, QUERY_QP, cmd_flags);
 
+       sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+       if (!sbuf)
+               return -ENOMEM;
+       sb = sbuf->sb;
+
        req.qp_cid = cpu_to_le32(qp->id);
        req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
-       resp = (struct creq_query_qp_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    (void **)&sb, 0);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
-       }
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+                                         (void *)sbuf, 0);
+       if (rc)
+               goto bail;
        /* Extract the context from the side buffer */
        qp->state = sb->en_sqd_async_notify_state &
                        CREQ_QUERY_QP_RESP_SB_STATE_MASK;
@@ -976,7 +911,9 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
        qp->dest_qpn = le32_to_cpu(sb->dest_qp_id);
        memcpy(qp->smac, sb->src_mac, 6);
        qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
-       return 0;
+bail:
+       bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+       return rc;
 }
 
 static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
@@ -1021,34 +958,18 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_destroy_qp req;
-       struct creq_destroy_qp_resp *resp;
+       struct creq_destroy_qp_resp resp;
        unsigned long flags;
        u16 cmd_flags = 0;
+       int rc;
 
        RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
 
        req.qp_cid = cpu_to_le32(qp->id);
-       resp = (struct creq_destroy_qp_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
-       }
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                         (void *)&resp, NULL, 0);
+       if (rc)
+               return rc;
 
        /* Must walk the associated CQs to nullify the QP ptr */
        spin_lock_irqsave(&qp->scq->hwq.lock, flags);
@@ -1162,8 +1083,12 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
                rc = -EINVAL;
                goto done;
        }
-       if (HWQ_CMP((sq->hwq.prod + 1), &sq->hwq) ==
-           HWQ_CMP(sq->hwq.cons, &sq->hwq)) {
+
+       if (bnxt_qplib_queue_full(sq)) {
+               dev_err(&sq->hwq.pdev->dev,
+                       "QPLIB: FP: SQ is full! prod = %#x cons = %#x qdepth = %#x delta = %#x",
+                       sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
+                       sq->q_full_delta);
                rc = -ENOMEM;
                goto done;
        }
@@ -1373,6 +1298,9 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
        }
 
        sq->hwq.prod++;
+
+       qp->wqe_cnt++;
+
 done:
        return rc;
 }
@@ -1411,8 +1339,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
                rc = -EINVAL;
                goto done;
        }
-       if (HWQ_CMP((rq->hwq.prod + 1), &rq->hwq) ==
-           HWQ_CMP(rq->hwq.cons, &rq->hwq)) {
+       if (bnxt_qplib_queue_full(rq)) {
                dev_err(&rq->hwq.pdev->dev,
                        "QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
                rc = -EINVAL;
@@ -1483,7 +1410,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_create_cq req;
-       struct creq_create_cq_resp *resp;
+       struct creq_create_cq_resp resp;
        struct bnxt_qplib_pbl *pbl;
        u16 cmd_flags = 0;
        int rc;
@@ -1525,30 +1452,12 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
                        (cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) <<
                         CMDQ_CREATE_CQ_CNQ_ID_SFT);
 
-       resp = (struct creq_create_cq_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ timed out");
-               rc = -ETIMEDOUT;
-               goto fail;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               rc = -EINVAL;
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                         (void *)&resp, NULL, 0);
+       if (rc)
                goto fail;
-       }
-       cq->id = le32_to_cpu(resp->xid);
+
+       cq->id = le32_to_cpu(resp.xid);
        cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
        cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
        init_waitqueue_head(&cq->waitq);
@@ -1566,33 +1475,17 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_destroy_cq req;
-       struct creq_destroy_cq_resp *resp;
+       struct creq_destroy_cq_resp resp;
        u16 cmd_flags = 0;
+       int rc;
 
        RCFW_CMD_PREP(req, DESTROY_CQ, cmd_flags);
 
        req.cq_cid = cpu_to_le32(cq->id);
-       resp = (struct creq_destroy_cq_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
-       }
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                         (void *)&resp, NULL, 0);
+       if (rc)
+               return rc;
        bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
        return 0;
 }
@@ -1664,14 +1557,113 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
        return rc;
 }
 
+/* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
+ *       CQEs are tracked from sw_cq_cons to max_elements, but are valid
+ *       only if VALID=1
+ */
+static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
+                    u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons)
+{
+       struct bnxt_qplib_q *sq = &qp->sq;
+       struct bnxt_qplib_swq *swq;
+       u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
+       struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr;
+       struct cq_req *peek_req_hwcqe;
+       struct bnxt_qplib_qp *peek_qp;
+       struct bnxt_qplib_q *peek_sq;
+       int i, rc = 0;
+
+       /* Normal mode */
+       /* Check for the psn_search marking before completing */
+       swq = &sq->swq[sw_sq_cons];
+       if (swq->psn_search &&
+           le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) {
+               /* Unmark */
+               swq->psn_search->flags_next_psn = cpu_to_le32(
+                       le32_to_cpu(swq->psn_search->flags_next_psn) &
+                       ~0x80000000);
+               dev_dbg(&cq->hwq.pdev->dev,
+                       "FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n",
+                       cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+               sq->condition = true;
+               sq->send_phantom = true;
+
+               /* TODO: Only ARM if the previous SQE is ARMALL */
+               bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL);
+
+               rc = -EAGAIN;
+               goto out;
+       }
+       if (sq->condition) {
+               /* Peek at the completions */
+               peek_raw_cq_cons = cq->hwq.cons;
+               peek_sw_cq_cons = cq_cons;
+               i = cq->hwq.max_elements;
+               while (i--) {
+                       peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
+                       peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
+                       peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)]
+                                                    [CQE_IDX(peek_sw_cq_cons)];
+                       /* If the next hwcqe is VALID */
+                       if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
+                                         cq->hwq.max_elements)) {
+                               /* If the next hwcqe is a REQ */
+                               if ((peek_hwcqe->cqe_type_toggle &
+                                   CQ_BASE_CQE_TYPE_MASK) ==
+                                   CQ_BASE_CQE_TYPE_REQ) {
+                                       peek_req_hwcqe = (struct cq_req *)
+                                                        peek_hwcqe;
+                                       peek_qp = (struct bnxt_qplib_qp *)
+                                               ((unsigned long)le64_to_cpu(
+                                               peek_req_hwcqe->qp_handle));
+                                       peek_sq = &peek_qp->sq;
+                                       peek_sq_cons_idx = HWQ_CMP(le16_to_cpu(
+                                               peek_req_hwcqe->sq_cons_idx) - 1,
+                                               &sq->hwq);
+                                       /* If the hwcqe's sq's wr_id matches */
+                                       if (peek_sq == sq &&
+                                           sq->swq[peek_sq_cons_idx].wr_id ==
+                                           BNXT_QPLIB_FENCE_WRID) {
+                                               /*
+                                                *  Unbreak only if the phantom
+                                                *  comes back
+                                                */
+                                               dev_dbg(&cq->hwq.pdev->dev,
+                                                       "FP: Got Phantom CQE");
+                                               sq->condition = false;
+                                               sq->single = true;
+                                               rc = 0;
+                                               goto out;
+                                       }
+                               }
+                               /* Valid but not the phantom, so keep looping */
+                       } else {
+                               /* Not valid yet, just exit and wait */
+                               rc = -EINVAL;
+                               goto out;
+                       }
+                       peek_sw_cq_cons++;
+                       peek_raw_cq_cons++;
+               }
+               dev_err(&cq->hwq.pdev->dev,
+                       "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x",
+                       cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+               rc = -EINVAL;
+       }
+out:
+       return rc;
+}
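
do_wa9060() above implements the phantom-CQE workaround: once a marked
psn_search entry is seen, SQ completion processing pauses (sq->condition)
and the driver peeks ahead in the CQ ring for a REQ completion on the same
SQ whose wr_id is BNXT_QPLIB_FENCE_WRID; a not-yet-valid entry means wait
for more completions and retry. A rough userspace model of just the peek
loop, with simplified stand-in types (struct cqe and find_phantom are
illustrative only, not driver code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FENCE_WRID 0x46454E43u  /* "FENC", as BNXT_QPLIB_FENCE_WRID */

    struct cqe {
            bool valid;             /* models CQE_CMP_VALID() */
            uint64_t wr_id;
    };

    /* Scan at most 'depth' entries from 'cons' for the phantom fence
     * completion; stop at the first entry HW has not written yet.
     */
    static int find_phantom(const struct cqe *ring, uint32_t depth,
                            uint32_t cons)
    {
            for (uint32_t i = 0; i < depth; i++) {
                    const struct cqe *e = &ring[(cons + i) & (depth - 1)];

                    if (!e->valid)
                            return -1;      /* wait and retry later */
                    if (e->wr_id == FENCE_WRID)
                            return (int)((cons + i) & (depth - 1));
            }
            return -1;                      /* scanned the whole ring */
    }

    int main(void)
    {
            struct cqe ring[8] = { { true, 0x10 }, { true, FENCE_WRID } };

            printf("phantom at index %d\n", find_phantom(ring, 8, 0));
            return 0;
    }
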
+
 static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
                                     struct cq_req *hwcqe,
-                                    struct bnxt_qplib_cqe **pcqe, int *budget)
+                                    struct bnxt_qplib_cqe **pcqe, int *budget,
+                                    u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
 {
        struct bnxt_qplib_qp *qp;
        struct bnxt_qplib_q *sq;
        struct bnxt_qplib_cqe *cqe;
-       u32 sw_cons, cqe_cons;
+       u32 sw_sq_cons, cqe_sq_cons;
+       struct bnxt_qplib_swq *swq;
        int rc = 0;
 
        qp = (struct bnxt_qplib_qp *)((unsigned long)
@@ -1683,13 +1675,13 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
        }
        sq = &qp->sq;
 
-       cqe_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
-       if (cqe_cons > sq->hwq.max_elements) {
+       cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
+       if (cqe_sq_cons > sq->hwq.max_elements) {
                dev_err(&cq->hwq.pdev->dev,
                        "QPLIB: FP: CQ Process req reported ");
                dev_err(&cq->hwq.pdev->dev,
                        "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
-                       cqe_cons, sq->hwq.max_elements);
+                       cqe_sq_cons, sq->hwq.max_elements);
                return -EINVAL;
        }
        /* If we were in the middle of flushing the SQ, continue */
@@ -1698,53 +1690,74 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 
        /* We must walk the sq's swq to fabricate CQEs for all previously
         * signaled SWQEs due to CQE aggregation from the current sq cons
-        * to the cqe_cons
+        * to the cqe_sq_cons
         */
        cqe = *pcqe;
        while (*budget) {
-               sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
-               if (sw_cons == cqe_cons)
+               sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+               if (sw_sq_cons == cqe_sq_cons)
+                       /* Done */
                        break;
+
+               swq = &sq->swq[sw_sq_cons];
                memset(cqe, 0, sizeof(*cqe));
                cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
                cqe->qp_handle = (u64)(unsigned long)qp;
                cqe->src_qp = qp->id;
-               cqe->wr_id = sq->swq[sw_cons].wr_id;
-               cqe->type = sq->swq[sw_cons].type;
+               cqe->wr_id = swq->wr_id;
+               if (cqe->wr_id == BNXT_QPLIB_FENCE_WRID)
+                       goto skip;
+               cqe->type = swq->type;
 
                /* For the last CQE, check for status.  For errors, regardless
                 * of the request being signaled or not, it must complete with
                 * the hwcqe error status
                 */
-               if (HWQ_CMP((sw_cons + 1), &sq->hwq) == cqe_cons &&
+               if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons &&
                    hwcqe->status != CQ_REQ_STATUS_OK) {
                        cqe->status = hwcqe->status;
                        dev_err(&cq->hwq.pdev->dev,
                                "QPLIB: FP: CQ Processed Req ");
                        dev_err(&cq->hwq.pdev->dev,
                                "QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
-                               sw_cons, cqe->wr_id, cqe->status);
+                               sw_sq_cons, cqe->wr_id, cqe->status);
                        cqe++;
                        (*budget)--;
                        sq->flush_in_progress = true;
                        /* Must block new posting of SQ and RQ */
                        qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+                       sq->condition = false;
+                       sq->single = false;
                } else {
-                       if (sq->swq[sw_cons].flags &
-                           SQ_SEND_FLAGS_SIGNAL_COMP) {
+                       if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+                               /* Before we complete, do WA 9060 */
+                               if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+                                             cqe_sq_cons)) {
+                                       *lib_qp = qp;
+                                       goto out;
+                               }
                                cqe->status = CQ_REQ_STATUS_OK;
                                cqe++;
                                (*budget)--;
                        }
                }
+skip:
                sq->hwq.cons++;
+               if (sq->single)
+                       break;
        }
+out:
        *pcqe = cqe;
-       if (!*budget && HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_cons) {
+       if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) {
                /* Out of budget */
                rc = -EAGAIN;
                goto done;
        }
+       /*
+        * Switch back to normal completion mode only after all the WCs
+        * for this CQE have been generated
+        */
+       sq->single = false;
        if (!sq->flush_in_progress)
                goto done;
 flush:
@@ -2074,7 +2087,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
 }
 
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
-                      int num_cqes)
+                      int num_cqes, struct bnxt_qplib_qp **lib_qp)
 {
        struct cq_base *hw_cqe, **hw_cqe_ptr;
        unsigned long flags;
@@ -2099,7 +2112,8 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
                case CQ_BASE_CQE_TYPE_REQ:
                        rc = bnxt_qplib_cq_process_req(cq,
                                                       (struct cq_req *)hw_cqe,
-                                                      &cqe, &budget);
+                                                      &cqe, &budget,
+                                                      sw_cons, lib_qp);
                        break;
                case CQ_BASE_CQE_TYPE_RES_RC:
                        rc = bnxt_qplib_cq_process_res_rc(cq,
index f0150f8..36b7b7d 100644 (file)
@@ -88,6 +88,7 @@ struct bnxt_qplib_swq {
 
 struct bnxt_qplib_swqe {
        /* General */
+#define        BNXT_QPLIB_FENCE_WRID   0x46454E43      /* "FENC" */
        u64                             wr_id;
        u8                              reqs_type;
        u8                              type;
@@ -216,9 +217,16 @@ struct bnxt_qplib_q {
        struct scatterlist              *sglist;
        u32                             nmap;
        u32                             max_wqe;
+       u16                             q_full_delta;
        u16                             max_sge;
        u32                             psn;
        bool                            flush_in_progress;
+       bool                            condition;
+       bool                            single;
+       bool                            send_phantom;
+       u32                             phantom_wqe_cnt;
+       u32                             phantom_cqe_cnt;
+       u32                             next_cq_cons;
 };
 
 struct bnxt_qplib_qp {
@@ -242,6 +250,7 @@ struct bnxt_qplib_qp {
        u8                              timeout;
        u8                              retry_cnt;
        u8                              rnr_retry;
+       u64                             wqe_cnt;
        u32                             min_rnr_timer;
        u32                             max_rd_atomic;
        u32                             max_dest_rd_atomic;
@@ -301,6 +310,13 @@ struct bnxt_qplib_qp {
        (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) ==         \
           !((raw_cons) & (cp_bit)))
 
+static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q)
+{
+       return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta),
+                      &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons,
+                                                &qplib_q->hwq);
+}
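
The inline above declares the ring full q_full_delta slots early, keeping a
reserved tail of WQEs out of the producer's reach (presumably sized against
reservations such as BNXT_QPLIB_RESERVED_QP_WRS in the qplib_sp.c hunk). A
self-contained model of the check, assuming the usual power-of-two ring with
free-running prod/cons indices:

    #include <stdint.h>
    #include <stdio.h>

    /* Model of bnxt_qplib_queue_full(): HWQ_CMP() masks a free-running
     * index into the ring; 'delta' slots are held back as headroom.
     */
    static int queue_full(uint32_t prod, uint32_t cons, uint32_t depth,
                          uint16_t delta)
    {
            return ((prod + delta) & (depth - 1)) == (cons & (depth - 1));
    }

    int main(void)
    {
            /* depth 8, one reserved slot: full with 7 entries in flight */
            printf("%d\n", queue_full(7, 0, 8, 1));     /* 1: full     */
            printf("%d\n", queue_full(6, 0, 8, 1));     /* 0: one left */
            return 0;
    }
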
+
 struct bnxt_qplib_cqe {
        u8                              status;
        u8                              type;
@@ -432,7 +448,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
-                      int num);
+                      int num, struct bnxt_qplib_qp **qp);
 void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
 void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
 int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
index 23fb726..16e4275 100644 (file)
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/prefetch.h>
+#include <linux/delay.h>
+
 #include "roce_hsi.h"
 #include "qplib_res.h"
 #include "qplib_rcfw.h"
 static void bnxt_qplib_service_creq(unsigned long data);
 
 /* Hardware communication channel */
-int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 {
        u16 cbit;
        int rc;
 
-       cookie &= RCFW_MAX_COOKIE_VALUE;
        cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
-       if (!test_bit(cbit, rcfw->cmdq_bitmap))
-               dev_warn(&rcfw->pdev->dev,
-                        "QPLIB: CMD bit %d for cookie 0x%x is not set?",
-                        cbit, cookie);
-
        rc = wait_event_timeout(rcfw->waitq,
                                !test_bit(cbit, rcfw->cmdq_bitmap),
                                msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
-       if (!rc) {
-               dev_warn(&rcfw->pdev->dev,
-                        "QPLIB: Bono Error: timeout %d msec, msg {0x%x}\n",
-                        RCFW_CMD_WAIT_TIME_MS, cookie);
-       }
-
-       return rc;
+       return rc ? 0 : -ETIMEDOUT;
 };
 
-int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 {
-       u32 count = -1;
+       u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
        u16 cbit;
 
-       cookie &= RCFW_MAX_COOKIE_VALUE;
        cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
        if (!test_bit(cbit, rcfw->cmdq_bitmap))
                goto done;
        do {
+               mdelay(1); /* 1 msec */
                bnxt_qplib_service_creq((unsigned long)rcfw);
        } while (test_bit(cbit, rcfw->cmdq_bitmap) && --count);
 done:
-       return count;
+       return count ? 0 : -ETIMEDOUT;
 };
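
With RCFW_BLOCKED_CMD_WAIT_COUNT defined as 0x4E20 (20000) and one mdelay(1)
per iteration, __block_for_resp() polls a blocked command for roughly 20
seconds before giving up with -ETIMEDOUT.
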
 
-void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
-                                  struct cmdq_base *req, void **crsbe,
-                                  u8 is_block)
+static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
+                         struct creq_base *resp, void *sb, u8 is_block)
 {
-       struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
        struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
        struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
-       struct bnxt_qplib_hwq *crsb = &rcfw->crsb;
-       struct bnxt_qplib_crsqe *crsqe = NULL;
-       struct bnxt_qplib_crsbe **crsb_ptr;
+       struct bnxt_qplib_crsq *crsqe;
        u32 sw_prod, cmdq_prod;
-       u8 retry_cnt = 0xFF;
-       dma_addr_t dma_addr;
        unsigned long flags;
        u32 size, opcode;
        u16 cookie, cbit;
        int pg, idx;
        u8 *preq;
 
-retry:
        opcode = req->opcode;
        if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
            (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
@@ -112,63 +95,50 @@ retry:
                dev_err(&rcfw->pdev->dev,
                        "QPLIB: RCFW not initialized, reject opcode 0x%x",
                        opcode);
-               return NULL;
+               return -EINVAL;
        }
 
        if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
            opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
                dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
-               return NULL;
+               return -EINVAL;
        }
 
        /* Cmdq are in 16-byte units, each request can consume 1 or more
         * cmdqe
         */
        spin_lock_irqsave(&cmdq->lock, flags);
-       if (req->cmd_size > cmdq->max_elements -
-           ((HWQ_CMP(cmdq->prod, cmdq) - HWQ_CMP(cmdq->cons, cmdq)) &
-            (cmdq->max_elements - 1))) {
+       if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
                dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
                spin_unlock_irqrestore(&cmdq->lock, flags);
-
-               if (!retry_cnt--)
-                       return NULL;
-               goto retry;
+               return -EAGAIN;
        }
 
-       retry_cnt = 0xFF;
 
-       cookie = atomic_inc_return(&rcfw->seq_num) & RCFW_MAX_COOKIE_VALUE;
+       cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
        cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
        if (is_block)
                cookie |= RCFW_CMD_IS_BLOCKING;
+
+       set_bit(cbit, rcfw->cmdq_bitmap);
        req->cookie = cpu_to_le16(cookie);
-       if (test_and_set_bit(cbit, rcfw->cmdq_bitmap)) {
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: RCFW MAX outstanding cmd reached!");
-               atomic_dec(&rcfw->seq_num);
+       crsqe = &rcfw->crsqe_tbl[cbit];
+       if (crsqe->resp) {
                spin_unlock_irqrestore(&cmdq->lock, flags);
-
-               if (!retry_cnt--)
-                       return NULL;
-               goto retry;
+               return -EBUSY;
        }
-       /* Reserve a resp buffer slot if requested */
-       if (req->resp_size && crsbe) {
-               spin_lock(&crsb->lock);
-               sw_prod = HWQ_CMP(crsb->prod, crsb);
-               crsb_ptr = (struct bnxt_qplib_crsbe **)crsb->pbl_ptr;
-               *crsbe = (void *)&crsb_ptr[get_crsb_pg(sw_prod)]
-                                         [get_crsb_idx(sw_prod)];
-               bnxt_qplib_crsb_dma_next(crsb->pbl_dma_ptr, sw_prod, &dma_addr);
-               req->resp_addr = cpu_to_le64(dma_addr);
-               crsb->prod++;
-               spin_unlock(&crsb->lock);
-
-               req->resp_size = (sizeof(struct bnxt_qplib_crsbe) +
-                                 BNXT_QPLIB_CMDQE_UNITS - 1) /
-                                BNXT_QPLIB_CMDQE_UNITS;
+       memset(resp, 0, sizeof(*resp));
+       crsqe->resp = (struct creq_qp_event *)resp;
+       crsqe->resp->cookie = req->cookie;
+       crsqe->req_size = req->cmd_size;
+       if (req->resp_size && sb) {
+               struct bnxt_qplib_rcfw_sbuf *sbuf = sb;
+
+               req->resp_addr = cpu_to_le64(sbuf->dma_addr);
+               req->resp_size = (sbuf->size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+                                 BNXT_QPLIB_CMDQE_UNITS;
        }
+
        cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
        preq = (u8 *)req;
        size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
@@ -190,23 +160,24 @@ retry:
                preq += min_t(u32, size, sizeof(*cmdqe));
                size -= min_t(u32, size, sizeof(*cmdqe));
                cmdq->prod++;
+               rcfw->seq_num++;
        } while (size > 0);
 
+       rcfw->seq_num++;
+
        cmdq_prod = cmdq->prod;
        if (rcfw->flags & FIRMWARE_FIRST_FLAG) {
-               /* The very first doorbell write is required to set this flag
-                * which prompts the FW to reset its internal pointers
+               /* The very first doorbell write is required to set this flag,
+                * which prompts the FW to reset its internal pointers
                 */
                cmdq_prod |= FIRMWARE_FIRST_FLAG;
                rcfw->flags &= ~FIRMWARE_FIRST_FLAG;
        }
-       sw_prod = HWQ_CMP(crsq->prod, crsq);
-       crsqe = &crsq->crsq[sw_prod];
-       memset(crsqe, 0, sizeof(*crsqe));
-       crsq->prod++;
-       crsqe->req_size = req->cmd_size;
 
        /* ring CMDQ DB */
+       wmb();  /* flush the cmdqe stores before ringing the doorbell */
        writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem +
               rcfw->cmdq_bar_reg_prod_off);
        writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem +
@@ -214,9 +185,56 @@ retry:
 done:
        spin_unlock_irqrestore(&cmdq->lock, flags);
-       /* Return the CREQ response pointer */
-       return crsqe ? &crsqe->qp_event : NULL;
+       return 0;
 }
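
Everything needed to pair a response with its request is derived from the
15-bit cookie: the crsqe_tbl slot is cookie % RCFW_MAX_OUTSTANDING_CMD, and
bit 15 (RCFW_CMD_IS_BLOCKING) rides along on the wire so the completion path
knows whether to skip the waitqueue wake-up. A standalone model of the
mapping (TBL_SLOTS is a stand-in; the real size comes from
BNXT_QPLIB_CMDQE_MAX_CNT, defined outside this diff):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_COOKIE      0x7FFF          /* RCFW_MAX_COOKIE_VALUE */
    #define IS_BLOCKING     0x8000          /* RCFW_CMD_IS_BLOCKING  */
    #define TBL_SLOTS       256             /* stand-in table size   */

    int main(void)
    {
            uint32_t seq_num = 70000;                /* free-running counter */
            uint16_t cookie = seq_num & MAX_COOKIE;  /* 15-bit wire cookie   */
            uint16_t cbit = cookie % TBL_SLOTS;      /* crsqe_tbl slot       */
            uint16_t wire = cookie | IS_BLOCKING;    /* blocking variant     */

            printf("cookie=%#x cbit=%u unmasked=%#x\n",
                   cookie, cbit, wire & MAX_COOKIE);
            return 0;
    }
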
 
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+                                struct cmdq_base *req,
+                                struct creq_base *resp,
+                                void *sb, u8 is_block)
+{
+       struct creq_qp_event *evnt = (struct creq_qp_event *)resp;
+       u16 cookie;
+       u8 opcode, retry_cnt = 0xFF;
+       int rc = 0;
+
+       do {
+               opcode = req->opcode;
+               rc = __send_message(rcfw, req, resp, sb, is_block);
+               cookie = le16_to_cpu(req->cookie) & RCFW_MAX_COOKIE_VALUE;
+               if (!rc)
+                       break;
+
+               if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
+                       /* send failed */
+                       dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed",
+                               cookie, opcode);
+                       return rc;
+               }
+               if (is_block)
+                       mdelay(1);
+               else
+                       usleep_range(500, 1000);
+
+       } while (retry_cnt--);
+
+       if (is_block)
+               rc = __block_for_resp(rcfw, cookie);
+       else
+               rc = __wait_for_resp(rcfw, cookie);
+       if (rc) {
+               /* timed out */
+               dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timed out (%d msec)",
+                       cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
+               return rc;
+       }
+
+       if (evnt->status) {
+               /* failed with status */
+               dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x",
+                       cookie, opcode, evnt->status);
+               rc = -EFAULT;
+       }
+
+       return rc;
+}
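
The wrapper's retry policy in one place: transient failures (-EAGAIN when
the CMDQ is full, -EBUSY when the cookie's table slot is still in flight)
are retried up to 255 times with a short sleep, anything else is fatal, and
only then does the caller block or sleep for the completion. A compact
userspace model of that skeleton (send_once() is a stub standing in for
__send_message()):

    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>

    static int send_once(int attempt)
    {
            return attempt < 3 ? -EAGAIN : 0;   /* stub: 4th try succeeds */
    }

    int main(void)
    {
            int rc, attempt = 0, retry_cnt = 0xFF;

            do {
                    rc = send_once(attempt++);
                    if (!rc)
                            break;
                    if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY))
                            return 1;       /* hard failure */
                    usleep(500);            /* driver: usleep_range(500, 1000) */
            } while (retry_cnt--);

            printf("sent after %d attempts\n", attempt);
            return 0;
    }
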
 /* Completions */
 static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
                                         struct creq_func_event *func_event)
@@ -260,12 +278,12 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
 static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
                                       struct creq_qp_event *qp_event)
 {
-       struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
        struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
-       struct bnxt_qplib_crsqe *crsqe;
-       u16 cbit, cookie, blocked = 0;
+       struct bnxt_qplib_crsq *crsqe;
        unsigned long flags;
-       u32 sw_cons;
+       u16 cbit, blocked = 0;
+       u16 cookie;
+       __le16  mcookie;
 
        switch (qp_event->event) {
        case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
@@ -275,24 +293,31 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
        default:
                /* Command Response */
                spin_lock_irqsave(&cmdq->lock, flags);
-               sw_cons = HWQ_CMP(crsq->cons, crsq);
-               crsqe = &crsq->crsq[sw_cons];
-               crsq->cons++;
-               memcpy(&crsqe->qp_event, qp_event, sizeof(crsqe->qp_event));
-
-               cookie = le16_to_cpu(crsqe->qp_event.cookie);
+               cookie = le16_to_cpu(qp_event->cookie);
+               mcookie = qp_event->cookie;
                blocked = cookie & RCFW_CMD_IS_BLOCKING;
                cookie &= RCFW_MAX_COOKIE_VALUE;
                cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+               crsqe = &rcfw->crsqe_tbl[cbit];
+               if (crsqe->resp && crsqe->resp->cookie == mcookie) {
+                       memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+                       crsqe->resp = NULL;
+               } else {
+                       dev_err(&rcfw->pdev->dev,
+                               "QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x",
+                               crsqe->resp ? "mismatch" : "collision",
+                               crsqe->resp ? crsqe->resp->cookie : 0, mcookie);
+               }
                if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
                        dev_warn(&rcfw->pdev->dev,
                                 "QPLIB: CMD bit %d was not requested", cbit);
-
                cmdq->cons += crsqe->req_size;
-               spin_unlock_irqrestore(&cmdq->lock, flags);
+               crsqe->req_size = 0;
+
                if (!blocked)
                        wake_up(&rcfw->waitq);
-               break;
+               spin_unlock_irqrestore(&cmdq->lock, flags);
        }
        return 0;
 }
@@ -305,12 +330,12 @@ static void bnxt_qplib_service_creq(unsigned long data)
        struct creq_base *creqe, **creq_ptr;
        u32 sw_cons, raw_cons;
        unsigned long flags;
-       u32 type;
+       u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
 
-       /* Service the CREQ until empty */
+       /* Service the CREQ until budget is over */
        spin_lock_irqsave(&creq->lock, flags);
        raw_cons = creq->cons;
-       while (1) {
+       while (budget > 0) {
                sw_cons = HWQ_CMP(raw_cons, creq);
                creq_ptr = (struct creq_base **)creq->pbl_ptr;
                creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
@@ -320,15 +345,9 @@ static void bnxt_qplib_service_creq(unsigned long data)
                type = creqe->type & CREQ_BASE_TYPE_MASK;
                switch (type) {
                case CREQ_BASE_TYPE_QP_EVENT:
-                       if (!bnxt_qplib_process_qp_event
-                           (rcfw, (struct creq_qp_event *)creqe))
-                               rcfw->creq_qp_event_processed++;
-                       else {
-                               dev_warn(&rcfw->pdev->dev, "QPLIB: crsqe with");
-                               dev_warn(&rcfw->pdev->dev,
-                                        "QPLIB: type = 0x%x not handled",
-                                        type);
-                       }
+                       bnxt_qplib_process_qp_event
+                               (rcfw, (struct creq_qp_event *)creqe);
+                       rcfw->creq_qp_event_processed++;
                        break;
                case CREQ_BASE_TYPE_FUNC_EVENT:
                        if (!bnxt_qplib_process_func_event
@@ -346,7 +365,9 @@ static void bnxt_qplib_service_creq(unsigned long data)
                        break;
                }
                raw_cons++;
+               budget--;
        }
+
        if (creq->cons != raw_cons) {
                creq->cons = raw_cons;
                CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons,
@@ -375,23 +396,16 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
 /* RCFW */
 int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
 {
-       struct creq_deinitialize_fw_resp *resp;
        struct cmdq_deinitialize_fw req;
+       struct creq_deinitialize_fw_resp resp;
        u16 cmd_flags = 0;
+       int rc;
 
        RCFW_CMD_PREP(req, DEINITIALIZE_FW, cmd_flags);
-       resp = (struct creq_deinitialize_fw_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-       if (!resp)
-               return -EINVAL;
-
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie)))
-               return -ETIMEDOUT;
-
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie))
-               return -EFAULT;
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+                                         NULL, 0);
+       if (rc)
+               return rc;
 
        clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
        return 0;
@@ -417,9 +431,10 @@ static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl)
 int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
                         struct bnxt_qplib_ctx *ctx, int is_virtfn)
 {
-       struct creq_initialize_fw_resp *resp;
        struct cmdq_initialize_fw req;
+       struct creq_initialize_fw_resp resp;
        u16 cmd_flags = 0, level;
+       int rc;
 
        RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
 
@@ -482,37 +497,19 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
 
 skip_ctx_setup:
        req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
-       resp = (struct creq_initialize_fw_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: RCFW: INITIALIZE_FW send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: RCFW: INITIALIZE_FW timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: RCFW: INITIALIZE_FW failed");
-               return -EINVAL;
-       }
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+                                         NULL, 0);
+       if (rc)
+               return rc;
        set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
        return 0;
 }
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
 {
-       bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->crsb);
-       kfree(rcfw->crsq.crsq);
+       kfree(rcfw->crsqe_tbl);
        bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
        bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
-
        rcfw->pdev = NULL;
 }
 
@@ -539,21 +536,11 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
                goto fail;
        }
 
-       rcfw->crsq.max_elements = rcfw->cmdq.max_elements;
-       rcfw->crsq.crsq = kcalloc(rcfw->crsq.max_elements,
-                                 sizeof(*rcfw->crsq.crsq), GFP_KERNEL);
-       if (!rcfw->crsq.crsq)
+       rcfw->crsqe_tbl = kcalloc(rcfw->cmdq.max_elements,
+                                 sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
+       if (!rcfw->crsqe_tbl)
                goto fail;
 
-       rcfw->crsb.max_elements = BNXT_QPLIB_CRSBE_MAX_CNT;
-       if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->crsb, NULL, 0,
-                                     &rcfw->crsb.max_elements,
-                                     BNXT_QPLIB_CRSBE_UNITS, 0, PAGE_SIZE,
-                                     HWQ_TYPE_CTX)) {
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: HW channel CRSB allocation failed");
-               goto fail;
-       }
        return 0;
 
 fail:
@@ -606,7 +593,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
        int rc;
 
        /* General */
-       atomic_set(&rcfw->seq_num, 0);
+       rcfw->seq_num = 0;
        rcfw->flags = FIRMWARE_FIRST_FLAG;
        bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD *
                                  sizeof(unsigned long));
@@ -636,10 +623,6 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 
        rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET;
 
-       /* CRSQ */
-       rcfw->crsq.prod = 0;
-       rcfw->crsq.cons = 0;
-
        /* CREQ */
        rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION;
        res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
@@ -692,3 +675,34 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
        __iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4);
        return 0;
 }
+
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+               struct bnxt_qplib_rcfw *rcfw,
+               u32 size)
+{
+       struct bnxt_qplib_rcfw_sbuf *sbuf;
+
+       sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC);
+       if (!sbuf)
+               return NULL;
+
+       sbuf->size = size;
+       sbuf->sb = dma_zalloc_coherent(&rcfw->pdev->dev, sbuf->size,
+                                      &sbuf->dma_addr, GFP_ATOMIC);
+       if (!sbuf->sb)
+               goto bail;
+
+       return sbuf;
+bail:
+       kfree(sbuf);
+       return NULL;
+}
+
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+                              struct bnxt_qplib_rcfw_sbuf *sbuf)
+{
+       if (sbuf->sb)
+               dma_free_coherent(&rcfw->pdev->dev, sbuf->size,
+                                 sbuf->sb, sbuf->dma_addr);
+       kfree(sbuf);
+}
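
Both allocations above use GFP_ATOMIC, so the helpers remain usable from
contexts that cannot sleep, and dma_zalloc_coherent() hands back an
already-zeroed buffer. The sbuf->sb check on the free path is defensive;
the allocator never returns an sbuf without a coherent mapping.
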
index d3567d7..09ce121 100644 (file)
@@ -73,6 +73,7 @@
 #define RCFW_MAX_OUTSTANDING_CMD       BNXT_QPLIB_CMDQE_MAX_CNT
 #define RCFW_MAX_COOKIE_VALUE          0x7FFF
 #define RCFW_CMD_IS_BLOCKING           0x8000
+#define RCFW_BLOCKED_CMD_WAIT_COUNT    0x4E20
 
 /* Cmdq contains a fixed number of 16-byte slots */
 struct bnxt_qplib_cmdqe {
@@ -94,32 +95,6 @@ struct bnxt_qplib_crsbe {
        u8                      data[1024];
 };
 
-/* CRSQ SB */
-#define BNXT_QPLIB_CRSBE_MAX_CNT       4
-#define BNXT_QPLIB_CRSBE_UNITS         sizeof(struct bnxt_qplib_crsbe)
-#define BNXT_QPLIB_CRSBE_CNT_PER_PG    (PAGE_SIZE / BNXT_QPLIB_CRSBE_UNITS)
-
-#define MAX_CRSB_IDX                   (BNXT_QPLIB_CRSBE_MAX_CNT - 1)
-#define MAX_CRSB_IDX_PER_PG            (BNXT_QPLIB_CRSBE_CNT_PER_PG - 1)
-
-static inline u32 get_crsb_pg(u32 val)
-{
-       return (val & ~MAX_CRSB_IDX_PER_PG) / BNXT_QPLIB_CRSBE_CNT_PER_PG;
-}
-
-static inline u32 get_crsb_idx(u32 val)
-{
-       return val & MAX_CRSB_IDX_PER_PG;
-}
-
-static inline void bnxt_qplib_crsb_dma_next(dma_addr_t *pg_map_arr,
-                                           u32 prod, dma_addr_t *dma_addr)
-{
-               *dma_addr = pg_map_arr[(prod) / BNXT_QPLIB_CRSBE_CNT_PER_PG];
-               *dma_addr += ((prod) % BNXT_QPLIB_CRSBE_CNT_PER_PG) *
-                             BNXT_QPLIB_CRSBE_UNITS;
-}
-
 /* CREQ */
 /* Allocate 1 per QP for async error notification for now */
 #define BNXT_QPLIB_CREQE_MAX_CNT       (64 * 1024)
@@ -158,17 +133,19 @@ static inline u32 get_creq_idx(u32 val)
 #define CREQ_DB(db, raw_cons, cp_bit)                          \
        writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
 
+#define CREQ_ENTRY_POLL_BUDGET         0x100
+
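
CREQ_ENTRY_POLL_BUDGET is 0x100, so a single bnxt_qplib_service_creq() pass
now consumes at most 256 CREQ entries before rearming the doorbell and
returning, bounding tasklet runtime; any leftover entries are handled on a
later pass.
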
 /* HWQ */
-struct bnxt_qplib_crsqe {
-       struct creq_qp_event    qp_event;
+
+struct bnxt_qplib_crsq {
+       struct creq_qp_event    *resp;
        u32                     req_size;
 };
 
-struct bnxt_qplib_crsq {
-       struct bnxt_qplib_crsqe *crsq;
-       u32                     prod;
-       u32                     cons;
-       u32                     max_elements;
+struct bnxt_qplib_rcfw_sbuf {
+       void *sb;
+       dma_addr_t dma_addr;
+       u32 size;
 };
 
 /* RCFW Communication Channels */
@@ -185,7 +162,7 @@ struct bnxt_qplib_rcfw {
        wait_queue_head_t       waitq;
        int                     (*aeq_handler)(struct bnxt_qplib_rcfw *,
                                               struct creq_func_event *);
-       atomic_t                seq_num;
+       u32                     seq_num;
 
        /* Bar region info */
        void __iomem            *cmdq_bar_reg_iomem;
@@ -203,8 +180,7 @@ struct bnxt_qplib_rcfw {
 
        /* Actual Cmd and Resp Queues */
        struct bnxt_qplib_hwq   cmdq;
-       struct bnxt_qplib_crsq  crsq;
-       struct bnxt_qplib_hwq   crsb;
+       struct bnxt_qplib_crsq  *crsqe_tbl;
 };
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
@@ -219,11 +195,14 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
                                        (struct bnxt_qplib_rcfw *,
                                         struct creq_func_event *));
 
-int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
-int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
-void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
-                                  struct cmdq_base *req, void **crsbe,
-                                  u8 is_block);
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+                               struct bnxt_qplib_rcfw *rcfw,
+                               u32 size);
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+                              struct bnxt_qplib_rcfw_sbuf *sbuf);
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+                                struct cmdq_base *req, struct creq_base *resp,
+                                void *sbuf, u8 is_block);
 
 int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
 int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
index 6277d80..2e48555 100644 (file)
@@ -48,6 +48,10 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
 
 #define HWQ_CMP(idx, hwq)      ((idx) & ((hwq)->max_elements - 1))
 
+#define HWQ_FREE_SLOTS(hwq)    ((hwq)->max_elements - \
+                               ((HWQ_CMP((hwq)->prod, hwq) - \
+                               HWQ_CMP((hwq)->cons, hwq)) & \
+                               ((hwq)->max_elements - 1)))
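
Worked example: with max_elements = 8, prod = 5 and cons = 2, the macro
yields 8 - ((5 - 2) & 7) = 5 free slots. __send_message() rejects a request
when cmd_size >= HWQ_FREE_SLOTS(cmdq), so at least one slot always stays
unused and a completely full ring can never be mistaken for an empty one.
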
 enum bnxt_qplib_hwq_type {
        HWQ_TYPE_CTX,
        HWQ_TYPE_QUEUE,
index 7b31ecc..fde18cf 100644 (file)
@@ -55,37 +55,30 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
                            struct bnxt_qplib_dev_attr *attr)
 {
        struct cmdq_query_func req;
-       struct creq_query_func_resp *resp;
+       struct creq_query_func_resp resp;
+       struct bnxt_qplib_rcfw_sbuf *sbuf;
        struct creq_query_func_resp_sb *sb;
        u16 cmd_flags = 0;
        u32 temp;
        u8 *tqm_alloc;
-       int i;
+       int i, rc = 0;
 
        RCFW_CMD_PREP(req, QUERY_FUNC, cmd_flags);
 
-       req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
-       resp = (struct creq_query_func_resp *)
-               bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void **)&sb,
-                                            0);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC failed ");
+       sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+       if (!sbuf) {
                dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
+                       "QPLIB: SP: QUERY_FUNC alloc side buffer failed");
+               return -ENOMEM;
        }
+
+       sb = sbuf->sb;
+       req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+                                         (void *)sbuf, 0);
+       if (rc)
+               goto bail;
+
        /* Extract the context from the side buffer */
        attr->max_qp = le32_to_cpu(sb->max_qp);
        attr->max_qp_rd_atom =
@@ -95,6 +88,11 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
                sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
                BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
        attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr);
+       /*
+        * 128 WQEs need to be reserved for the HW (8916), so don't
+        * report the absolute maximum.
+        */
+       attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS;
        attr->max_qp_sges = sb->max_sge;
        attr->max_cq = le32_to_cpu(sb->max_cq);
        attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
@@ -130,7 +128,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
                attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
                attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
        }
-       return 0;
+
+bail:
+       bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+       return rc;
 }
 
 /* SGID */
@@ -178,8 +179,9 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
        /* Remove GID from the SGID table */
        if (update) {
                struct cmdq_delete_gid req;
-               struct creq_delete_gid_resp *resp;
+               struct creq_delete_gid_resp resp;
                u16 cmd_flags = 0;
+               int rc;
 
                RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
                if (sgid_tbl->hw_id[index] == 0xFFFF) {
@@ -188,31 +190,10 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
                        return -EINVAL;
                }
                req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
-               resp = (struct creq_delete_gid_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL,
-                                                    0);
-               if (!resp) {
-                       dev_err(&res->pdev->dev,
-                               "QPLIB: SP: DELETE_GID send failed");
-                       return -EINVAL;
-               }
-               if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
-                                                  le16_to_cpu(req.cookie))) {
-                       /* Cmd timed out */
-                       dev_err(&res->pdev->dev,
-                               "QPLIB: SP: DELETE_GID timed out");
-                       return -ETIMEDOUT;
-               }
-               if (resp->status ||
-                   le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-                       dev_err(&res->pdev->dev,
-                               "QPLIB: SP: DELETE_GID failed ");
-                       dev_err(&res->pdev->dev,
-                               "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                               resp->status, le16_to_cpu(req.cookie),
-                               le16_to_cpu(resp->cookie));
-                       return -EINVAL;
-               }
+               rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                                 (void *)&resp, NULL, 0);
+               if (rc)
+                       return rc;
        }
        memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
               sizeof(bnxt_qplib_gid_zero));
@@ -234,7 +215,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
                                                   struct bnxt_qplib_res,
                                                   sgid_tbl);
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
-       int i, free_idx, rc = 0;
+       int i, free_idx;
 
        if (!sgid_tbl) {
                dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
@@ -266,10 +247,11 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
        }
        if (update) {
                struct cmdq_add_gid req;
-               struct creq_add_gid_resp *resp;
+               struct creq_add_gid_resp resp;
                u16 cmd_flags = 0;
                u32 temp32[4];
                u16 temp16[3];
+               int rc;
 
                RCFW_CMD_PREP(req, ADD_GID, cmd_flags);
 
@@ -290,31 +272,11 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
                req.src_mac[1] = cpu_to_be16(temp16[1]);
                req.src_mac[2] = cpu_to_be16(temp16[2]);
 
-               resp = (struct creq_add_gid_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-               if (!resp) {
-                       dev_err(&res->pdev->dev,
-                               "QPLIB: SP: ADD_GID send failed");
-                       return -EINVAL;
-               }
-               if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
-                                                  le16_to_cpu(req.cookie))) {
-                       /* Cmd timed out */
-                       dev_err(&res->pdev->dev,
-                               "QPIB: SP: ADD_GID timed out");
-                       return -ETIMEDOUT;
-               }
-               if (resp->status ||
-                   le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-                       dev_err(&res->pdev->dev, "QPLIB: SP: ADD_GID failed ");
-                       dev_err(&res->pdev->dev,
-                               "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                               resp->status, le16_to_cpu(req.cookie),
-                               le16_to_cpu(resp->cookie));
-                       return -EINVAL;
-               }
-               sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp->xid);
+               rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                                 (void *)&resp, NULL, 0);
+               if (rc)
+                       return rc;
+               sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp.xid);
        }
        /* Add GID to the sgid_tbl */
        memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
@@ -325,7 +287,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 
        *index = free_idx;
        /* unlock */
-       return rc;
+       return 0;
 }
 
 /* pkeys */
@@ -422,10 +384,11 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_create_ah req;
-       struct creq_create_ah_resp *resp;
+       struct creq_create_ah_resp resp;
        u16 cmd_flags = 0;
        u32 temp32[4];
        u16 temp16[3];
+       int rc;
 
        RCFW_CMD_PREP(req, CREATE_AH, cmd_flags);
 
@@ -450,28 +413,12 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
        req.dest_mac[1] = cpu_to_le16(temp16[1]);
        req.dest_mac[2] = cpu_to_le16(temp16[2]);
 
-       resp = (struct creq_create_ah_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 1);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
-       }
-       ah->id = le32_to_cpu(resp->xid);
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+                                         NULL, 1);
+       if (rc)
+               return rc;
+
+       ah->id = le32_to_cpu(resp.xid);
        return 0;
 }
 
@@ -479,35 +426,19 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_destroy_ah req;
-       struct creq_destroy_ah_resp *resp;
+       struct creq_destroy_ah_resp resp;
        u16 cmd_flags = 0;
+       int rc;
 
        /* Clean up the AH table in the device */
        RCFW_CMD_PREP(req, DESTROY_AH, cmd_flags);
 
        req.ah_cid = cpu_to_le32(ah->id);
 
-       resp = (struct creq_destroy_ah_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 1);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
-       }
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+                                         NULL, 1);
+       if (rc)
+               return rc;
        return 0;
 }
 
@@ -516,8 +447,9 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_deallocate_key req;
-       struct creq_deallocate_key_resp *resp;
+       struct creq_deallocate_key_resp resp;
        u16 cmd_flags = 0;
+       int rc;
 
        if (mrw->lkey == 0xFFFFFFFF) {
                dev_info(&res->pdev->dev,
@@ -536,27 +468,11 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
        else
                req.key = cpu_to_le32(mrw->lkey);
 
-       resp = (struct creq_deallocate_key_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-       if (!resp) {
-               dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR failed ");
-               dev_err(&res->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
-       }
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
+                                         NULL, 0);
+       if (rc)
+               return rc;
+
        /* Free the qplib's MRW memory */
        if (mrw->hwq.max_elements)
                bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
@@ -568,9 +484,10 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_allocate_mrw req;
-       struct creq_allocate_mrw_resp *resp;
+       struct creq_allocate_mrw_resp resp;
        u16 cmd_flags = 0;
        unsigned long tmp;
+       int rc;
 
        RCFW_CMD_PREP(req, ALLOCATE_MRW, cmd_flags);
 
@@ -584,33 +501,17 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
        tmp = (unsigned long)mrw;
        req.mrw_handle = cpu_to_le64(tmp);
 
-       resp = (struct creq_allocate_mrw_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, 0);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW send failed");
-               return -EINVAL;
-       }
-       if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
-               /* Cmd timed out */
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
-       }
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                         (void *)&resp, NULL, 0);
+       if (rc)
+               return rc;
+
        if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1)  ||
            (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
            (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
-               mrw->rkey = le32_to_cpu(resp->xid);
+               mrw->rkey = le32_to_cpu(resp.xid);
        else
-               mrw->lkey = le32_to_cpu(resp->xid);
+               mrw->lkey = le32_to_cpu(resp.xid);
        return 0;
 }
 
@@ -619,40 +520,17 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_deregister_mr req;
-       struct creq_deregister_mr_resp *resp;
+       struct creq_deregister_mr_resp resp;
        u16 cmd_flags = 0;
        int rc;
 
        RCFW_CMD_PREP(req, DEREGISTER_MR, cmd_flags);
 
        req.lkey = cpu_to_le32(mrw->lkey);
-       resp = (struct creq_deregister_mr_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, block);
-       if (!resp) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR send failed");
-               return -EINVAL;
-       }
-       if (block)
-               rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
-                                                   le16_to_cpu(req.cookie));
-       else
-               rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
-                                                  le16_to_cpu(req.cookie));
-       if (!rc) {
-               /* Cmd timed out */
-               dev_err(&res->pdev->dev, "QPLIB: SP: DEREG_MR timed out");
-               return -ETIMEDOUT;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR failed ");
-               dev_err(&rcfw->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
-       }
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                         (void *)&resp, NULL, block);
+       if (rc)
+               return rc;
 
        /* Free the qplib's MR memory */
        if (mrw->hwq.max_elements) {
@@ -669,7 +547,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_register_mr req;
-       struct creq_register_mr_resp *resp;
+       struct creq_register_mr_resp resp;
        u16 cmd_flags = 0, level;
        int pg_ptrs, pages, i, rc;
        dma_addr_t **pbl_ptr;
@@ -730,36 +608,11 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
        req.key = cpu_to_le32(mr->lkey);
        req.mr_size = cpu_to_le64(mr->total_size);
 
-       resp = (struct creq_register_mr_resp *)
-                       bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
-                                                    NULL, block);
-       if (!resp) {
-               dev_err(&res->pdev->dev, "SP: REG_MR send failed");
-               rc = -EINVAL;
-               goto fail;
-       }
-       if (block)
-               rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
-                                                   le16_to_cpu(req.cookie));
-       else
-               rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
-                                                  le16_to_cpu(req.cookie));
-       if (!rc) {
-               /* Cmd timed out */
-               dev_err(&res->pdev->dev, "SP: REG_MR timed out");
-               rc = -ETIMEDOUT;
-               goto fail;
-       }
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&res->pdev->dev, "QPLIB: SP: REG_MR failed ");
-               dev_err(&res->pdev->dev,
-                       "QPLIB: SP: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               rc = -EINVAL;
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                         (void *)&resp, NULL, block);
+       if (rc)
                goto fail;
-       }
+
        return 0;
 
 fail:
@@ -804,35 +657,15 @@ int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
 {
        struct bnxt_qplib_rcfw *rcfw = res->rcfw;
        struct cmdq_map_tc_to_cos req;
-       struct creq_map_tc_to_cos_resp *resp;
+       struct creq_map_tc_to_cos_resp resp;
        u16 cmd_flags = 0;
-       int tleft;
+       int rc = 0;
 
        RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags);
        req.cos0 = cpu_to_le16(cids[0]);
        req.cos1 = cpu_to_le16(cids[1]);
 
-       resp = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL, 0);
-       if (!resp) {
-               dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS send failed");
-               return -EINVAL;
-       }
-
-       tleft = bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie));
-       if (!tleft) {
-               dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS timed out");
-               return -ETIMEDOUT;
-       }
-
-       if (resp->status ||
-           le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
-               dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS failed ");
-               dev_err(&res->pdev->dev,
-                       "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
-                       resp->status, le16_to_cpu(req.cookie),
-                       le16_to_cpu(resp->cookie));
-               return -EINVAL;
-       }
-
+       rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+                                         (void *)&resp, NULL, 0);
        return rc;
 }
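
Every bnxt_re hunk in this series makes the same conversion: bnxt_qplib_rcfw_send_message() used to hand back a raw pointer into the completion ring, leaving each caller to block or wait, match the cookie, and check the status word by hand; it now fills a caller-owned response struct and returns an errno with those checks done once, centrally. A minimal sketch of that consolidated shape, with hypothetical helper names (rcfw_post_cmd() and friends are illustrative, not the actual bnxt_re internals):

	static int rcfw_send_sync(struct bnxt_qplib_rcfw *rcfw, void *req,
				  void *resp, void *sb, u8 is_block)
	{
		u16 cookie;
		int rc;

		rc = rcfw_post_cmd(rcfw, req, sb, &cookie); /* queue on CMDQ */
		if (rc)
			return rc;

		/* blocking callers poll; the rest sleep on the CREQ event */
		rc = is_block ? rcfw_poll_resp(rcfw, cookie)
			      : rcfw_wait_resp(rcfw, cookie);
		if (rc)
			return rc;                  /* e.g. -ETIMEDOUT */

		/* cookie match and status check now live here, once */
		return rcfw_copy_resp(rcfw, cookie, resp);
	}

This is why the per-call dev_err() blocks disappear: one shared failure path replaces three hand-rolled ones per command.
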
index 1442a61..a543f95 100644 (file)
@@ -40,6 +40,8 @@
 #ifndef __BNXT_QPLIB_SP_H__
 #define __BNXT_QPLIB_SP_H__
 
+#define BNXT_QPLIB_RESERVED_QP_WRS     128
+
 struct bnxt_qplib_dev_attr {
        char                            fw_ver[32];
        u16                             max_sgid;
index f96a96d..ae0b79a 100644 (file)
@@ -767,7 +767,7 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
                kfree(entry);
        }
 
-       list_for_each_safe(pos, nxt, &uctx->qpids) {
+       list_for_each_safe(pos, nxt, &uctx->cqids) {
                entry = list_entry(pos, struct c4iw_qid_list, entry);
                list_del_init(&entry->entry);
                kfree(entry);
@@ -880,13 +880,15 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
        rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free");
        if (!rdev->free_workq) {
                err = -ENOMEM;
-               goto err_free_status_page;
+               goto err_free_status_page_and_wr_log;
        }
 
        rdev->status_page->db_off = 0;
 
        return 0;
-err_free_status_page:
+err_free_status_page_and_wr_log:
+       if (c4iw_wr_log && rdev->wr_log)
+               kfree(rdev->wr_log);
        free_page((unsigned long)rdev->status_page);
 destroy_ocqp_pool:
        c4iw_ocqp_pool_destroy(rdev);
@@ -903,9 +905,11 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 {
        destroy_workqueue(rdev->free_workq);
        kfree(rdev->wr_log);
+       c4iw_release_dev_ucontext(rdev, &rdev->uctx);
        free_page((unsigned long)rdev->status_page);
        c4iw_pblpool_destroy(rdev);
        c4iw_rqtpool_destroy(rdev);
+       c4iw_ocqp_pool_destroy(rdev);
        c4iw_destroy_resource(&rdev->resource);
 }
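
Both cxgb4 hunks are leak fixes of the same kind: the open path's error labels and the close path must each undo exactly what c4iw_rdev_open() managed to set up (here the wr_log buffer, the ucontext QID lists, and the OCQP pool). The canonical shape of that idiom, as a generic sketch with illustrative names:

	static int rdev_open(struct my_rdev *rdev)
	{
		int err;

		err = pool_a_create(rdev);
		if (err)
			return err;
		err = pool_b_create(rdev);
		if (err)
			goto destroy_pool_a;
		rdev->wq = create_singlethread_workqueue("demo");
		if (!rdev->wq) {
			err = -ENOMEM;
			goto destroy_pool_b;
		}
		return 0;

	destroy_pool_b:
		pool_b_destroy(rdev);
	destroy_pool_a:
		pool_a_destroy(rdev);
		return err;
	}

The close path then repeats the full destroy list in the same reverse order, which is exactly what the added c4iw_ocqp_pool_destroy() call restores.
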
 
index 0c79983..9ecc089 100644 (file)
@@ -3692,8 +3692,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
        dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
        dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
-       dev->ib_dev.alloc_rdma_netdev   = mlx5_ib_alloc_rdma_netdev;
-       dev->ib_dev.free_rdma_netdev    = mlx5_ib_free_rdma_netdev;
+       if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) {
+               dev->ib_dev.alloc_rdma_netdev   = mlx5_ib_alloc_rdma_netdev;
+               dev->ib_dev.free_rdma_netdev    = mlx5_ib_free_rdma_netdev;
+       }
        if (mlx5_core_is_pf(mdev)) {
                dev->ib_dev.get_vf_config       = mlx5_ib_get_vf_config;
                dev->ib_dev.set_vf_link_state   = mlx5_ib_set_vf_link_state;
index aa08c76..d961f79 100644 (file)
 #define QEDR_MSG_QP   "  QP"
 #define QEDR_MSG_GSI  " GSI"
 
-#define QEDR_CQ_MAGIC_NUMBER   (0x11223344)
+#define QEDR_CQ_MAGIC_NUMBER   (0x11223344)
+
+#define FW_PAGE_SIZE           (RDMA_RING_PAGE_SIZE)
+#define FW_PAGE_SHIFT          (12)
 
 struct qedr_dev;
 
index 17685cf..d6723c3 100644 (file)
@@ -653,14 +653,15 @@ static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
 
 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
                               struct qedr_pbl *pbl,
-                              struct qedr_pbl_info *pbl_info)
+                              struct qedr_pbl_info *pbl_info, u32 pg_shift)
 {
        int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+       u32 fw_pg_cnt, fw_pg_per_umem_pg;
        struct qedr_pbl *pbl_tbl;
        struct scatterlist *sg;
        struct regpair *pbe;
+       u64 pg_addr;
        int entry;
-       u32 addr;
 
        if (!pbl_info->num_pbes)
                return;
@@ -683,29 +684,35 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
 
        shift = umem->page_shift;
 
+       fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
+
        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                pages = sg_dma_len(sg) >> shift;
+               pg_addr = sg_dma_address(sg);
                for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
-                       /* store the page address in pbe */
-                       pbe->lo = cpu_to_le32(sg_dma_address(sg) +
-                                             (pg_cnt << shift));
-                       addr = upper_32_bits(sg_dma_address(sg) +
-                                            (pg_cnt << shift));
-                       pbe->hi = cpu_to_le32(addr);
-                       pbe_cnt++;
-                       total_num_pbes++;
-                       pbe++;
-
-                       if (total_num_pbes == pbl_info->num_pbes)
-                               return;
-
-                       /* If the given pbl is full storing the pbes,
-                        * move to next pbl.
-                        */
-                       if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
-                               pbl_tbl++;
-                               pbe = (struct regpair *)pbl_tbl->va;
-                               pbe_cnt = 0;
+                       for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
+                               pbe->lo = cpu_to_le32(pg_addr);
+                               pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
+
+                               pg_addr += BIT(pg_shift);
+                               pbe_cnt++;
+                               total_num_pbes++;
+                               pbe++;
+
+                               if (total_num_pbes == pbl_info->num_pbes)
+                                       return;
+
+                               /* If the given pbl is full storing the pbes,
+                                * move to next pbl.
+                                */
+                               if (pbe_cnt ==
+                                   (pbl_info->pbl_size / sizeof(u64))) {
+                                       pbl_tbl++;
+                                       pbe = (struct regpair *)pbl_tbl->va;
+                                       pbe_cnt = 0;
+                               }
+
+                               fw_pg_cnt++;
                        }
                }
        }
@@ -754,7 +761,7 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
                                       u64 buf_addr, size_t buf_len,
                                       int access, int dmasync)
 {
-       int page_cnt;
+       u32 fw_pages;
        int rc;
 
        q->buf_addr = buf_addr;
@@ -766,8 +773,10 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
                return PTR_ERR(q->umem);
        }
 
-       page_cnt = ib_umem_page_count(q->umem);
-       rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+       fw_pages = ib_umem_page_count(q->umem) <<
+           (q->umem->page_shift - FW_PAGE_SHIFT);
+
+       rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
        if (rc)
                goto err0;
 
@@ -777,7 +786,8 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
                goto err0;
        }
 
-       qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+       qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
+                          FW_PAGE_SHIFT);
 
        return 0;
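
With FW_PAGE_SHIFT fixed at 12 in the header hunk, the firmware consumes 4 KiB pages no matter how large the umem pages are, and both shift expressions fall out directly. A worked sketch, assuming 64 KiB host pages (umem->page_shift == 16); write_pbe() is illustrative:

	/* fw_pg_per_umem_pg = BIT(16 - 12) = 16 firmware PBEs per host page;
	 * fw_pages = ib_umem_page_count(umem) << (16 - 12), so 10 host pages
	 * become 160 firmware pages for qedr_prepare_pbl_tbl().
	 */
	u64 pg_addr = sg_dma_address(sg);	/* start of one SG entry */
	u32 i, n = BIT(umem->page_shift - FW_PAGE_SHIFT);

	for (i = 0; i < n; i++, pg_addr += BIT(FW_PAGE_SHIFT))
		write_pbe(pbe++, pg_addr);	/* lo/hi split as in the hunk */
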
 
@@ -2226,7 +2236,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
                goto err1;
 
        qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
-                          &mr->info.pbl_info);
+                          &mr->info.pbl_info, mr->umem->page_shift);
 
        rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
        if (rc) {
@@ -3209,6 +3219,10 @@ static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
                case IB_WC_REG_MR:
                        qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
                        break;
+               case IB_WC_RDMA_READ:
+               case IB_WC_SEND:
+                       wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+                       break;
                default:
                        break;
                }
index ecdba2f..1ac5b85 100644 (file)
@@ -68,6 +68,7 @@
 static inline u32 rxe_crc32(struct rxe_dev *rxe,
                            u32 crc, void *next, size_t len)
 {
+       u32 retval;
        int err;
 
        SHASH_DESC_ON_STACK(shash, rxe->tfm);
@@ -81,7 +82,9 @@ static inline u32 rxe_crc32(struct rxe_dev *rxe,
                return crc32_le(crc, next, len);
        }
 
-       return *(u32 *)shash_desc_ctx(shash);
+       retval = *(u32 *)shash_desc_ctx(shash);
+       barrier_data(shash_desc_ctx(shash));
+       return retval;
 }
 
 int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
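
The rxe_crc32() change copies the digest out of the on-stack shash descriptor first, then barrier_data() tells the compiler that the descriptor memory is still in use at that point, so accesses to that stack slot cannot be reordered or elided around the read. The idiom in isolation, as a sketch built from the same calls the hunk uses:

	static u32 crc32_digest(struct crypto_shash *tfm, u32 seed,
				const void *data, size_t len)
	{
		u32 crc;

		SHASH_DESC_ON_STACK(shash, tfm);
		shash->tfm = tfm;
		*(u32 *)shash_desc_ctx(shash) = seed;
		if (crypto_shash_update(shash, data, len))
			return crc32_le(seed, data, len);  /* soft fallback */

		crc = *(u32 *)shash_desc_ctx(shash);
		barrier_data(shash_desc_ctx(shash)); /* ctx stays live past the read */
		return crc;
	}
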
index 83d709e..073e667 100644 (file)
@@ -740,13 +740,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 
                sge = ibwr->sg_list;
                for (i = 0; i < num_sge; i++, sge++) {
-                       if (qp->is_user && copy_from_user(p, (__user void *)
-                                           (uintptr_t)sge->addr, sge->length))
-                               return -EFAULT;
-
-                       else if (!qp->is_user)
-                               memcpy(p, (void *)(uintptr_t)sge->addr,
-                                      sge->length);
+                       memcpy(p, (void *)(uintptr_t)sge->addr,
+                                       sge->length);
 
                        p += sge->length;
                }
index 0060b2f..efe7402 100644 (file)
@@ -863,7 +863,6 @@ dev_stop:
        set_bit(IPOIB_STOP_REAPER, &priv->flags);
        cancel_delayed_work(&priv->ah_reap_task);
        set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
-       napi_enable(&priv->napi);
        ipoib_ib_dev_stop(dev);
        return -1;
 }
index a115c0b..1015a63 100644 (file)
@@ -1596,6 +1596,8 @@ static void ipoib_dev_uninit_default(struct net_device *dev)
 
        ipoib_transport_dev_cleanup(dev);
 
+       netif_napi_del(&priv->napi);
+
        ipoib_cm_dev_cleanup(dev);
 
        kfree(priv->rx_ring);
@@ -1649,6 +1651,7 @@ out_rx_ring_cleanup:
        kfree(priv->rx_ring);
 
 out:
+       netif_napi_del(&priv->napi);
        return -ENOMEM;
 }
 
@@ -2237,6 +2240,7 @@ event_failed:
 
 device_init_failed:
        free_netdev(priv->dev);
+       kfree(priv);
 
 alloc_mem_failed:
        return ERR_PTR(result);
@@ -2277,7 +2281,7 @@ static void ipoib_add_one(struct ib_device *device)
 
 static void ipoib_remove_one(struct ib_device *device, void *client_data)
 {
-       struct ipoib_dev_priv *priv, *tmp;
+       struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
        struct list_head *dev_list = client_data;
 
        if (!dev_list)
@@ -2300,7 +2304,14 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
                flush_workqueue(priv->wq);
 
                unregister_netdev(priv->dev);
-               free_netdev(priv->dev);
+               if (device->free_rdma_netdev)
+                       device->free_rdma_netdev(priv->dev);
+               else
+                       free_netdev(priv->dev);
+
+               list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+                       kfree(cpriv);
+
                kfree(priv);
        }
 
index 36dc4fc..081b33d 100644 (file)
@@ -133,13 +133,13 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
        snprintf(intf_name, sizeof intf_name, "%s.%04x",
                 ppriv->dev->name, pkey);
 
+       if (!rtnl_trylock())
+               return restart_syscall();
+
        priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
        if (!priv)
                return -ENOMEM;
 
-       if (!rtnl_trylock())
-               return restart_syscall();
-
        down_write(&ppriv->vlan_rwsem);
 
        /*
@@ -167,8 +167,10 @@ out:
 
        rtnl_unlock();
 
-       if (result)
+       if (result) {
                free_netdev(priv->dev);
+               kfree(priv);
+       }
 
        return result;
 }
@@ -209,6 +211,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 
        if (dev) {
                free_netdev(dev);
+               kfree(priv);
                return 0;
        }
 
index e37d372..f600f3a 100644 (file)
@@ -248,7 +248,8 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
 
        if (!btns_desc) {
                dev_err(dev, "ACPI Button Descriptors not found\n");
-               return ERR_PTR(-ENODEV);
+               button_info = ERR_PTR(-ENODEV);
+               goto out;
        }
 
        /* The first package describes the collection */
@@ -264,24 +265,31 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
        }
        if (collection_uid == -1) {
                dev_err(dev, "Invalid Button Collection Descriptor\n");
-               return ERR_PTR(-ENODEV);
+               button_info = ERR_PTR(-ENODEV);
+               goto out;
        }
 
        /* There are package.count - 1 buttons + 1 terminating empty entry */
        button_info = devm_kcalloc(dev, btns_desc->package.count,
                                   sizeof(*button_info), GFP_KERNEL);
-       if (!button_info)
-               return ERR_PTR(-ENOMEM);
+       if (!button_info) {
+               button_info = ERR_PTR(-ENOMEM);
+               goto out;
+       }
 
        /* Parse the button descriptors */
        for (i = 1, btn = 0; i < btns_desc->package.count; i++, btn++) {
                if (soc_button_parse_btn_desc(dev,
                                              &btns_desc->package.elements[i],
                                              collection_uid,
-                                             &button_info[btn]))
-                       return ERR_PTR(-ENODEV);
+                                             &button_info[btn])) {
+                       button_info = ERR_PTR(-ENODEV);
+                       goto out;
+               }
        }
 
+out:
+       kfree(buf.pointer);
        return button_info;
 }
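
The refactor funnels every exit through out: because the buffer being parsed is owned by this function: assuming it came from an ACPI_ALLOCATE_BUFFER evaluation (the usual pattern, and consistent with the kfree(buf.pointer) added above), ACPICA allocates buf.pointer and the caller must free it on success and failure alike. A sketch of that contract; the "BTNS" method name and parse_buttons() are assumptions, not taken from this hunk:

	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
	struct soc_button_info *info;
	acpi_status status;

	status = acpi_evaluate_object_typed(handle, "BTNS", NULL, &buf,
					    ACPI_TYPE_PACKAGE);
	if (ACPI_FAILURE(status))
		return ERR_PTR(-ENODEV);	/* nothing allocated on failure */

	info = parse_buttons(buf.pointer);	/* may itself return ERR_PTR */
	kfree(buf.pointer);			/* one free covers every path */
	return info;
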
 
index dea63e2..f5206e2 100644 (file)
@@ -31,9 +31,6 @@
 #define F54_GET_REPORT          1
 #define F54_FORCE_CAL           2
 
-/* Fixed sizes of reports */
-#define F54_QUERY_LEN                  27
-
 /* F54 capabilities */
 #define F54_CAP_BASELINE       (1 << 2)
 #define F54_CAP_IMAGE8         (1 << 3)
@@ -95,7 +92,6 @@ struct rmi_f54_reports {
 struct f54_data {
        struct rmi_function *fn;
 
-       u8 qry[F54_QUERY_LEN];
        u8 num_rx_electrodes;
        u8 num_tx_electrodes;
        u8 capabilities;
@@ -632,22 +628,23 @@ static int rmi_f54_detect(struct rmi_function *fn)
 {
        int error;
        struct f54_data *f54;
+       u8 buf[6];
 
        f54 = dev_get_drvdata(&fn->dev);
 
        error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
-                              &f54->qry, sizeof(f54->qry));
+                              buf, sizeof(buf));
        if (error) {
                dev_err(&fn->dev, "%s: Failed to query F54 properties\n",
                        __func__);
                return error;
        }
 
-       f54->num_rx_electrodes = f54->qry[0];
-       f54->num_tx_electrodes = f54->qry[1];
-       f54->capabilities = f54->qry[2];
-       f54->clock_rate = f54->qry[3] | (f54->qry[4] << 8);
-       f54->family = f54->qry[5];
+       f54->num_rx_electrodes = buf[0];
+       f54->num_tx_electrodes = buf[1];
+       f54->capabilities = buf[2];
+       f54->clock_rate = buf[3] | (buf[4] << 8);
+       f54->family = buf[5];
 
        rmi_dbg(RMI_DEBUG_FN, &fn->dev, "F54 num_rx_electrodes: %d\n",
                f54->num_rx_electrodes);
index 09720d9..f932a83 100644 (file)
@@ -723,6 +723,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"),
                },
        },
+       {
+               /* Fujitsu UH554 laptop */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"),
+               },
+       },
        { }
 };
 
index 63cacf5..0f1219f 100644 (file)
@@ -3879,11 +3879,9 @@ static void irte_ga_prepare(void *entry,
                            u8 vector, u32 dest_apicid, int devid)
 {
        struct irte_ga *irte = (struct irte_ga *) entry;
-       struct iommu_dev_data *dev_data = search_dev_data(devid);
 
        irte->lo.val                      = 0;
        irte->hi.val                      = 0;
-       irte->lo.fields_remap.guest_mode  = dev_data ? dev_data->use_vapic : 0;
        irte->lo.fields_remap.int_type    = delivery_mode;
        irte->lo.fields_remap.dm          = dest_mode;
        irte->hi.fields.vector            = vector;
@@ -3939,10 +3937,10 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
        struct irte_ga *irte = (struct irte_ga *) entry;
        struct iommu_dev_data *dev_data = search_dev_data(devid);
 
-       if (!dev_data || !dev_data->use_vapic) {
+       if (!dev_data || !dev_data->use_vapic ||
+           !irte->lo.fields_remap.guest_mode) {
                irte->hi.fields.vector = vector;
                irte->lo.fields_remap.destination = dest_apicid;
-               irte->lo.fields_remap.guest_mode = 0;
                modify_irte_ga(devid, index, irte, NULL);
        }
 }
index cbf7763..c8b0329 100644 (file)
@@ -1808,10 +1808,9 @@ IOMMU_INIT_POST(detect_intel_iommu);
  * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
  * "Remapping Hardware Unit Hot Plug".
  */
-static u8 dmar_hp_uuid[] = {
-       /* 0000 */    0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C,
-       /* 0008 */    0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF
-};
+static guid_t dmar_hp_guid =
+       GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
+                 0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
 
 /*
  * Currently there's only one revision and BIOS will not check the revision id,
@@ -1824,7 +1823,7 @@ static u8 dmar_hp_uuid[] = {
 
 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
 {
-       return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func);
+       return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
 }
 
 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
@@ -1843,7 +1842,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func,
        if (!dmar_detect_dsm(handle, func))
                return 0;
 
-       obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID,
+       obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
                                      func, NULL, ACPI_TYPE_BUFFER);
        if (!obj)
                return -ENODEV;
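
The guid_t conversion is byte-for-byte equivalent: the kernel's guid_t stores its first three fields little-endian, so GUID_INIT() reproduces the old open-coded array exactly. Spelling the layout out:

	/* GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B, 0x91, 0xBF, ..., 0xAF):
	 *   le32 0xD8C1A3A6 -> A6 A3 C1 D8	(old bytes 0000-0003)
	 *   le16 0xBE9B     -> 9B BE		(old bytes 0004-0005)
	 *   le16 0x4C9B     -> 9B 4C		(old bytes 0006-0007)
	 *   final 8 bytes stored as written	(old bytes 0008-000F)
	 * so acpi_check_dsm() compares the same 16 bytes as before.
	 */
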
index eb7fbe1..929f855 100644 (file)
@@ -140,7 +140,7 @@ static inline void gic_map_to_vpe(unsigned int intr, unsigned int vpe)
 }
 
 #ifdef CONFIG_CLKSRC_MIPS_GIC
-u64 gic_read_count(void)
+u64 notrace gic_read_count(void)
 {
        unsigned int hi, hi2, lo;
 
@@ -167,7 +167,7 @@ unsigned int gic_get_count_width(void)
        return bits;
 }
 
-void gic_write_compare(u64 cnt)
+void notrace gic_write_compare(u64 cnt)
 {
        if (mips_cm_is64) {
                gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE), cnt);
@@ -179,7 +179,7 @@ void gic_write_compare(u64 cnt)
        }
 }
 
-void gic_write_cpu_compare(u64 cnt, int cpu)
+void notrace gic_write_cpu_compare(u64 cnt, int cpu)
 {
        unsigned long flags;
 
index bb3ac5f..72a391e 100644 (file)
@@ -142,7 +142,7 @@ static struct irq_chip xtensa_mx_irq_chip = {
 int __init xtensa_mx_init_legacy(struct device_node *interrupt_parent)
 {
        struct irq_domain *root_domain =
-               irq_domain_add_legacy(NULL, NR_IRQS, 0, 0,
+               irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
                                &xtensa_mx_irq_domain_ops,
                                &xtensa_mx_irq_chip);
        irq_set_default_host(root_domain);
index 472ae17..f728755 100644 (file)
@@ -89,7 +89,7 @@ static struct irq_chip xtensa_irq_chip = {
 int __init xtensa_pic_init_legacy(struct device_node *interrupt_parent)
 {
        struct irq_domain *root_domain =
-               irq_domain_add_legacy(NULL, NR_IRQS, 0, 0,
+               irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0,
                                &xtensa_irq_domain_ops, &xtensa_irq_chip);
        irq_set_default_host(root_domain);
        return 0;
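
Both xtensa hunks change the same call. The parameters of irq_domain_add_legacy() are (node, size, first_irq, first_hwirq, ops, host_data), so the mapping shifts from "hwirq h -> Linux irq h" to "hwirq h -> Linux irq h + 1" over NR_IRQS - 1 entries, leaving Linux IRQ 0 unmapped as the usual invalid-IRQ sentinel:

	/* old: irq_domain_add_legacy(NULL, NR_IRQS, 0, 0, ...)
	 *      hwirq 0 landed on Linux irq 0, which means "no irq"
	 * new: irq_domain_add_legacy(NULL, NR_IRQS - 1, 1, 0, ...)
	 *      hwirq h now resolves to virq h + 1
	 */
	unsigned int virq = irq_find_mapping(root_domain, hwirq); /* hwirq + 1 */
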
index 1548259..2cfd938 100644 (file)
@@ -242,7 +242,7 @@ static int bcm6328_hwled(struct device *dev, struct device_node *nc, u32 reg,
 
                spin_lock_irqsave(lock, flags);
                val = bcm6328_led_read(addr);
-               val |= (BIT(reg) << (((sel % 4) * 4) + 16));
+               val |= (BIT(reg % 4) << (((sel % 4) * 4) + 16));
                bcm6328_led_write(addr, val);
                spin_unlock_irqrestore(lock, flags);
        }
@@ -269,7 +269,7 @@ static int bcm6328_hwled(struct device *dev, struct device_node *nc, u32 reg,
 
                spin_lock_irqsave(lock, flags);
                val = bcm6328_led_read(addr);
-               val |= (BIT(reg) << ((sel % 4) * 4));
+               val |= (BIT(reg % 4) << ((sel % 4) * 4));
                bcm6328_led_write(addr, val);
                spin_unlock_irqrestore(lock, flags);
        }
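
Both writes pack a value into a 4-bit per-LED field selected by ((sel % 4) * 4) within the register (plus 16 for the upper half), so the bit index written must itself stay inside the nibble; for signal sources of 4 and up, BIT(reg) escapes the field and lands on a neighbouring LED's bits. A worked case, assuming that register layout:

	/* sel = 5, reg = 6:
	 * old: BIT(6)     << (((5 % 4) * 4) + 16) = 0x40 << 20 -> bit 26
	 *      (outside the nibble at bits 20-23, corrupting another LED)
	 * new: BIT(6 % 4) << (((5 % 4) * 4) + 16) = 0x04 << 20 -> bit 22
	 *      (bit 2 of the intended 4-bit field)
	 */
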
index afa3b40..e95ea65 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/sched/loadavg.h>
 #include <linux/leds.h>
 #include <linux/reboot.h>
-#include <linux/suspend.h>
 #include "../leds.h"
 
 static int panic_heartbeats;
@@ -163,30 +162,6 @@ static struct led_trigger heartbeat_led_trigger = {
        .deactivate = heartbeat_trig_deactivate,
 };
 
-static int heartbeat_pm_notifier(struct notifier_block *nb,
-                                unsigned long pm_event, void *unused)
-{
-       int rc;
-
-       switch (pm_event) {
-       case PM_SUSPEND_PREPARE:
-       case PM_HIBERNATION_PREPARE:
-       case PM_RESTORE_PREPARE:
-               led_trigger_unregister(&heartbeat_led_trigger);
-               break;
-       case PM_POST_SUSPEND:
-       case PM_POST_HIBERNATION:
-       case PM_POST_RESTORE:
-               rc = led_trigger_register(&heartbeat_led_trigger);
-               if (rc)
-                       pr_err("could not re-register heartbeat trigger\n");
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_DONE;
-}
-
 static int heartbeat_reboot_notifier(struct notifier_block *nb,
                                     unsigned long code, void *unused)
 {
@@ -201,10 +176,6 @@ static int heartbeat_panic_notifier(struct notifier_block *nb,
        return NOTIFY_DONE;
 }
 
-static struct notifier_block heartbeat_pm_nb = {
-       .notifier_call = heartbeat_pm_notifier,
-};
-
 static struct notifier_block heartbeat_reboot_nb = {
        .notifier_call = heartbeat_reboot_notifier,
 };
@@ -221,14 +192,12 @@ static int __init heartbeat_trig_init(void)
                atomic_notifier_chain_register(&panic_notifier_list,
                                               &heartbeat_panic_nb);
                register_reboot_notifier(&heartbeat_reboot_nb);
-               register_pm_notifier(&heartbeat_pm_nb);
        }
        return rc;
 }
 
 static void __exit heartbeat_trig_exit(void)
 {
-       unregister_pm_notifier(&heartbeat_pm_nb);
        unregister_reboot_notifier(&heartbeat_reboot_nb);
        atomic_notifier_chain_unregister(&panic_notifier_list,
                                         &heartbeat_panic_nb);
index 6a4aa60..ddae430 100644 (file)
@@ -252,8 +252,9 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
        }
        mutex_unlock(&dev->mlock);
 
-       if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end))
-               return -ENOMEM;
+       ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end);
+       if (ret)
+               return ret;
 
        t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
        if (!t) {
@@ -640,6 +641,7 @@ EXPORT_SYMBOL(nvm_max_phys_sects);
 int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
        struct nvm_dev *dev = tgt_dev->parent;
+       int ret;
 
        if (!dev->ops->submit_io)
                return -ENODEV;
@@ -647,7 +649,12 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
        nvm_rq_tgt_to_dev(tgt_dev, rqd);
 
        rqd->dev = tgt_dev;
-       return dev->ops->submit_io(dev, rqd);
+
+       /* In case of error, fail with the right address format */
+       ret = dev->ops->submit_io(dev, rqd);
+       if (ret)
+               nvm_rq_dev_to_tgt(tgt_dev, rqd);
+       return ret;
 }
 EXPORT_SYMBOL(nvm_submit_io);
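
The added error path follows a convert/act/restore idiom: the request's addresses were rewritten in place into device format for submission, so a failing submit_io() must map them back before the error bubbles up, or the caller's logging and retry logic would see device-format addresses it does not understand; on success, the reverse translation presumably happens on the completion path instead. The idiom in miniature:

	nvm_rq_tgt_to_dev(tgt_dev, rqd);	/* rewrite ppas in place */
	ret = dev->ops->submit_io(dev, rqd);
	if (ret)
		nvm_rq_dev_to_tgt(tgt_dev, rqd); /* undo for the caller */
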
 
index 59bcea8..024a8fc 100644 (file)
@@ -31,9 +31,13 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
         */
 retry:
        ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
-       if (ret == NVM_IO_REQUEUE) {
+       switch (ret) {
+       case NVM_IO_REQUEUE:
                io_schedule();
                goto retry;
+       case NVM_IO_ERR:
+               pblk_pipeline_stop(pblk);
+               goto out;
        }
 
        if (unlikely(!bio_has_data(bio)))
@@ -58,6 +62,8 @@ retry:
        atomic_long_add(nr_entries, &pblk->req_writes);
 #endif
 
+       pblk_rl_inserted(&pblk->rl, nr_entries);
+
 out:
        pblk_write_should_kick(pblk);
        return ret;
index 5e44768..11fe0c5 100644 (file)
@@ -17,7 +17,6 @@
  */
 
 #include "pblk.h"
-#include <linux/time.h>
 
 static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
                         struct ppa_addr *ppa)
@@ -34,7 +33,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
                pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
                                                        line->id, pos);
 
-       pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
+       pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
 }
 
 static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
@@ -54,6 +53,8 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
                *ppa = rqd->ppa_addr;
                pblk_mark_bb(pblk, line, ppa);
        }
+
+       atomic_dec(&pblk->inflight_io);
 }
 
 /* Erase completion assumes that only one block is erased at the time */
@@ -61,13 +62,12 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
 {
        struct pblk *pblk = rqd->private;
 
-       up(&pblk->erase_sem);
        __pblk_end_io_erase(pblk, rqd);
-       mempool_free(rqd, pblk->r_rq_pool);
+       mempool_free(rqd, pblk->g_rq_pool);
 }
 
-static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
-                                 u64 paddr)
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+                          u64 paddr)
 {
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct list_head *move_list = NULL;
@@ -88,7 +88,7 @@ static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
                spin_unlock(&line->lock);
                return;
        }
-       line->vsc--;
+       le32_add_cpu(line->vsc, -1);
 
        if (line->state == PBLK_LINESTATE_CLOSED)
                move_list = pblk_line_gc_list(pblk, line);
@@ -130,18 +130,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
        __pblk_map_invalidate(pblk, line, paddr);
 }
 
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
-                            u64 paddr)
-{
-       __pblk_map_invalidate(pblk, line, paddr);
-
-       pblk_rb_sync_init(&pblk->rwb, NULL);
-       line->left_ssecs--;
-       if (!line->left_ssecs)
-               pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-       pblk_rb_sync_end(&pblk->rwb, NULL);
-}
-
 static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
                                  unsigned int nr_secs)
 {
@@ -172,8 +160,8 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
                pool = pblk->w_rq_pool;
                rq_size = pblk_w_rq_size;
        } else {
-               pool = pblk->r_rq_pool;
-               rq_size = pblk_r_rq_size;
+               pool = pblk->g_rq_pool;
+               rq_size = pblk_g_rq_size;
        }
 
        rqd = mempool_alloc(pool, GFP_KERNEL);
@@ -189,7 +177,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
        if (rw == WRITE)
                pool = pblk->w_rq_pool;
        else
-               pool = pblk->r_rq_pool;
+               pool = pblk->g_rq_pool;
 
        mempool_free(rqd, pool);
 }
@@ -271,35 +259,26 @@ void pblk_end_io_sync(struct nvm_rq *rqd)
        complete(waiting);
 }
 
-void pblk_flush_writer(struct pblk *pblk)
+void pblk_wait_for_meta(struct pblk *pblk)
 {
-       struct bio *bio;
-       int ret;
-       DECLARE_COMPLETION_ONSTACK(wait);
-
-       bio = bio_alloc(GFP_KERNEL, 1);
-       if (!bio)
-               return;
-
-       bio->bi_iter.bi_sector = 0; /* internal bio */
-       bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
-       bio->bi_private = &wait;
-       bio->bi_end_io = pblk_end_bio_sync;
+       do {
+               if (!atomic_read(&pblk->inflight_io))
+                       break;
 
-       ret = pblk_write_to_cache(pblk, bio, 0);
-       if (ret == NVM_IO_OK) {
-               if (!wait_for_completion_io_timeout(&wait,
-                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
-                       pr_err("pblk: flush cache timed out\n");
-               }
-       } else if (ret != NVM_IO_DONE) {
-               pr_err("pblk: tear down bio failed\n");
-       }
+               schedule();
+       } while (1);
+}
 
-       if (bio->bi_error)
-               pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
+static void pblk_flush_writer(struct pblk *pblk)
+{
+       pblk_rb_flush(&pblk->rwb);
+       do {
+               if (!pblk_rb_sync_count(&pblk->rwb))
+                       break;
 
-       bio_put(bio);
+               pblk_write_kick(pblk);
+               schedule();
+       } while (1);
 }
 
 struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
@@ -307,28 +286,31 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct list_head *move_list = NULL;
+       int vsc = le32_to_cpu(*line->vsc);
 
-       if (!line->vsc) {
+       lockdep_assert_held(&line->lock);
+
+       if (!vsc) {
                if (line->gc_group != PBLK_LINEGC_FULL) {
                        line->gc_group = PBLK_LINEGC_FULL;
                        move_list = &l_mg->gc_full_list;
                }
-       } else if (line->vsc < lm->mid_thrs) {
+       } else if (vsc < lm->high_thrs) {
                if (line->gc_group != PBLK_LINEGC_HIGH) {
                        line->gc_group = PBLK_LINEGC_HIGH;
                        move_list = &l_mg->gc_high_list;
                }
-       } else if (line->vsc < lm->high_thrs) {
+       } else if (vsc < lm->mid_thrs) {
                if (line->gc_group != PBLK_LINEGC_MID) {
                        line->gc_group = PBLK_LINEGC_MID;
                        move_list = &l_mg->gc_mid_list;
                }
-       } else if (line->vsc < line->sec_in_line) {
+       } else if (vsc < line->sec_in_line) {
                if (line->gc_group != PBLK_LINEGC_LOW) {
                        line->gc_group = PBLK_LINEGC_LOW;
                        move_list = &l_mg->gc_low_list;
                }
-       } else if (line->vsc == line->sec_in_line) {
+       } else if (vsc == line->sec_in_line) {
                if (line->gc_group != PBLK_LINEGC_EMPTY) {
                        line->gc_group = PBLK_LINEGC_EMPTY;
                        move_list = &l_mg->gc_empty_list;
@@ -338,7 +320,7 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
                line->gc_group = PBLK_LINEGC_NONE;
                move_list =  &l_mg->corrupt_list;
                pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
-                                               line->id, line->vsc,
+                                               line->id, vsc,
                                                line->sec_in_line,
                                                lm->high_thrs, lm->mid_thrs);
        }
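
The swapped comparisons matter because the thresholds are ordered high_thrs < mid_thrs (plausibly sec_per_line / 4 and sec_per_line / 2 at init time; that ordering is an assumption here, not shown in the hunk): the fewer valid sectors a line holds, the cheaper it is to garbage-collect, so it belongs on a higher-priority list. A worked pass through the cascade:

	/* assumed: sec_in_line = 1024, high_thrs = 256, mid_thrs = 512
	 *   vsc =    0           -> gc_full_list  (erase reclaims everything)
	 *   vsc =  100 (<  256)  -> gc_high_list  (cheapest real GC work)
	 *   vsc =  300 (<  512)  -> gc_mid_list
	 *   vsc =  800 (< 1024)  -> gc_low_list
	 *   vsc = 1024           -> gc_empty_list (nothing invalidated yet)
	 */
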
@@ -397,6 +379,11 @@ void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
 #endif
 }
 
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
+{
+       pblk->sec_per_write = sec_per_write;
+}
+
 int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
 {
        struct nvm_tgt_dev *dev = pblk->dev;
@@ -431,21 +418,23 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
                }
        }
 #endif
+
+       atomic_inc(&pblk->inflight_io);
+
        return nvm_submit_io(dev, rqd);
 }
 
 struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
                              unsigned int nr_secs, unsigned int len,
-                             gfp_t gfp_mask)
+                             int alloc_type, gfp_t gfp_mask)
 {
        struct nvm_tgt_dev *dev = pblk->dev;
-       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        void *kaddr = data;
        struct page *page;
        struct bio *bio;
        int i, ret;
 
-       if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
+       if (alloc_type == PBLK_KMALLOC_META)
                return bio_map_kern(dev->q, kaddr, len, gfp_mask);
 
        bio = bio_kmalloc(gfp_mask, nr_secs);
@@ -478,7 +467,7 @@ out:
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
                   unsigned long secs_to_flush)
 {
-       int max = pblk->max_write_pgs;
+       int max = pblk->sec_per_write;
        int min = pblk->min_write_pgs;
        int secs_to_sync = 0;
 
@@ -492,12 +481,26 @@ int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
        return secs_to_sync;
 }
 
-static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
-                            int nr_secs)
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
+{
+       u64 addr;
+       int i;
+
+       addr = find_next_zero_bit(line->map_bitmap,
+                                       pblk->lm.sec_per_line, line->cur_sec);
+       line->cur_sec = addr - nr_secs;
+
+       for (i = 0; i < nr_secs; i++, line->cur_sec--)
+               WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
+}
+
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
 {
        u64 addr;
        int i;
 
+       lockdep_assert_held(&line->lock);
+
        /* logic error: ppa out-of-bounds. Prevent generating bad address */
        if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
                WARN(1, "pblk: page allocation out of bounds\n");
@@ -528,27 +531,38 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
        return addr;
 }
 
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
+{
+       u64 paddr;
+
+       spin_lock(&line->lock);
+       paddr = find_next_zero_bit(line->map_bitmap,
+                                       pblk->lm.sec_per_line, line->cur_sec);
+       spin_unlock(&line->lock);
+
+       return paddr;
+}
+
 /*
  * Submit emeta to one LUN in the raid line at a time to avoid a deadlock when
  * taking the per LUN semaphore.
  */
 static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
-                                    u64 paddr, int dir)
+                                    void *emeta_buf, u64 paddr, int dir)
 {
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
+       void *ppa_list, *meta_list;
        struct bio *bio;
        struct nvm_rq rqd;
-       struct ppa_addr *ppa_list;
-       dma_addr_t dma_ppa_list;
-       void *emeta = line->emeta;
+       dma_addr_t dma_ppa_list, dma_meta_list;
        int min = pblk->min_write_pgs;
-       int left_ppas = lm->emeta_sec;
+       int left_ppas = lm->emeta_sec[0];
        int id = line->id;
        int rq_ppas, rq_len;
        int cmd_op, bio_op;
-       int flags;
        int i, j;
        int ret;
        DECLARE_COMPLETION_ONSTACK(wait);
@@ -556,25 +570,28 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
        if (dir == WRITE) {
                bio_op = REQ_OP_WRITE;
                cmd_op = NVM_OP_PWRITE;
-               flags = pblk_set_progr_mode(pblk, WRITE);
        } else if (dir == READ) {
                bio_op = REQ_OP_READ;
                cmd_op = NVM_OP_PREAD;
-               flags = pblk_set_read_mode(pblk);
        } else
                return -EINVAL;
 
-       ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
-       if (!ppa_list)
+       meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+                                                       &dma_meta_list);
+       if (!meta_list)
                return -ENOMEM;
 
+       ppa_list = meta_list + pblk_dma_meta_size;
+       dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
 next_rq:
        memset(&rqd, 0, sizeof(struct nvm_rq));
 
        rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
        rq_len = rq_ppas * geo->sec_size;
 
-       bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+       bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
+                                       l_mg->emeta_alloc_type, GFP_KERNEL);
        if (IS_ERR(bio)) {
                ret = PTR_ERR(bio);
                goto free_rqd_dma;
@@ -584,27 +601,38 @@ next_rq:
        bio_set_op_attrs(bio, bio_op, 0);
 
        rqd.bio = bio;
-       rqd.opcode = cmd_op;
-       rqd.flags = flags;
-       rqd.nr_ppas = rq_ppas;
+       rqd.meta_list = meta_list;
        rqd.ppa_list = ppa_list;
+       rqd.dma_meta_list = dma_meta_list;
        rqd.dma_ppa_list = dma_ppa_list;
+       rqd.opcode = cmd_op;
+       rqd.nr_ppas = rq_ppas;
        rqd.end_io = pblk_end_io_sync;
        rqd.private = &wait;
 
        if (dir == WRITE) {
+               struct pblk_sec_meta *meta_list = rqd.meta_list;
+
+               rqd.flags = pblk_set_progr_mode(pblk, WRITE);
                for (i = 0; i < rqd.nr_ppas; ) {
                        spin_lock(&line->lock);
                        paddr = __pblk_alloc_page(pblk, line, min);
                        spin_unlock(&line->lock);
-                       for (j = 0; j < min; j++, i++, paddr++)
+                       for (j = 0; j < min; j++, i++, paddr++) {
+                               meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
                                rqd.ppa_list[i] =
                                        addr_to_gen_ppa(pblk, paddr, id);
+                       }
                }
        } else {
                for (i = 0; i < rqd.nr_ppas; ) {
                        struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
                        int pos = pblk_dev_ppa_to_pos(geo, ppa);
+                       int read_type = PBLK_READ_RANDOM;
+
+                       if (pblk_io_aligned(pblk, rq_ppas))
+                               read_type = PBLK_READ_SEQUENTIAL;
+                       rqd.flags = pblk_set_read_mode(pblk, read_type);
 
                        while (test_bit(pos, line->blk_bitmap)) {
                                paddr += min;
@@ -645,9 +673,11 @@ next_rq:
                                msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
                pr_err("pblk: emeta I/O timed out\n");
        }
+       atomic_dec(&pblk->inflight_io);
        reinit_completion(&wait);
 
-       bio_put(bio);
+       if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
+               bio_put(bio);
 
        if (rqd.error) {
                if (dir == WRITE)
@@ -656,12 +686,12 @@ next_rq:
                        pblk_log_read_err(pblk, &rqd);
        }
 
-       emeta += rq_len;
+       emeta_buf += rq_len;
        left_ppas -= rq_ppas;
        if (left_ppas)
                goto next_rq;
 free_rqd_dma:
-       nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+       nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
        return ret;
 }
 
@@ -697,21 +727,24 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
                bio_op = REQ_OP_WRITE;
                cmd_op = NVM_OP_PWRITE;
                flags = pblk_set_progr_mode(pblk, WRITE);
-               lba_list = pblk_line_emeta_to_lbas(line->emeta);
+               lba_list = emeta_to_lbas(pblk, line->emeta->buf);
        } else if (dir == READ) {
                bio_op = REQ_OP_READ;
                cmd_op = NVM_OP_PREAD;
-               flags = pblk_set_read_mode(pblk);
+               flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
        } else
                return -EINVAL;
 
        memset(&rqd, 0, sizeof(struct nvm_rq));
 
-       rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
-                                                       &rqd.dma_ppa_list);
-       if (!rqd.ppa_list)
+       rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+                                                       &rqd.dma_meta_list);
+       if (!rqd.meta_list)
                return -ENOMEM;
 
+       rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+       rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
+
        bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
        if (IS_ERR(bio)) {
                ret = PTR_ERR(bio);
@@ -729,9 +762,15 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
        rqd.private = &wait;
 
        for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+               struct pblk_sec_meta *meta_list = rqd.meta_list;
+
                rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
-               if (dir == WRITE)
-                       lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+
+               if (dir == WRITE) {
+                       __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+                       meta_list[i].lba = lba_list[paddr] = addr_empty;
+               }
        }
 
        /*
@@ -750,6 +789,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
                                msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
                pr_err("pblk: smeta I/O timed out\n");
        }
+       atomic_dec(&pblk->inflight_io);
 
        if (rqd.error) {
                if (dir == WRITE)
@@ -759,7 +799,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
        }
 
 free_ppa_list:
-       nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+       nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
 
        return ret;
 }
@@ -771,9 +811,11 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
        return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
 }
 
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+                        void *emeta_buf)
 {
-       return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+       return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
+                                               line->emeta_ssec, READ);
 }
 
 static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -789,7 +831,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
 static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
 {
        struct nvm_rq rqd;
-       int ret;
+       int ret = 0;
        DECLARE_COMPLETION_ONSTACK(wait);
 
        memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -824,14 +866,14 @@ out:
        rqd.private = pblk;
        __pblk_end_io_erase(pblk, &rqd);
 
-       return 0;
+       return ret;
 }
 
 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
 {
        struct pblk_line_meta *lm = &pblk->lm;
        struct ppa_addr ppa;
-       int bit = -1;
+       int ret, bit = -1;
 
        /* Erase only good blocks, one at a time */
        do {
@@ -850,27 +892,59 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
                WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
                spin_unlock(&line->lock);
 
-               if (pblk_blk_erase_sync(pblk, ppa)) {
+               ret = pblk_blk_erase_sync(pblk, ppa);
+               if (ret) {
                        pr_err("pblk: failed to erase line %d\n", line->id);
-                       return -ENOMEM;
+                       return ret;
                }
        } while (1);
 
        return 0;
 }
 
+static void pblk_line_setup_metadata(struct pblk_line *line,
+                                    struct pblk_line_mgmt *l_mg,
+                                    struct pblk_line_meta *lm)
+{
+       int meta_line;
+
+       lockdep_assert_held(&l_mg->free_lock);
+
+retry_meta:
+       meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+       if (meta_line == PBLK_DATA_LINES) {
+               spin_unlock(&l_mg->free_lock);
+               io_schedule();
+               spin_lock(&l_mg->free_lock);
+               goto retry_meta;
+       }
+
+       set_bit(meta_line, &l_mg->meta_bitmap);
+       line->meta_line = meta_line;
+
+       line->smeta = l_mg->sline_meta[meta_line];
+       line->emeta = l_mg->eline_meta[meta_line];
+
+       memset(line->smeta, 0, lm->smeta_len);
+       memset(line->emeta->buf, 0, lm->emeta_len[0]);
+
+       line->emeta->mem = 0;
+       atomic_set(&line->emeta->sync, 0);
+}
+
 /* For now, lines are always assumed to be full. Thus, the smeta former and
  * current lun bitmaps are omitted.
  */
-static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
                                  struct pblk_line *cur)
 {
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-       struct line_smeta *smeta = line->smeta;
-       struct line_emeta *emeta = line->emeta;
+       struct pblk_emeta *emeta = line->emeta;
+       struct line_emeta *emeta_buf = emeta->buf;
+       struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
        int nr_blk_line;
 
        /* After erasing the line, new bad blocks might appear and we risk
@@ -893,42 +967,44 @@ static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
        }
 
        /* Run-time metadata */
-       line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+       line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
 
        /* Mark LUNs allocated in this line (all for now) */
        bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
 
-       smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
-       memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
-       smeta->header.id = cpu_to_le32(line->id);
-       smeta->header.type = cpu_to_le16(line->type);
-       smeta->header.version = cpu_to_le16(1);
+       smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
+       memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
+       smeta_buf->header.id = cpu_to_le32(line->id);
+       smeta_buf->header.type = cpu_to_le16(line->type);
+       smeta_buf->header.version = cpu_to_le16(1);
 
        /* Start metadata */
-       smeta->seq_nr = cpu_to_le64(line->seq_nr);
-       smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+       smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+       smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
 
        /* Fill metadata among lines */
        if (cur) {
                memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
-               smeta->prev_id = cpu_to_le32(cur->id);
-               cur->emeta->next_id = cpu_to_le32(line->id);
+               smeta_buf->prev_id = cpu_to_le32(cur->id);
+               cur->emeta->buf->next_id = cpu_to_le32(line->id);
        } else {
-               smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+               smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
        }
 
        /* All smeta must be set at this point */
-       smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
-       smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+       smeta_buf->header.crc = cpu_to_le32(
+                       pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
+       smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
 
        /* End metadata */
-       memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
-       emeta->seq_nr = cpu_to_le64(line->seq_nr);
-       emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
-       emeta->nr_valid_lbas = cpu_to_le64(0);
-       emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
-       emeta->crc = cpu_to_le32(0);
-       emeta->prev_id = smeta->prev_id;
+       memcpy(&emeta_buf->header, &smeta_buf->header,
+                                               sizeof(struct line_header));
+       emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+       emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
+       emeta_buf->nr_valid_lbas = cpu_to_le64(0);
+       emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+       emeta_buf->crc = cpu_to_le32(0);
+       emeta_buf->prev_id = smeta_buf->prev_id;
 
        return 1;
 }
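
Note the ordering in the smeta path above: every field is populated first, and only then are the two CRCs computed — the header CRC over just the line_header, and smeta_buf->crc over the smeta buffer. A self-contained sketch of that fill-then-checksum pattern, using a stand-in checksum rather than pblk's CRC helpers (whose exact coverage is not shown in this hunk):

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    struct header { uint32_t id; uint32_t crc; };
    struct smeta  { struct header hdr; uint64_t seq_nr; uint32_t crc; };

    /* Stand-in for pblk's CRC helpers; any real CRC slots in the same way. */
    static uint32_t checksum(const void *buf, size_t len)
    {
            const uint8_t *p = buf;
            uint32_t sum = 0;

            while (len--)
                    sum = sum * 31 + *p++;
            return sum;
    }

    int main(void)
    {
            struct smeta s;

            memset(&s, 0, sizeof(s));       /* crc fields start at zero */
            s.hdr.id = 42;
            s.seq_nr = 7;

            /* All other fields must be set at this point */
            s.hdr.crc = checksum(&s.hdr, sizeof(s.hdr));
            s.crc = checksum(&s, sizeof(s));

            printf("hdr crc %u, smeta crc %u\n", s.hdr.crc, s.crc);
            return 0;
    }
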
@@ -965,7 +1041,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
        /* Mark smeta metadata sectors as bad sectors */
        bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
        off = bit * geo->sec_per_pl;
-retry_smeta:
        bitmap_set(line->map_bitmap, off, lm->smeta_sec);
        line->sec_in_line -= lm->smeta_sec;
        line->smeta_ssec = off;
@@ -973,8 +1048,7 @@ retry_smeta:
 
        if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
                pr_debug("pblk: line smeta I/O failed. Retry\n");
-               off += geo->sec_per_pl;
-               goto retry_smeta;
+               return 1;
        }
 
        bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -983,8 +1057,8 @@ retry_smeta:
         * blocks to make sure that there are enough sectors to store emeta
         */
        bit = lm->sec_per_line;
-       off = lm->sec_per_line - lm->emeta_sec;
-       bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+       off = lm->sec_per_line - lm->emeta_sec[0];
+       bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
        while (nr_bb) {
                off -= geo->sec_per_pl;
                if (!test_bit(off, line->invalid_bitmap)) {
@@ -993,9 +1067,11 @@ retry_smeta:
                }
        }
 
-       line->sec_in_line -= lm->emeta_sec;
+       line->sec_in_line -= lm->emeta_sec[0];
        line->emeta_ssec = off;
-       line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+       line->nr_valid_lbas = 0;
+       line->left_msecs = line->sec_in_line;
+       *line->vsc = cpu_to_le32(line->sec_in_line);
 
        if (lm->sec_per_line - line->sec_in_line !=
                bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
@@ -1034,14 +1110,20 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
 
        spin_lock(&line->lock);
        if (line->state != PBLK_LINESTATE_FREE) {
+               mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+               mempool_free(line->map_bitmap, pblk->line_meta_pool);
                spin_unlock(&line->lock);
-               WARN(1, "pblk: corrupted line state\n");
-               return -EINTR;
+               WARN(1, "pblk: corrupted line %d, state %d\n",
+                                                       line->id, line->state);
+               return -EAGAIN;
        }
+
        line->state = PBLK_LINESTATE_OPEN;
 
        atomic_set(&line->left_eblks, blk_in_line);
        atomic_set(&line->left_seblks, blk_in_line);
+
+       line->meta_distance = lm->meta_distance;
        spin_unlock(&line->lock);
 
        /* Bad blocks do not need to be erased */
@@ -1091,15 +1173,15 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
 {
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
-       struct pblk_line *line = NULL;
-       int bit;
+       struct pblk_line *line;
+       int ret, bit;
 
        lockdep_assert_held(&l_mg->free_lock);
 
-retry_get:
+retry:
        if (list_empty(&l_mg->free_list)) {
                pr_err("pblk: no free lines\n");
-               goto out;
+               return NULL;
        }
 
        line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
@@ -1115,16 +1197,22 @@ retry_get:
                list_add_tail(&line->list, &l_mg->bad_list);
 
                pr_debug("pblk: line %d is bad\n", line->id);
-               goto retry_get;
+               goto retry;
        }
 
-       if (pblk_line_prepare(pblk, line)) {
-               pr_err("pblk: failed to prepare line %d\n", line->id);
-               list_add(&line->list, &l_mg->free_list);
-               return NULL;
+       ret = pblk_line_prepare(pblk, line);
+       if (ret) {
+               if (ret == -EAGAIN) {
+                       list_add(&line->list, &l_mg->corrupt_list);
+                       goto retry;
+               } else {
+                       pr_err("pblk: failed to prepare line %d\n", line->id);
+                       list_add(&line->list, &l_mg->free_list);
+                       l_mg->nr_free_lines++;
+                       return NULL;
+               }
        }
 
-out:
        return line;
 }
 
@@ -1134,6 +1222,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line *retry_line;
 
+retry:
        spin_lock(&l_mg->free_lock);
        retry_line = pblk_line_get(pblk);
        if (!retry_line) {
@@ -1150,23 +1239,25 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
        l_mg->data_line = retry_line;
        spin_unlock(&l_mg->free_lock);
 
-       if (pblk_line_erase(pblk, retry_line)) {
-               spin_lock(&l_mg->free_lock);
-               l_mg->data_line = NULL;
-               spin_unlock(&l_mg->free_lock);
-               return NULL;
-       }
-
        pblk_rl_free_lines_dec(&pblk->rl, retry_line);
 
+       if (pblk_line_erase(pblk, retry_line))
+               goto retry;
+
        return retry_line;
 }
 
+static void pblk_set_space_limit(struct pblk *pblk)
+{
+       struct pblk_rl *rl = &pblk->rl;
+
+       atomic_set(&rl->rb_space, 0);
+}
+
 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 {
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line *line;
-       int meta_line;
        int is_next = 0;
 
        spin_lock(&l_mg->free_lock);
@@ -1180,30 +1271,37 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
        line->type = PBLK_LINETYPE_DATA;
        l_mg->data_line = line;
 
-       meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
-       set_bit(meta_line, &l_mg->meta_bitmap);
-       line->smeta = l_mg->sline_meta[meta_line].meta;
-       line->emeta = l_mg->eline_meta[meta_line].meta;
-       line->meta_line = meta_line;
+       pblk_line_setup_metadata(line, l_mg, &pblk->lm);
 
        /* Allocate next line for preparation */
        l_mg->data_next = pblk_line_get(pblk);
-       if (l_mg->data_next) {
+       if (!l_mg->data_next) {
+               /* If we cannot get a new line, we need to stop the pipeline.
+                * Only allow as many writes in as we can store safely, then
+                * fail gracefully.
+                */
+               pblk_set_space_limit(pblk);
+
+               l_mg->data_next = NULL;
+       } else {
                l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
                l_mg->data_next->type = PBLK_LINETYPE_DATA;
                is_next = 1;
        }
        spin_unlock(&l_mg->free_lock);
 
+       if (pblk_line_erase(pblk, line)) {
+               line = pblk_line_retry(pblk, line);
+               if (!line)
+                       return NULL;
+       }
+
        pblk_rl_free_lines_dec(&pblk->rl, line);
        if (is_next)
                pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
 
-       if (pblk_line_erase(pblk, line))
-               return NULL;
-
 retry_setup:
-       if (!pblk_line_set_metadata(pblk, line, NULL)) {
+       if (!pblk_line_init_metadata(pblk, line, NULL)) {
                line = pblk_line_retry(pblk, line);
                if (!line)
                        return NULL;
@@ -1222,69 +1320,89 @@ retry_setup:
        return line;
 }
 
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
+{
+       lockdep_assert_held(&pblk->l_mg.free_lock);
+
+       pblk_set_space_limit(pblk);
+       pblk->state = PBLK_STATE_STOPPING;
+}
+
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       int ret;
+
+       spin_lock(&l_mg->free_lock);
+       if (pblk->state == PBLK_STATE_RECOVERING ||
+                                       pblk->state == PBLK_STATE_STOPPED) {
+               spin_unlock(&l_mg->free_lock);
+               return;
+       }
+       pblk->state = PBLK_STATE_RECOVERING;
+       spin_unlock(&l_mg->free_lock);
+
+       pblk_flush_writer(pblk);
+       pblk_wait_for_meta(pblk);
+
+       ret = pblk_recov_pad(pblk);
+       if (ret) {
+               pr_err("pblk: could not close data on teardown(%d)\n", ret);
+               return;
+       }
+
+       flush_workqueue(pblk->bb_wq);
+       pblk_line_close_meta_sync(pblk);
+
+       spin_lock(&l_mg->free_lock);
+       pblk->state = PBLK_STATE_STOPPED;
+       l_mg->data_line = NULL;
+       l_mg->data_next = NULL;
+       spin_unlock(&l_mg->free_lock);
+}
+
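
pblk_pipeline_stop() above is a small state machine: the RUNNING -> RECOVERING transition is checked and claimed under free_lock so only one caller performs the teardown (flush, pad, metadata sync), and STOPPED is published under the same lock. A compact userspace sketch of that claim-under-lock guard, with a mutex standing in for free_lock:

    #include <pthread.h>
    #include <stdio.h>

    enum pblk_state { STATE_RUNNING, STATE_RECOVERING, STATE_STOPPED };

    static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER;
    static enum pblk_state state = STATE_RUNNING;

    static void pipeline_stop(void)
    {
            pthread_mutex_lock(&free_lock);
            if (state == STATE_RECOVERING || state == STATE_STOPPED) {
                    pthread_mutex_unlock(&free_lock);
                    return;         /* someone else already owns the teardown */
            }
            state = STATE_RECOVERING;       /* claim it */
            pthread_mutex_unlock(&free_lock);

            /* ... flush writers, pad the open line, sync metadata ... */

            pthread_mutex_lock(&free_lock);
            state = STATE_STOPPED;
            pthread_mutex_unlock(&free_lock);
    }

    int main(void)
    {
            pipeline_stop();
            pipeline_stop();        /* second call returns early */
            printf("final state: %d\n", state);
            return 0;
    }
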
+void pblk_line_replace_data(struct pblk *pblk)
 {
-       struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line *cur, *new;
        unsigned int left_seblks;
-       int meta_line;
        int is_next = 0;
 
        cur = l_mg->data_line;
        new = l_mg->data_next;
        if (!new)
-               return NULL;
+               return;
        l_mg->data_line = new;
 
-retry_line:
+       spin_lock(&l_mg->free_lock);
+       if (pblk->state != PBLK_STATE_RUNNING) {
+               l_mg->data_line = NULL;
+               l_mg->data_next = NULL;
+               spin_unlock(&l_mg->free_lock);
+               return;
+       }
+
+       pblk_line_setup_metadata(new, l_mg, &pblk->lm);
+       spin_unlock(&l_mg->free_lock);
+
+retry_erase:
        left_seblks = atomic_read(&new->left_seblks);
        if (left_seblks) {
                /* If line is not fully erased, erase it */
                if (atomic_read(&new->left_eblks)) {
                        if (pblk_line_erase(pblk, new))
-                               return NULL;
+                               return;
                } else {
                        io_schedule();
                }
-               goto retry_line;
+               goto retry_erase;
        }
 
-       spin_lock(&l_mg->free_lock);
-       /* Allocate next line for preparation */
-       l_mg->data_next = pblk_line_get(pblk);
-       if (l_mg->data_next) {
-               l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
-               l_mg->data_next->type = PBLK_LINETYPE_DATA;
-               is_next = 1;
-       }
-
-retry_meta:
-       meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
-       if (meta_line == PBLK_DATA_LINES) {
-               spin_unlock(&l_mg->free_lock);
-               io_schedule();
-               spin_lock(&l_mg->free_lock);
-               goto retry_meta;
-       }
-
-       set_bit(meta_line, &l_mg->meta_bitmap);
-       new->smeta = l_mg->sline_meta[meta_line].meta;
-       new->emeta = l_mg->eline_meta[meta_line].meta;
-       new->meta_line = meta_line;
-
-       memset(new->smeta, 0, lm->smeta_len);
-       memset(new->emeta, 0, lm->emeta_len);
-       spin_unlock(&l_mg->free_lock);
-
-       if (is_next)
-               pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-
 retry_setup:
-       if (!pblk_line_set_metadata(pblk, new, cur)) {
+       if (!pblk_line_init_metadata(pblk, new, cur)) {
                new = pblk_line_retry(pblk, new);
                if (!new)
-                       return NULL;
+                       return;
 
                goto retry_setup;
        }
@@ -1292,12 +1410,30 @@ retry_setup:
        if (!pblk_line_init_bb(pblk, new, 1)) {
                new = pblk_line_retry(pblk, new);
                if (!new)
-                       return NULL;
+                       return;
 
                goto retry_setup;
        }
 
-       return new;
+       /* Allocate next line for preparation */
+       spin_lock(&l_mg->free_lock);
+       l_mg->data_next = pblk_line_get(pblk);
+       if (!l_mg->data_next) {
+               /* If we cannot get a new line, we need to stop the pipeline.
+                * Only allow as many writes in as we can store safely, then
+                * fail gracefully.
+                */
+               pblk_stop_writes(pblk, new);
+               l_mg->data_next = NULL;
+       } else {
+               l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+               l_mg->data_next->type = PBLK_LINETYPE_DATA;
+               is_next = 1;
+       }
+       spin_unlock(&l_mg->free_lock);
+
+       if (is_next)
+               pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
 }
 
 void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
@@ -1307,6 +1443,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
        if (line->invalid_bitmap)
                mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
 
+       *line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
        line->map_bitmap = NULL;
        line->invalid_bitmap = NULL;
        line->smeta = NULL;
@@ -1339,8 +1477,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
        struct nvm_rq *rqd;
        int err;
 
-       rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
-       memset(rqd, 0, pblk_r_rq_size);
+       rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
+       memset(rqd, 0, pblk_g_rq_size);
 
        pblk_setup_e_rq(pblk, rqd, ppa);
 
@@ -1368,7 +1506,8 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk)
        return pblk->l_mg.data_line;
 }
 
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+/* For now, always erase the next line */
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
 {
        return pblk->l_mg.data_next;
 }
@@ -1378,18 +1517,58 @@ int pblk_line_is_full(struct pblk_line *line)
        return (line->left_msecs == 0);
 }
 
+void pblk_line_close_meta_sync(struct pblk *pblk)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line *line, *tline;
+       LIST_HEAD(list);
+
+       spin_lock(&l_mg->close_lock);
+       if (list_empty(&l_mg->emeta_list)) {
+               spin_unlock(&l_mg->close_lock);
+               return;
+       }
+
+       list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
+       spin_unlock(&l_mg->close_lock);
+
+       list_for_each_entry_safe(line, tline, &list, list) {
+               struct pblk_emeta *emeta = line->emeta;
+
+               while (emeta->mem < lm->emeta_len[0]) {
+                       int ret;
+
+                       ret = pblk_submit_meta_io(pblk, line);
+                       if (ret) {
+                               pr_err("pblk: sync meta line %d failed (%d)\n",
+                                                       line->id, ret);
+                               return;
+                       }
+               }
+       }
+
+       pblk_wait_for_meta(pblk);
+       flush_workqueue(pblk->close_wq);
+}
+
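
Both pblk_line_close_meta_sync() above and pblk_gc_write() in pblk-gc.c use the same drain idiom: list_cut_position() splices the entire pending list out while the lock is held, and the private copy is then walked without the lock. A userspace sketch of that idiom, with a simple singly-linked list and a pthread mutex standing in for the spinlock:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct node { struct node *next; int id; };

    static pthread_mutex_t close_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct node *pending;    /* protected by close_lock */

    static void drain(void)
    {
            struct node *list;

            /* Splice the whole list out while holding the lock... */
            pthread_mutex_lock(&close_lock);
            list = pending;
            pending = NULL;
            pthread_mutex_unlock(&close_lock);

            /* ...then walk the private copy without the lock. */
            while (list) {
                    struct node *n = list;

                    list = n->next;
                    printf("syncing line %d\n", n->id);
                    free(n);
            }
    }

    int main(void)
    {
            int i;

            for (i = 0; i < 3; i++) {
                    struct node *n = malloc(sizeof(*n));

                    if (!n)
                            return 1;
                    n->id = i;
                    pthread_mutex_lock(&close_lock);
                    n->next = pending;
                    pending = n;
                    pthread_mutex_unlock(&close_lock);
            }
            drain();
            return 0;
    }
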
+static void pblk_line_should_sync_meta(struct pblk *pblk)
+{
+       if (pblk_rl_is_limit(&pblk->rl))
+               pblk_line_close_meta_sync(pblk);
+}
+
 void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
 {
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct list_head *move_list;
 
-       line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
-
-       if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
-               pr_err("pblk: line %d close I/O failed\n", line->id);
+#ifdef CONFIG_NVM_DEBUG
+       struct pblk_line_meta *lm = &pblk->lm;
 
-       WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+       WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
                                "pblk: corrupt closed line %d\n", line->id);
+#endif
 
        spin_lock(&l_mg->free_lock);
        WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
@@ -1410,6 +1589,31 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
 
        spin_unlock(&line->lock);
        spin_unlock(&l_mg->gc_lock);
+
+       pblk_gc_should_kick(pblk);
+}
+
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_emeta *emeta = line->emeta;
+       struct line_emeta *emeta_buf = emeta->buf;
+
+       /* No need for an exact vsc value; avoid a big line lock, take approx. */
+       memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
+       memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
+
+       emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
+       emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
+
+       spin_lock(&l_mg->close_lock);
+       spin_lock(&line->lock);
+       list_add_tail(&line->list, &l_mg->emeta_list);
+       spin_unlock(&line->lock);
+       spin_unlock(&l_mg->close_lock);
+
+       pblk_line_should_sync_meta(pblk);
 }
 
 void pblk_line_close_ws(struct work_struct *work)
@@ -1449,7 +1653,8 @@ void pblk_line_mark_bb(struct work_struct *work)
 }
 
 void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
-                     void (*work)(struct work_struct *))
+                     void (*work)(struct work_struct *),
+                     struct workqueue_struct *wq)
 {
        struct pblk_line_ws *line_ws;
 
@@ -1462,7 +1667,7 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
        line_ws->priv = priv;
 
        INIT_WORK(&line_ws->ws, work);
-       queue_work(pblk->kw_wq, &line_ws->ws);
+       queue_work(wq, &line_ws->ws);
 }
 
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -1471,7 +1676,7 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_lun *rlun;
-       int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+       int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
        int ret;
 
        /*
@@ -1488,10 +1693,10 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 /* If the LUN has been locked for this same request, do not attempt to
         * lock it again
         */
-       if (test_and_set_bit(lun_id, lun_bitmap))
+       if (test_and_set_bit(pos, lun_bitmap))
                return;
 
-       rlun = &pblk->luns[lun_id];
+       rlun = &pblk->luns[pos];
        ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
        if (ret) {
                switch (ret) {
index eaf479c..6090d28 100644
@@ -20,8 +20,7 @@
 
 static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
 {
-       kfree(gc_rq->data);
-       kfree(gc_rq->lba_list);
+       vfree(gc_rq->data);
        kfree(gc_rq);
 }
 
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
                return 1;
        }
 
-       list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
-               list_move_tail(&gc_rq->list, &w_list);
-               gc->w_entries--;
-       }
+       list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
+       gc->w_entries = 0;
        spin_unlock(&gc->w_lock);
 
        list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
                                gc_rq->nr_secs, gc_rq->secs_to_gc,
                                gc_rq->line, PBLK_IOTYPE_GC);
 
-               kref_put(&gc_rq->line->ref, pblk_line_put);
-
                list_del(&gc_rq->list);
+               kref_put(&gc_rq->line->ref, pblk_line_put);
                pblk_gc_free_gc_rq(gc_rq);
        }
 
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
  * Responsible for managing all memory related to a GC request, including in
  * the case of failure
  */
-static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
-                                  u64 *lba_list, unsigned int nr_secs)
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
 {
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_gc *gc = &pblk->gc;
-       struct pblk_gc_rq *gc_rq;
+       struct pblk_line *line = gc_rq->line;
        void *data;
        unsigned int secs_to_gc;
-       int ret = NVM_IO_OK;
+       int ret = 0;
 
-       data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+       data = vmalloc(gc_rq->nr_secs * geo->sec_size);
        if (!data) {
-               ret = NVM_IO_ERR;
-               goto free_lba_list;
+               ret = -ENOMEM;
+               goto out;
        }
 
        /* Read from GC victim block */
-       if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+       if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
                                                        &secs_to_gc, line)) {
-               ret = NVM_IO_ERR;
+               ret = -EFAULT;
                goto free_data;
        }
 
        if (!secs_to_gc)
-               goto free_data;
-
-       gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
-       if (!gc_rq) {
-               ret = NVM_IO_ERR;
-               goto free_data;
-       }
+               goto free_rq;
 
-       gc_rq->line = line;
        gc_rq->data = data;
-       gc_rq->lba_list = lba_list;
-       gc_rq->nr_secs = nr_secs;
        gc_rq->secs_to_gc = secs_to_gc;
 
-       kref_get(&line->ref);
-
 retry:
        spin_lock(&gc->w_lock);
-       if (gc->w_entries > 256) {
+       if (gc->w_entries >= PBLK_GC_W_QD) {
                spin_unlock(&gc->w_lock);
-               usleep_range(256, 1024);
+               pblk_gc_writer_kick(&pblk->gc);
+               usleep_range(128, 256);
                goto retry;
        }
        gc->w_entries++;
@@ -120,13 +105,14 @@ retry:
 
        pblk_gc_writer_kick(&pblk->gc);
 
-       return NVM_IO_OK;
+       return 0;
 
+free_rq:
+       kfree(gc_rq);
 free_data:
-       kfree(data);
-free_lba_list:
-       kfree(lba_list);
-
+       vfree(data);
+out:
+       kref_put(&line->ref, pblk_line_put);
        return ret;
 }
 
@@ -150,140 +136,206 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
 
 static void pblk_gc_line_ws(struct work_struct *work)
 {
+       struct pblk_line_ws *line_rq_ws = container_of(work,
+                                               struct pblk_line_ws, ws);
+       struct pblk *pblk = line_rq_ws->pblk;
+       struct pblk_gc *gc = &pblk->gc;
+       struct pblk_line *line = line_rq_ws->line;
+       struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
+
+       up(&gc->gc_sem);
+
+       if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
+               pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
+                                               line->id, *line->vsc,
+                                               gc_rq->nr_secs);
+       }
+
+       mempool_free(line_rq_ws, pblk->line_ws_pool);
+}
+
+static void pblk_gc_line_prepare_ws(struct work_struct *work)
+{
        struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
                                                                        ws);
        struct pblk *pblk = line_ws->pblk;
-       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line *line = line_ws->line;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line_meta *lm = &pblk->lm;
-       __le64 *lba_list = line_ws->priv;
-       u64 *gc_list;
-       int sec_left;
-       int nr_ppas, bit;
-       int put_line = 1;
+       struct pblk_gc *gc = &pblk->gc;
+       struct line_emeta *emeta_buf;
+       struct pblk_line_ws *line_rq_ws;
+       struct pblk_gc_rq *gc_rq;
+       __le64 *lba_list;
+       int sec_left, nr_secs, bit;
+       int ret;
 
-       pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+       emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
+                                                               GFP_KERNEL);
+       if (!emeta_buf) {
+               pr_err("pblk: cannot use GC emeta\n");
+               return;
+       }
 
-       spin_lock(&line->lock);
-       sec_left = line->vsc;
-       if (!sec_left) {
-               /* Lines are erased before being used (l_mg->data_/log_next) */
-               spin_unlock(&line->lock);
-               goto out;
+       ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+       if (ret) {
+               pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+               goto fail_free_emeta;
+       }
+
+       /* If this read fails, it means that emeta is corrupted. For now, leave
+        * the line untouched. TODO: Implement a recovery routine that scans and
+        * moves all sectors on the line.
+        */
+       lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
+       if (!lba_list) {
+               pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
+               goto fail_free_emeta;
        }
-       spin_unlock(&line->lock);
 
+       sec_left = pblk_line_vsc(line);
        if (sec_left < 0) {
                pr_err("pblk: corrupted GC line (%d)\n", line->id);
-               put_line = 0;
-               pblk_put_line_back(pblk, line);
-               goto out;
+               goto fail_free_emeta;
        }
 
        bit = -1;
 next_rq:
-       gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
-       if (!gc_list) {
-               put_line = 0;
-               pblk_put_line_back(pblk, line);
-               goto out;
-       }
+       gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+       if (!gc_rq)
+               goto fail_free_emeta;
 
-       nr_ppas = 0;
+       nr_secs = 0;
        do {
                bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
                                                                bit + 1);
                if (bit > line->emeta_ssec)
                        break;
 
-               gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
-       } while (nr_ppas < pblk->max_write_pgs);
+               gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
+       } while (nr_secs < pblk->max_write_pgs);
 
-       if (unlikely(!nr_ppas)) {
-               kfree(gc_list);
+       if (unlikely(!nr_secs)) {
+               kfree(gc_rq);
                goto out;
        }
 
-       if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
-               pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
-                                               line->id, line->vsc,
-                                               nr_ppas, nr_ppas);
-               put_line = 0;
-               pblk_put_line_back(pblk, line);
-               goto out;
-       }
+       gc_rq->nr_secs = nr_secs;
+       gc_rq->line = line;
+
+       line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+       if (!line_rq_ws)
+               goto fail_free_gc_rq;
 
-       sec_left -= nr_ppas;
+       line_rq_ws->pblk = pblk;
+       line_rq_ws->line = line;
+       line_rq_ws->priv = gc_rq;
+
+       down(&gc->gc_sem);
+       kref_get(&line->ref);
+
+       INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
+       queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
+
+       sec_left -= nr_secs;
        if (sec_left > 0)
                goto next_rq;
 
 out:
-       pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+       pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
        mempool_free(line_ws, pblk->line_ws_pool);
-       atomic_dec(&pblk->gc.inflight_gc);
-       if (put_line)
-               kref_put(&line->ref, pblk_line_put);
+
+       kref_put(&line->ref, pblk_line_put);
+       atomic_dec(&gc->inflight_gc);
+
+       return;
+
+fail_free_gc_rq:
+       kfree(gc_rq);
+fail_free_emeta:
+       pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+       pblk_put_line_back(pblk, line);
+       kref_put(&line->ref, pblk_line_put);
+       mempool_free(line_ws, pblk->line_ws_pool);
+       atomic_dec(&gc->inflight_gc);
+
+       pr_err("pblk: Failed to GC line %d\n", line->id);
 }
 
 static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
 {
-       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_gc *gc = &pblk->gc;
        struct pblk_line_ws *line_ws;
-       __le64 *lba_list;
-       int ret;
 
-       line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
-       line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
-                                                               GFP_KERNEL);
-       if (!line->emeta) {
-               pr_err("pblk: cannot use GC emeta\n");
-               goto fail_free_ws;
-       }
-
-       ret = pblk_line_read_emeta(pblk, line);
-       if (ret) {
-               pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
-               goto fail_free_emeta;
-       }
+       pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
 
-       /* If this read fails, it means that emeta is corrupted. For now, leave
-        * the line untouched. TODO: Implement a recovery routine that scans and
-        * moves all sectors on the line.
-        */
-       lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
-       if (!lba_list) {
-               pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
-               goto fail_free_emeta;
-       }
+       line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+       if (!line_ws)
+               return -ENOMEM;
 
        line_ws->pblk = pblk;
        line_ws->line = line;
-       line_ws->priv = lba_list;
 
-       INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
-       queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+       INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
+       queue_work(gc->gc_reader_wq, &line_ws->ws);
 
        return 0;
+}
 
-fail_free_emeta:
-       pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
-fail_free_ws:
-       mempool_free(line_ws, pblk->line_ws_pool);
-       pblk_put_line_back(pblk, line);
+static int pblk_gc_read(struct pblk *pblk)
+{
+       struct pblk_gc *gc = &pblk->gc;
+       struct pblk_line *line;
+
+       spin_lock(&gc->r_lock);
+       if (list_empty(&gc->r_list)) {
+               spin_unlock(&gc->r_lock);
+               return 1;
+       }
+
+       line = list_first_entry(&gc->r_list, struct pblk_line, list);
+       list_del(&line->list);
+       spin_unlock(&gc->r_lock);
+
+       pblk_gc_kick(pblk);
 
-       return 1;
+       if (pblk_gc_line(pblk, line))
+               pr_err("pblk: failed to GC line %d\n", line->id);
+
+       return 0;
 }
 
-static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+static void pblk_gc_reader_kick(struct pblk_gc *gc)
 {
-       struct pblk_line *line, *tline;
+       wake_up_process(gc->gc_reader_ts);
+}
 
-       list_for_each_entry_safe(line, tline, gc_list, list) {
-               if (pblk_gc_line(pblk, line))
-                       pr_err("pblk: failed to GC line %d\n", line->id);
-               list_del(&line->list);
+static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
+                                                struct list_head *group_list)
+{
+       struct pblk_line *line, *victim;
+       int line_vsc, victim_vsc;
+
+       victim = list_first_entry(group_list, struct pblk_line, list);
+       list_for_each_entry(line, group_list, list) {
+               line_vsc = le32_to_cpu(*line->vsc);
+               victim_vsc = le32_to_cpu(*victim->vsc);
+               if (line_vsc < victim_vsc)
+                       victim = line;
        }
+
+       return victim;
+}
+
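
pblk_gc_get_victim_line() above implements the classic greedy GC policy: choose the line with the fewest valid sectors (vsc), since that minimizes the data GC must copy. A minimal sketch of the same min-scan over an array of hypothetical vsc values:

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical valid-sector counts, one per closed line */
            int vsc[] = { 120, 37, 88, 37, 251 };
            int n = (int)(sizeof(vsc) / sizeof(vsc[0]));
            int victim = 0;
            int i;

            for (i = 1; i < n; i++)
                    if (vsc[i] < vsc[victim])  /* strict '<': first wins ties */
                            victim = i;

            printf("GC victim: line %d (vsc %d)\n", victim, vsc[victim]);
            return 0;
    }
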
+static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
+{
+       unsigned int nr_blocks_free, nr_blocks_need;
+
+       nr_blocks_need = pblk_rl_high_thrs(rl);
+       nr_blocks_free = pblk_rl_nr_free_blks(rl);
+
+       /* This is not critical; no need to take the lock here */
+       return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
 }
 
 /*
@@ -296,71 +348,83 @@ static void pblk_gc_run(struct pblk *pblk)
 {
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_gc *gc = &pblk->gc;
-       struct pblk_line *line, *tline;
-       unsigned int nr_blocks_free, nr_blocks_need;
+       struct pblk_line *line;
        struct list_head *group_list;
-       int run_gc, gc_group = 0;
-       int prev_gc = 0;
-       int inflight_gc = atomic_read(&gc->inflight_gc);
-       LIST_HEAD(gc_list);
+       bool run_gc;
+       int inflight_gc, gc_group = 0, prev_group = 0;
+
+       do {
+               spin_lock(&l_mg->gc_lock);
+               if (list_empty(&l_mg->gc_full_list)) {
+                       spin_unlock(&l_mg->gc_lock);
+                       break;
+               }
+
+               line = list_first_entry(&l_mg->gc_full_list,
+                                                       struct pblk_line, list);
 
-       spin_lock(&l_mg->gc_lock);
-       list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
                spin_lock(&line->lock);
                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
                line->state = PBLK_LINESTATE_GC;
                spin_unlock(&line->lock);
 
                list_del(&line->list);
+               spin_unlock(&l_mg->gc_lock);
+
                kref_put(&line->ref, pblk_line_put);
-       }
-       spin_unlock(&l_mg->gc_lock);
+       } while (1);
 
-       nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
-       nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
-       run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+       run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+       if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
+               return;
 
 next_gc_group:
        group_list = l_mg->gc_lists[gc_group++];
-       spin_lock(&l_mg->gc_lock);
-       while (run_gc && !list_empty(group_list)) {
-               /* No need to queue up more GC lines than we can handle */
-               if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+
+       do {
+               spin_lock(&l_mg->gc_lock);
+               if (list_empty(group_list)) {
                        spin_unlock(&l_mg->gc_lock);
-                       pblk_gc_lines(pblk, &gc_list);
-                       return;
+                       break;
                }
 
-               line = list_first_entry(group_list, struct pblk_line, list);
-               nr_blocks_free += atomic_read(&line->blk_in_line);
+               line = pblk_gc_get_victim_line(pblk, group_list);
 
                spin_lock(&line->lock);
                WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
                line->state = PBLK_LINESTATE_GC;
-               list_move_tail(&line->list, &gc_list);
-               atomic_inc(&gc->inflight_gc);
-               inflight_gc++;
                spin_unlock(&line->lock);
 
-               prev_gc = 1;
-               run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
-       }
-       spin_unlock(&l_mg->gc_lock);
+               list_del(&line->list);
+               spin_unlock(&l_mg->gc_lock);
+
+               spin_lock(&gc->r_lock);
+               list_add_tail(&line->list, &gc->r_list);
+               spin_unlock(&gc->r_lock);
 
-       pblk_gc_lines(pblk, &gc_list);
+               inflight_gc = atomic_inc_return(&gc->inflight_gc);
+               pblk_gc_reader_kick(gc);
 
-       if (!prev_gc && pblk->rl.rb_state > gc_group &&
-                                               gc_group < PBLK_NR_GC_LISTS)
+               prev_group = 1;
+
+               /* No need to queue up more GC lines than we can handle */
+               run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+               if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
+                       break;
+       } while (1);
+
+       if (!prev_group && pblk->rl.rb_state > gc_group &&
+                                               gc_group < PBLK_GC_NR_LISTS)
                goto next_gc_group;
 }
 
-
-static void pblk_gc_kick(struct pblk *pblk)
+void pblk_gc_kick(struct pblk *pblk)
 {
        struct pblk_gc *gc = &pblk->gc;
 
        wake_up_process(gc->gc_ts);
        pblk_gc_writer_kick(gc);
+       pblk_gc_reader_kick(gc);
        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 }
 
@@ -398,42 +462,34 @@ static int pblk_gc_writer_ts(void *data)
        return 0;
 }
 
-static void pblk_gc_start(struct pblk *pblk)
+static int pblk_gc_reader_ts(void *data)
 {
-       pblk->gc.gc_active = 1;
+       struct pblk *pblk = data;
 
-       pr_debug("pblk: gc start\n");
+       while (!kthread_should_stop()) {
+               if (!pblk_gc_read(pblk))
+                       continue;
+               set_current_state(TASK_INTERRUPTIBLE);
+               io_schedule();
+       }
+
+       return 0;
 }
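
pblk_gc_reader_ts() above follows the usual kthread idiom: loop while pblk_gc_read() finds queued lines, otherwise mark the task sleeping and yield until pblk_gc_reader_kick() wakes it. A rough userspace analogue using a condition variable in place of set_current_state()/wake_up_process():

    #include <pthread.h>
    #include <stdio.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t kick = PTHREAD_COND_INITIALIZER;
    static int work_items;          /* analogue of lines on gc->r_list */
    static bool stop_ts;            /* analogue of kthread_should_stop() */

    static void *reader_ts(void *arg)
    {
            (void)arg;
            pthread_mutex_lock(&lock);
            for (;;) {
                    if (work_items > 0) {   /* pblk_gc_read() found a line */
                            work_items--;
                            pthread_mutex_unlock(&lock);
                            printf("GC-read one line\n");
                            pthread_mutex_lock(&lock);
                    } else if (stop_ts) {
                            break;
                    } else {
                            /* sleep until kicked, like io_schedule() */
                            pthread_cond_wait(&kick, &lock);
                    }
            }
            pthread_mutex_unlock(&lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, reader_ts, NULL);

            pthread_mutex_lock(&lock);
            work_items = 3;                 /* queue some lines */
            stop_ts = true;                 /* stop once drained */
            pthread_cond_signal(&kick);     /* analogue of wake_up_process() */
            pthread_mutex_unlock(&lock);

            pthread_join(t, NULL);
            return 0;
    }
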
 
-int pblk_gc_status(struct pblk *pblk)
+static void pblk_gc_start(struct pblk *pblk)
 {
-       struct pblk_gc *gc = &pblk->gc;
-       int ret;
-
-       spin_lock(&gc->lock);
-       ret = gc->gc_active;
-       spin_unlock(&gc->lock);
-
-       return ret;
+       pblk->gc.gc_active = 1;
+       pr_debug("pblk: gc start\n");
 }
 
-static void __pblk_gc_should_start(struct pblk *pblk)
+void pblk_gc_should_start(struct pblk *pblk)
 {
        struct pblk_gc *gc = &pblk->gc;
 
-       lockdep_assert_held(&gc->lock);
-
        if (gc->gc_enabled && !gc->gc_active)
                pblk_gc_start(pblk);
-}
 
-void pblk_gc_should_start(struct pblk *pblk)
-{
-       struct pblk_gc *gc = &pblk->gc;
-
-       spin_lock(&gc->lock);
-       __pblk_gc_should_start(pblk);
-       spin_unlock(&gc->lock);
+       pblk_gc_kick(pblk);
 }
 
 /*
@@ -442,10 +498,7 @@ void pblk_gc_should_start(struct pblk *pblk)
  */
 static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
 {
-       spin_lock(&pblk->gc.lock);
        pblk->gc.gc_active = 0;
-       spin_unlock(&pblk->gc.lock);
-
        pr_debug("pblk: gc stop\n");
 }
 
@@ -468,20 +521,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
        spin_unlock(&gc->lock);
 }
 
-void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+int pblk_gc_sysfs_force(struct pblk *pblk, int force)
 {
        struct pblk_gc *gc = &pblk->gc;
-       int rsv = 0;
+
+       if (force < 0 || force > 1)
+               return -EINVAL;
 
        spin_lock(&gc->lock);
-       if (force) {
-               gc->gc_enabled = 1;
-               rsv = 64;
-       }
-       pblk_rl_set_gc_rsc(&pblk->rl, rsv);
        gc->gc_forced = force;
-       __pblk_gc_should_start(pblk);
+
+       if (force)
+               gc->gc_enabled = 1;
+       else
+               gc->gc_enabled = 0;
        spin_unlock(&gc->lock);
+
+       pblk_gc_should_start(pblk);
+
+       return 0;
 }
 
 int pblk_gc_init(struct pblk *pblk)
@@ -503,30 +561,58 @@ int pblk_gc_init(struct pblk *pblk)
                goto fail_free_main_kthread;
        }
 
+       gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
+                                                       "pblk-gc-reader-ts");
+       if (IS_ERR(gc->gc_reader_ts)) {
+               pr_err("pblk: could not allocate GC reader kthread\n");
+               ret = PTR_ERR(gc->gc_reader_ts);
+               goto fail_free_writer_kthread;
+       }
+
        setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
        mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 
        gc->gc_active = 0;
        gc->gc_forced = 0;
        gc->gc_enabled = 1;
-       gc->gc_jobs_active = 8;
        gc->w_entries = 0;
        atomic_set(&gc->inflight_gc, 0);
 
-       gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
-                       WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+       /* Workqueue that reads valid sectors from a line and submits them to
+        * the GC writer to be recycled.
+        */
+       gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
+                       WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
+       if (!gc->gc_line_reader_wq) {
+               pr_err("pblk: could not allocate GC line reader workqueue\n");
+               ret = -ENOMEM;
+               goto fail_free_reader_kthread;
+       }
+
+       /* Workqueue that prepares lines for GC */
+       gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
+                                       WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
        if (!gc->gc_reader_wq) {
                pr_err("pblk: could not allocate GC reader workqueue\n");
                ret = -ENOMEM;
-               goto fail_free_writer_kthread;
+               goto fail_free_reader_line_wq;
        }
 
        spin_lock_init(&gc->lock);
        spin_lock_init(&gc->w_lock);
+       spin_lock_init(&gc->r_lock);
+
+       sema_init(&gc->gc_sem, 128);
+
        INIT_LIST_HEAD(&gc->w_list);
+       INIT_LIST_HEAD(&gc->r_list);
 
        return 0;
 
+fail_free_reader_line_wq:
+       destroy_workqueue(gc->gc_line_reader_wq);
+fail_free_reader_kthread:
+       kthread_stop(gc->gc_reader_ts);
 fail_free_writer_kthread:
        kthread_stop(gc->gc_writer_ts);
 fail_free_main_kthread:
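
pblk_gc_init() above grows the standard kernel unwind ladder: each failing allocation jumps to a label that releases everything acquired so far, in reverse order of acquisition. A self-contained sketch of the idiom with plain malloc()/free():

    #include <stdio.h>
    #include <stdlib.h>

    static int init_three(void **a, void **b, void **c)
    {
            *a = malloc(16);
            if (!*a)
                    goto fail;              /* nothing to undo yet */
            *b = malloc(16);
            if (!*b)
                    goto fail_free_a;
            *c = malloc(16);
            if (!*c)
                    goto fail_free_b;
            return 0;

    fail_free_b:                            /* reverse order of acquisition */
            free(*b);
    fail_free_a:
            free(*a);
    fail:
            return -1;
    }

    int main(void)
    {
            void *a, *b, *c;

            if (!init_three(&a, &b, &c)) {
                    puts("init ok");
                    free(c);
                    free(b);
                    free(a);
            }
            return 0;
    }
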
@@ -540,6 +626,7 @@ void pblk_gc_exit(struct pblk *pblk)
        struct pblk_gc *gc = &pblk->gc;
 
        flush_workqueue(gc->gc_reader_wq);
+       flush_workqueue(gc->gc_line_reader_wq);
 
        del_timer(&gc->gc_timer);
        pblk_gc_stop(pblk, 1);
@@ -547,9 +634,15 @@ void pblk_gc_exit(struct pblk *pblk)
        if (gc->gc_ts)
                kthread_stop(gc->gc_ts);
 
-       if (pblk->gc.gc_reader_wq)
-               destroy_workqueue(pblk->gc.gc_reader_wq);
+       if (gc->gc_reader_wq)
+               destroy_workqueue(gc->gc_reader_wq);
+
+       if (gc->gc_line_reader_wq)
+               destroy_workqueue(gc->gc_line_reader_wq);
 
        if (gc->gc_writer_ts)
                kthread_stop(gc->gc_writer_ts);
+
+       if (gc->gc_reader_ts)
+               kthread_stop(gc->gc_reader_ts);
 }
index ae8cd6d..1b0f612 100644
 
 #include "pblk.h"
 
-static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
-                                       *pblk_w_rq_cache, *pblk_line_meta_cache;
+static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
+                               *pblk_w_rq_cache, *pblk_line_meta_cache;
 static DECLARE_RWSEM(pblk_lock);
+struct bio_set *pblk_bio_set;
 
 static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
                          struct bio *bio)
@@ -33,7 +34,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
         * constraint. Writes can be of arbitrary size.
         */
        if (bio_data_dir(bio) == READ) {
-               blk_queue_split(q, &bio, q->bio_split);
+               blk_queue_split(q, &bio);
                ret = pblk_submit_read(pblk, bio);
                if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
                        bio_put(bio);
@@ -46,7 +47,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
         * available for user I/O.
         */
        if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
-               blk_queue_split(q, &bio, q->bio_split);
+               blk_queue_split(q, &bio);
 
        return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
 }
@@ -199,9 +200,9 @@ static int pblk_init_global_caches(struct pblk *pblk)
                return -ENOMEM;
        }
 
-       pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
+       pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
                                0, 0, NULL);
-       if (!pblk_r_rq_cache) {
+       if (!pblk_g_rq_cache) {
                kmem_cache_destroy(pblk_blk_ws_cache);
                kmem_cache_destroy(pblk_rec_cache);
                up_write(&pblk_lock);
@@ -213,7 +214,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
        if (!pblk_w_rq_cache) {
                kmem_cache_destroy(pblk_blk_ws_cache);
                kmem_cache_destroy(pblk_rec_cache);
-               kmem_cache_destroy(pblk_r_rq_cache);
+               kmem_cache_destroy(pblk_g_rq_cache);
                up_write(&pblk_lock);
                return -ENOMEM;
        }
@@ -225,7 +226,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
        if (!pblk_line_meta_cache) {
                kmem_cache_destroy(pblk_blk_ws_cache);
                kmem_cache_destroy(pblk_rec_cache);
-               kmem_cache_destroy(pblk_r_rq_cache);
+               kmem_cache_destroy(pblk_g_rq_cache);
                kmem_cache_destroy(pblk_w_rq_cache);
                up_write(&pblk_lock);
                return -ENOMEM;
@@ -239,27 +240,10 @@ static int pblk_core_init(struct pblk *pblk)
 {
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
-       int max_write_ppas;
-       int mod;
 
-       pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
-       max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
-       pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
-                               max_write_ppas : nvm_max_phys_sects(dev);
        pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
                                                geo->nr_planes * geo->nr_luns;
 
-       if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
-               pr_err("pblk: cannot support device max_phys_sect\n");
-               return -EINVAL;
-       }
-
-       div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
-       if (mod) {
-               pr_err("pblk: bad configuration of sectors/pages\n");
-               return -EINVAL;
-       }
-
        if (pblk_init_global_caches(pblk))
                return -ENOMEM;
 
@@ -267,7 +251,7 @@ static int pblk_core_init(struct pblk *pblk)
        if (!pblk->page_pool)
                return -ENOMEM;
 
-       pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
+       pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
                                                        pblk_blk_ws_cache);
        if (!pblk->line_ws_pool)
                goto free_page_pool;
@@ -276,41 +260,51 @@ static int pblk_core_init(struct pblk *pblk)
        if (!pblk->rec_pool)
                goto free_blk_ws_pool;
 
-       pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
-       if (!pblk->r_rq_pool)
+       pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
+                                                       pblk_g_rq_cache);
+       if (!pblk->g_rq_pool)
                goto free_rec_pool;
 
-       pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
+       pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
+                                                       pblk_w_rq_cache);
        if (!pblk->w_rq_pool)
-               goto free_r_rq_pool;
+               goto free_g_rq_pool;
 
        pblk->line_meta_pool =
-                       mempool_create_slab_pool(16, pblk_line_meta_cache);
+                       mempool_create_slab_pool(PBLK_META_POOL_SIZE,
+                                                       pblk_line_meta_cache);
        if (!pblk->line_meta_pool)
                goto free_w_rq_pool;
 
-       pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
-                                       WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
-       if (!pblk->kw_wq)
+       pblk->close_wq = alloc_workqueue("pblk-close-wq",
+                       WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
+       if (!pblk->close_wq)
                goto free_line_meta_pool;
 
+       pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
+                       WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+       if (!pblk->bb_wq)
+               goto free_close_wq;
+
        if (pblk_set_ppaf(pblk))
-               goto free_kw_wq;
+               goto free_bb_wq;
 
        if (pblk_rwb_init(pblk))
-               goto free_kw_wq;
+               goto free_bb_wq;
 
        INIT_LIST_HEAD(&pblk->compl_list);
        return 0;
 
-free_kw_wq:
-       destroy_workqueue(pblk->kw_wq);
+free_bb_wq:
+       destroy_workqueue(pblk->bb_wq);
+free_close_wq:
+       destroy_workqueue(pblk->close_wq);
 free_line_meta_pool:
        mempool_destroy(pblk->line_meta_pool);
 free_w_rq_pool:
        mempool_destroy(pblk->w_rq_pool);
-free_r_rq_pool:
-       mempool_destroy(pblk->r_rq_pool);
+free_g_rq_pool:
+       mempool_destroy(pblk->g_rq_pool);
 free_rec_pool:
        mempool_destroy(pblk->rec_pool);
 free_blk_ws_pool:
@@ -322,19 +316,22 @@ free_page_pool:
 
 static void pblk_core_free(struct pblk *pblk)
 {
-       if (pblk->kw_wq)
-               destroy_workqueue(pblk->kw_wq);
+       if (pblk->close_wq)
+               destroy_workqueue(pblk->close_wq);
+
+       if (pblk->bb_wq)
+               destroy_workqueue(pblk->bb_wq);
 
        mempool_destroy(pblk->page_pool);
        mempool_destroy(pblk->line_ws_pool);
        mempool_destroy(pblk->rec_pool);
-       mempool_destroy(pblk->r_rq_pool);
+       mempool_destroy(pblk->g_rq_pool);
        mempool_destroy(pblk->w_rq_pool);
        mempool_destroy(pblk->line_meta_pool);
 
        kmem_cache_destroy(pblk_blk_ws_cache);
        kmem_cache_destroy(pblk_rec_cache);
-       kmem_cache_destroy(pblk_r_rq_cache);
+       kmem_cache_destroy(pblk_g_rq_cache);
        kmem_cache_destroy(pblk_w_rq_cache);
        kmem_cache_destroy(pblk_line_meta_cache);
 }
@@ -344,6 +341,12 @@ static void pblk_luns_free(struct pblk *pblk)
        kfree(pblk->luns);
 }
 
+static void pblk_free_line_bitmaps(struct pblk_line *line)
+{
+       kfree(line->blk_bitmap);
+       kfree(line->erase_bitmap);
+}
+
 static void pblk_lines_free(struct pblk *pblk)
 {
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -355,8 +358,7 @@ static void pblk_lines_free(struct pblk *pblk)
                line = &pblk->lines[i];
 
                pblk_line_free(pblk, line);
-               kfree(line->blk_bitmap);
-               kfree(line->erase_bitmap);
+               pblk_free_line_bitmaps(line);
        }
        spin_unlock(&l_mg->free_lock);
 }
@@ -368,11 +370,15 @@ static void pblk_line_meta_free(struct pblk *pblk)
 
        kfree(l_mg->bb_template);
        kfree(l_mg->bb_aux);
+       kfree(l_mg->vsc_list);
 
+       spin_lock(&l_mg->free_lock);
        for (i = 0; i < PBLK_DATA_LINES; i++) {
-               pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
-               pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+               kfree(l_mg->sline_meta[i]);
+               pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
+               kfree(l_mg->eline_meta[i]);
        }
+       spin_unlock(&l_mg->free_lock);
 
        kfree(pblk->lines);
 }
@@ -411,13 +417,31 @@ out:
        return ret;
 }
 
-static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
+static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
+                       int blk_per_line)
 {
-       struct pblk_line_meta *lm = &pblk->lm;
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
        struct pblk_lun *rlun;
        int bb_cnt = 0;
        int i;
 
+       for (i = 0; i < blk_per_line; i++) {
+               rlun = &pblk->luns[i];
+               if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
+                       continue;
+
+               set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
+               bb_cnt++;
+       }
+
+       return bb_cnt;
+}
+
+static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+       struct pblk_line_meta *lm = &pblk->lm;
+
        line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
        if (!line->blk_bitmap)
                return -ENOMEM;
@@ -428,16 +452,7 @@ static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
                return -ENOMEM;
        }
 
-       for (i = 0; i < lm->blk_per_line; i++) {
-               rlun = &pblk->luns[i];
-               if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
-                       continue;
-
-               set_bit(i, line->blk_bitmap);
-               bb_cnt++;
-       }
-
-       return bb_cnt;
+       return 0;
 }
 
 static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
@@ -505,12 +520,32 @@ static int pblk_lines_configure(struct pblk *pblk, int flags)
 }
 
 /* See comment over struct line_emeta definition */
-static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
+static unsigned int calc_emeta_len(struct pblk *pblk)
 {
-       return (sizeof(struct line_emeta) +
-                       ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
-                       (pblk->l_mg.nr_lines * sizeof(u32)) +
-                       lm->blk_bitmap_len);
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+
+       /* Round to sector size so that lba_list starts on its own sector */
+       lm->emeta_sec[1] = DIV_ROUND_UP(
+                       sizeof(struct line_emeta) + lm->blk_bitmap_len,
+                       geo->sec_size);
+       lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
+
+       /* Round to sector size so that vsc_list starts on its own sector */
+       lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
+       lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
+                       geo->sec_size);
+       lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
+
+       lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
+                       geo->sec_size);
+       lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
+
+       lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
+
+       return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
 }
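
calc_emeta_len() above rounds each of the three emeta regions (header plus bad-block bitmap, lba_list, vsc_list) up to whole sectors so that each region starts sector-aligned. A worked sketch with illustrative, not device-accurate, sizes and a 4096-byte sector:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned sec_size = 4096;        /* assumed geo->sec_size */
            unsigned hdr_plus_bb = 2500;     /* line_emeta + blk_bitmap_len */
            unsigned lba_list_bytes = 60000; /* dsec_per_line * sizeof(u64) */
            unsigned vsc_list_bytes = 5000;  /* nr_lines * sizeof(u32) */

            unsigned sec1 = DIV_ROUND_UP(hdr_plus_bb, sec_size);    /* -> 1 */
            unsigned sec2 = DIV_ROUND_UP(lba_list_bytes, sec_size); /* -> 15 */
            unsigned sec3 = DIV_ROUND_UP(vsc_list_bytes, sec_size); /* -> 2 */

            printf("emeta: %u + %u + %u = %u sectors (%u bytes)\n",
                   sec1, sec2, sec3, sec1 + sec2 + sec3,
                   (sec1 + sec2 + sec3) * sec_size);
            return 0;
    }
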
 
 static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
@@ -534,6 +569,78 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
        atomic_set(&pblk->rl.free_blocks, nr_free_blks);
 }
 
+static int pblk_lines_alloc_metadata(struct pblk *pblk)
+{
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line_meta *lm = &pblk->lm;
+       int i;
+
+       /* smeta is always small enough to fit in a kmalloc allocation, while
+        * emeta's size depends on the number of LUNs allocated to the pblk
+        * instance
+        */
+       for (i = 0; i < PBLK_DATA_LINES; i++) {
+               l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
+               if (!l_mg->sline_meta[i])
+                       goto fail_free_smeta;
+       }
+
+       /* emeta allocates three different buffers to manage metadata in both
+        * its in-memory and on-media layouts
+        */
+       for (i = 0; i < PBLK_DATA_LINES; i++) {
+               struct pblk_emeta *emeta;
+
+               emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
+               if (!emeta)
+                       goto fail_free_emeta;
+
+               if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
+                       l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
+
+                       emeta->buf = vmalloc(lm->emeta_len[0]);
+                       if (!emeta->buf) {
+                               kfree(emeta);
+                               goto fail_free_emeta;
+                       }
+
+                       emeta->nr_entries = lm->emeta_sec[0];
+                       l_mg->eline_meta[i] = emeta;
+               } else {
+                       l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
+
+                       emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
+                       if (!emeta->buf) {
+                               kfree(emeta);
+                               goto fail_free_emeta;
+                       }
+
+                       emeta->nr_entries = lm->emeta_sec[0];
+                       l_mg->eline_meta[i] = emeta;
+               }
+       }
+
+       l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
+       if (!l_mg->vsc_list)
+               goto fail_free_emeta;
+
+       for (i = 0; i < l_mg->nr_lines; i++)
+               l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
+
+       return 0;
+
+fail_free_emeta:
+       while (--i >= 0) {
+               vfree(l_mg->eline_meta[i]->buf);
+               kfree(l_mg->eline_meta[i]);
+       }
+
+fail_free_smeta:
+       for (i = 0; i < PBLK_DATA_LINES; i++)
+               kfree(l_mg->sline_meta[i]);
+
+       return -ENOMEM;
+}
+
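
The allocation switch above is the usual size-based kmalloc/vmalloc split: slab allocations need physically contiguous pages, so anything past the slab-cache limit falls back to virtually contiguous memory. A userspace sketch under an assumed 64 KiB KMALLOC_MAX_CACHE_SIZE, with malloc standing in for both allocators:

#include <stdio.h>
#include <stdlib.h>

#define KMALLOC_MAX_CACHE_SIZE (64 * 1024)	/* assumed for a typical config */

enum meta_alloc { PBLK_KMALLOC_META, PBLK_VMALLOC_META };

int main(void)
{
	size_t emeta_len = 3 * 1024 * 1024;	/* assumed large emeta buffer */
	enum meta_alloc type;
	void *buf;

	/* Large buffers only need virtual contiguity, so vmalloc avoids the
	 * high-order page allocations that can fail under fragmentation.
	 */
	if (emeta_len > KMALLOC_MAX_CACHE_SIZE) {
		type = PBLK_VMALLOC_META;
		buf = malloc(emeta_len);	/* vmalloc() stand-in */
	} else {
		type = PBLK_KMALLOC_META;
		buf = malloc(emeta_len);	/* kmalloc() stand-in */
	}

	printf("alloc type: %d\n", type);
	free(buf);
	return 0;
}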
 static int pblk_lines_init(struct pblk *pblk)
 {
        struct nvm_tgt_dev *dev = pblk->dev;
@@ -542,10 +649,32 @@ static int pblk_lines_init(struct pblk *pblk)
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line *line;
        unsigned int smeta_len, emeta_len;
-       long nr_bad_blks, nr_meta_blks, nr_free_blks;
-       int bb_distance;
-       int i;
-       int ret;
+       long nr_bad_blks, nr_free_blks;
+       int bb_distance, max_write_ppas, mod;
+       int i, ret;
+
+       pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
+       max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
+       pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
+                               max_write_ppas : nvm_max_phys_sects(dev);
+       pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+
+       if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+               pr_err("pblk: cannot support device max_phys_sect\n");
+               return -EINVAL;
+       }
+
+       div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
+       if (mod) {
+               pr_err("pblk: bad configuration of sectors/pages\n");
+               return -EINVAL;
+       }
+
+       l_mg->nr_lines = geo->blks_per_lun;
+       l_mg->log_line = l_mg->data_line = NULL;
+       l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+       l_mg->nr_free_lines = 0;
+       bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
 
        lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
        lm->blk_per_line = geo->nr_luns;
@@ -554,20 +683,17 @@ static int pblk_lines_init(struct pblk *pblk)
        lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
        lm->high_thrs = lm->sec_per_line / 2;
        lm->mid_thrs = lm->sec_per_line / 4;
+       lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
 
        /* Calculate necessary pages for smeta. See comment over struct
         * line_smeta definition
         */
-       lm->smeta_len = sizeof(struct line_smeta) +
-                               PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
-
        i = 1;
 add_smeta_page:
        lm->smeta_sec = i * geo->sec_per_pl;
        lm->smeta_len = lm->smeta_sec * geo->sec_size;
 
-       smeta_len = sizeof(struct line_smeta) +
-                               PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+       smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
        if (smeta_len > lm->smeta_len) {
                i++;
                goto add_smeta_page;
@@ -578,66 +704,28 @@ add_smeta_page:
         */
        i = 1;
 add_emeta_page:
-       lm->emeta_sec = i * geo->sec_per_pl;
-       lm->emeta_len = lm->emeta_sec * geo->sec_size;
+       lm->emeta_sec[0] = i * geo->sec_per_pl;
+       lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
 
-       emeta_len = calc_emeta_len(pblk, lm);
-       if (emeta_len > lm->emeta_len) {
+       emeta_len = calc_emeta_len(pblk);
+       if (emeta_len > lm->emeta_len[0]) {
                i++;
                goto add_emeta_page;
        }
-       lm->emeta_bb = geo->nr_luns - i;
-
-       nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
-                               (geo->sec_per_blk / 2)) / geo->sec_per_blk;
-       lm->min_blk_line = nr_meta_blks + 1;
-
-       l_mg->nr_lines = geo->blks_per_lun;
-       l_mg->log_line = l_mg->data_line = NULL;
-       l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
-       l_mg->nr_free_lines = 0;
-       bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
 
-       /* smeta is always small enough to fit on a kmalloc memory allocation,
-        * emeta depends on the number of LUNs allocated to the pblk instance
-        */
-       l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
-       for (i = 0; i < PBLK_DATA_LINES; i++) {
-               l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
-               if (!l_mg->sline_meta[i].meta)
-                       while (--i >= 0) {
-                               kfree(l_mg->sline_meta[i].meta);
-                               ret = -ENOMEM;
-                               goto fail;
-                       }
+       lm->emeta_bb = geo->nr_luns - i;
+       lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
+                                                       geo->sec_per_blk);
+       if (lm->min_blk_line > lm->blk_per_line) {
+               pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
+                                                       lm->blk_per_line);
+               ret = -EINVAL;
+               goto fail;
        }
 
-       if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
-               l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
-
-               for (i = 0; i < PBLK_DATA_LINES; i++) {
-                       l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
-                       if (!l_mg->eline_meta[i].meta)
-                               while (--i >= 0) {
-                                       vfree(l_mg->eline_meta[i].meta);
-                                       ret = -ENOMEM;
-                                       goto fail;
-                               }
-               }
-       } else {
-               l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
-
-               for (i = 0; i < PBLK_DATA_LINES; i++) {
-                       l_mg->eline_meta[i].meta =
-                                       kmalloc(lm->emeta_len, GFP_KERNEL);
-                       if (!l_mg->eline_meta[i].meta)
-                               while (--i >= 0) {
-                                       kfree(l_mg->eline_meta[i].meta);
-                                       ret = -ENOMEM;
-                                       goto fail;
-                               }
-               }
-       }
+       ret = pblk_lines_alloc_metadata(pblk);
+       if (ret)
+               goto fail;
 
        l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
        if (!l_mg->bb_template) {
@@ -664,11 +752,14 @@ add_emeta_page:
        INIT_LIST_HEAD(&l_mg->gc_low_list);
        INIT_LIST_HEAD(&l_mg->gc_empty_list);
 
+       INIT_LIST_HEAD(&l_mg->emeta_list);
+
        l_mg->gc_lists[0] = &l_mg->gc_high_list;
        l_mg->gc_lists[1] = &l_mg->gc_mid_list;
        l_mg->gc_lists[2] = &l_mg->gc_low_list;
 
        spin_lock_init(&l_mg->free_lock);
+       spin_lock_init(&l_mg->close_lock);
        spin_lock_init(&l_mg->gc_lock);
 
        pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
@@ -689,10 +780,16 @@ add_emeta_page:
                line->type = PBLK_LINETYPE_FREE;
                line->state = PBLK_LINESTATE_FREE;
                line->gc_group = PBLK_LINEGC_NONE;
+               line->vsc = &l_mg->vsc_list[i];
                spin_lock_init(&line->lock);
 
-               nr_bad_blks = pblk_bb_line(pblk, line);
+               ret = pblk_alloc_line_bitmaps(pblk, line);
+               if (ret)
+                       goto fail_free_lines;
+
+               nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
                if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
+                       pblk_free_line_bitmaps(line);
                        ret = -EINVAL;
                        goto fail_free_lines;
                }
@@ -713,24 +810,20 @@ add_emeta_page:
 
        pblk_set_provision(pblk, nr_free_blks);
 
-       sema_init(&pblk->erase_sem, 1);
-
        /* Cleanup per-LUN bad block lists - managed within lines at run-time */
        for (i = 0; i < geo->nr_luns; i++)
                kfree(pblk->luns[i].bb_list);
 
        return 0;
 fail_free_lines:
-       kfree(pblk->lines);
+       while (--i >= 0)
+               pblk_free_line_bitmaps(&pblk->lines[i]);
 fail_free_bb_aux:
        kfree(l_mg->bb_aux);
 fail_free_bb_template:
        kfree(l_mg->bb_template);
 fail_free_meta:
-       for (i = 0; i < PBLK_DATA_LINES; i++) {
-               pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
-               pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
-       }
+       pblk_line_meta_free(pblk);
 fail:
        for (i = 0; i < geo->nr_luns; i++)
                kfree(pblk->luns[i].bb_list);
@@ -754,6 +847,15 @@ static int pblk_writer_init(struct pblk *pblk)
 
 static void pblk_writer_stop(struct pblk *pblk)
 {
+       /* The pipeline must be stopped and the write buffer emptied before the
+        * write thread is stopped
+        */
+       WARN(pblk_rb_read_count(&pblk->rwb),
+                       "Stopping not fully persisted write buffer\n");
+
+       WARN(pblk_rb_sync_count(&pblk->rwb),
+                       "Stopping not fully synced write buffer\n");
+
        if (pblk->writer_ts)
                kthread_stop(pblk->writer_ts);
        del_timer(&pblk->wtimer);
@@ -772,10 +874,9 @@ static void pblk_free(struct pblk *pblk)
 
 static void pblk_tear_down(struct pblk *pblk)
 {
-       pblk_flush_writer(pblk);
+       pblk_pipeline_stop(pblk);
        pblk_writer_stop(pblk);
        pblk_rb_sync_l2p(&pblk->rwb);
-       pblk_recov_pad(pblk);
        pblk_rwb_free(pblk);
        pblk_rl_free(&pblk->rl);
 
@@ -821,6 +922,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 
        pblk->dev = dev;
        pblk->disk = tdisk;
+       pblk->state = PBLK_STATE_RUNNING;
 
        spin_lock_init(&pblk->trans_lock);
        spin_lock_init(&pblk->lock);
@@ -836,8 +938,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
        atomic_long_set(&pblk->req_writes, 0);
        atomic_long_set(&pblk->sub_writes, 0);
        atomic_long_set(&pblk->sync_writes, 0);
-       atomic_long_set(&pblk->compl_writes, 0);
        atomic_long_set(&pblk->inflight_reads, 0);
+       atomic_long_set(&pblk->cache_reads, 0);
        atomic_long_set(&pblk->sync_reads, 0);
        atomic_long_set(&pblk->recov_writes, 0);
@@ -946,11 +1048,20 @@ static struct nvm_tgt_type tt_pblk = {
 
 static int __init pblk_module_init(void)
 {
-       return nvm_register_tgt_type(&tt_pblk);
+       int ret;
+
+       pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+       if (!pblk_bio_set)
+               return -ENOMEM;
+       ret = nvm_register_tgt_type(&tt_pblk);
+       if (ret)
+               bioset_free(pblk_bio_set);
+       return ret;
 }
 
 static void pblk_module_exit(void)
 {
+       bioset_free(pblk_bio_set);
        nvm_unregister_tgt_type(&tt_pblk);
 }
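
The new module init follows the usual acquire-then-unwind pattern: the bioset is created first and released again if target registration fails. A userspace sketch of the same shape, with malloc/free and a failing stub standing in for bioset_create()/bioset_free() and nvm_register_tgt_type():

#include <stdio.h>
#include <stdlib.h>

static void *pblk_bio_set;

static int register_tgt_stub(void)
{
	return -1;	/* force the error path for the demo */
}

static int module_init_sketch(void)
{
	int ret;

	pblk_bio_set = malloc(64);	/* bioset_create() stand-in */
	if (!pblk_bio_set)
		return -1;		/* -ENOMEM in the kernel */

	ret = register_tgt_stub();	/* nvm_register_tgt_type() stand-in */
	if (ret)
		free(pblk_bio_set);	/* unwind the earlier allocation */
	return ret;
}

int main(void)
{
	printf("init: %d\n", module_init_sketch());
	return 0;
}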
 
index 17c1695..fddb924 100644
@@ -25,9 +25,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
                               unsigned int valid_secs)
 {
        struct pblk_line *line = pblk_line_get_data(pblk);
-       struct line_emeta *emeta = line->emeta;
+       struct pblk_emeta *emeta = line->emeta;
        struct pblk_w_ctx *w_ctx;
-       __le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
+       __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
        u64 paddr;
        int nr_secs = pblk->min_write_pgs;
        int i;
@@ -51,18 +51,20 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
                        w_ctx->ppa = ppa_list[i];
                        meta_list[i].lba = cpu_to_le64(w_ctx->lba);
                        lba_list[paddr] = cpu_to_le64(w_ctx->lba);
-                       le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
+                       line->nr_valid_lbas++;
                } else {
-                       meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
-                       lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
-                       pblk_map_pad_invalidate(pblk, line, paddr);
+                       __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+                       lba_list[paddr] = meta_list[i].lba = addr_empty;
+                       __pblk_map_invalidate(pblk, line, paddr);
                }
        }
 
        if (pblk_line_is_full(line)) {
-               line = pblk_line_replace_data(pblk);
-               if (!line)
-                       return;
+               struct pblk_line *prev_line = line;
+
+               pblk_line_replace_data(pblk);
+               pblk_line_close_meta(pblk, prev_line);
        }
 
        pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
@@ -91,8 +93,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 {
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
-       struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+       struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_sec_meta *meta_list = rqd->meta_list;
+       struct pblk_line *e_line, *d_line;
        unsigned int map_secs;
        int min = pblk->min_write_pgs;
        int i, erase_lun;
@@ -102,35 +105,63 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
                pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
                                        lun_bitmap, &meta_list[i], map_secs);
 
-               erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
-                                                       rqd->ppa_list[i].g.ch;
+               erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
 
-               if (!test_bit(erase_lun, e_line->erase_bitmap)) {
-                       if (down_trylock(&pblk->erase_sem))
-                               continue;
+               /* The erase line can change after the page map. We might
+                * also be writing the last line.
+                */
+               e_line = pblk_line_get_erase(pblk);
+               if (!e_line)
+                       return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
+                                                       valid_secs, i + min);
 
+               spin_lock(&e_line->lock);
+               if (!test_bit(erase_lun, e_line->erase_bitmap)) {
                        set_bit(erase_lun, e_line->erase_bitmap);
                        atomic_dec(&e_line->left_eblks);
+
                        *erase_ppa = rqd->ppa_list[i];
                        erase_ppa->g.blk = e_line->id;
 
+                       spin_unlock(&e_line->lock);
+
                        /* Avoid evaluating e_line->left_eblks */
                        return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
                                                        valid_secs, i + min);
                }
+               spin_unlock(&e_line->lock);
        }
 
-       /* Erase blocks that are bad in this line but might not be in next */
-       if (unlikely(ppa_empty(*erase_ppa))) {
-               struct pblk_line_meta *lm = &pblk->lm;
+       d_line = pblk_line_get_data(pblk);
+
+       /* The erase line can change after the page map. We might also be
+        * writing the last line.
+        */
+       e_line = pblk_line_get_erase(pblk);
+       if (!e_line)
+               return;
 
-               i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
-               if (i == lm->blk_per_line)
+       /* Erase blocks that are bad in this line but might not be in next */
+       if (unlikely(ppa_empty(*erase_ppa)) &&
+                       bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
+               int bit = -1;
+
+retry:
+               bit = find_next_bit(d_line->blk_bitmap,
+                                               lm->blk_per_line, bit + 1);
+               if (bit >= lm->blk_per_line)
                        return;
 
-               set_bit(i, e_line->erase_bitmap);
+               spin_lock(&e_line->lock);
+               if (test_bit(bit, e_line->erase_bitmap)) {
+                       spin_unlock(&e_line->lock);
+                       goto retry;
+               }
+               spin_unlock(&e_line->lock);
+
+               set_bit(bit, e_line->erase_bitmap);
                atomic_dec(&e_line->left_eblks);
-               *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+               *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
                erase_ppa->g.blk = e_line->id;
        }
 }
index 045384d..5ecc154 100644
@@ -150,6 +150,7 @@ try:
        /* Release flags on context. Protect from writes and reads */
        smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
        pblk_ppa_set_empty(&w_ctx->ppa);
+       w_ctx->lba = ADDR_EMPTY;
 }
 
 #define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
@@ -180,6 +181,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb)
        return pblk_rb_ring_count(mem, subm, rb->nr_entries);
 }
 
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
+{
+       unsigned int mem = READ_ONCE(rb->mem);
+       unsigned int sync = READ_ONCE(rb->sync);
+
+       return pblk_rb_ring_count(mem, sync, rb->nr_entries);
+}
+
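
Both counters are built on CIRC_CNT over free-running pointers into a power-of-two ring. A minimal demonstration of the mask arithmetic; the macro body is the one from include/linux/circ_buf.h, while the pointer values are made up:

#include <stdio.h>

/* Number of occupied entries between tail and head in a ring whose size
 * is a power of two.
 */
#define CIRC_CNT(head, tail, size) (((head) - (tail)) & ((size) - 1))

int main(void)
{
	unsigned int size = 8;	/* nr_entries, must be a power of two */
	unsigned int mem = 10;	/* write pointer, free-running */
	unsigned int sync = 6;	/* sync pointer, free-running */

	/* Wrap-around is handled by the mask: (10 - 6) & 7 == 4 entries
	 * are written but not yet synced to the media.
	 */
	printf("unsynced entries: %u\n", CIRC_CNT(mem, sync, size));
	return 0;
}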
 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
 {
        unsigned int subm;
@@ -199,12 +208,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
        struct pblk_line *line;
        struct pblk_rb_entry *entry;
        struct pblk_w_ctx *w_ctx;
+       unsigned int user_io = 0, gc_io = 0;
        unsigned int i;
+       int flags;
 
        for (i = 0; i < to_update; i++) {
                entry = &rb->entries[*l2p_upd];
                w_ctx = &entry->w_ctx;
 
+               flags = READ_ONCE(entry->w_ctx.flags);
+               if (flags & PBLK_IOTYPE_USER)
+                       user_io++;
+               else if (flags & PBLK_IOTYPE_GC)
+                       gc_io++;
+               else
+                       WARN(1, "pblk: unknown IO type\n");
+
                pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
                                                        entry->cacheline);
 
@@ -214,6 +233,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
                *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
        }
 
+       pblk_rl_out(&pblk->rl, user_io, gc_io);
+
        return 0;
 }
 
@@ -357,6 +378,9 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
        /* Protect syncs */
        smp_store_release(&rb->sync_point, sync_point);
 
+       if (!bio)
+               return 0;
+
        spin_lock_irq(&rb->s_lock);
        bio_list_add(&entry->w_ctx.bios, bio);
        spin_unlock_irq(&rb->s_lock);
@@ -395,6 +419,17 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
        return 1;
 }
 
+void pblk_rb_flush(struct pblk_rb *rb)
+{
+       struct pblk *pblk = container_of(rb, struct pblk, rwb);
+       unsigned int mem = READ_ONCE(rb->mem);
+
+       if (pblk_rb_sync_point_set(rb, NULL, mem))
+               return;
+
+       pblk_write_should_kick(pblk);
+}
+
 static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
                                   unsigned int *pos, struct bio *bio,
                                   int *io_ret)
@@ -431,15 +466,16 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
                           unsigned int nr_entries, unsigned int *pos)
 {
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
-       int flush_done;
+       int io_ret;
 
        spin_lock(&rb->w_lock);
-       if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) {
+       io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
+       if (io_ret) {
                spin_unlock(&rb->w_lock);
-               return NVM_IO_REQUEUE;
+               return io_ret;
        }
 
-       if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) {
+       if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
                spin_unlock(&rb->w_lock);
                return NVM_IO_REQUEUE;
        }
@@ -447,7 +483,7 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
        pblk_rl_user_in(&pblk->rl, nr_entries);
        spin_unlock(&rb->w_lock);
 
-       return flush_done;
+       return io_ret;
 }
 
 /*
@@ -521,20 +557,18 @@ out:
  * This function is used by the write thread to form the write bio that will
  * persist data on the write buffer to the media.
  */
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
-                                struct pblk_c_ctx *c_ctx,
-                                unsigned int pos,
-                                unsigned int nr_entries,
-                                unsigned int count)
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+                                struct bio *bio, unsigned int pos,
+                                unsigned int nr_entries, unsigned int count)
 {
        struct pblk *pblk = container_of(rb, struct pblk, rwb);
+       struct request_queue *q = pblk->dev->q;
+       struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
        struct pblk_rb_entry *entry;
        struct page *page;
-       unsigned int pad = 0, read = 0, to_read = nr_entries;
-       unsigned int user_io = 0, gc_io = 0;
+       unsigned int pad = 0, to_read = nr_entries;
        unsigned int i;
        int flags;
-       int ret;
 
        if (count < nr_entries) {
                pad = nr_entries - count;
@@ -553,15 +587,10 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
                 */
 try:
                flags = READ_ONCE(entry->w_ctx.flags);
-               if (!(flags & PBLK_WRITTEN_DATA))
+               if (!(flags & PBLK_WRITTEN_DATA)) {
+                       io_schedule();
                        goto try;
-
-               if (flags & PBLK_IOTYPE_USER)
-                       user_io++;
-               else if (flags & PBLK_IOTYPE_GC)
-                       gc_io++;
-               else
-                       WARN(1, "pblk: unknown IO type\n");
+               }
 
                page = virt_to_page(entry->data);
                if (!page) {
@@ -570,17 +599,17 @@ try:
                        flags |= PBLK_SUBMITTED_ENTRY;
                        /* Release flags on context. Protect from writes */
                        smp_store_release(&entry->w_ctx.flags, flags);
-                       goto out;
+                       return NVM_IO_ERR;
                }
 
-               ret = bio_add_page(bio, page, rb->seg_size, 0);
-               if (ret != rb->seg_size) {
+               if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
+                                                               rb->seg_size) {
                        pr_err("pblk: could not add page to write bio\n");
                        flags &= ~PBLK_WRITTEN_DATA;
                        flags |= PBLK_SUBMITTED_ENTRY;
                        /* Release flags on context. Protect from writes */
                        smp_store_release(&entry->w_ctx.flags, flags);
-                       goto out;
+                       return NVM_IO_ERR;
                }
 
                if (flags & PBLK_FLUSH_ENTRY) {
@@ -607,14 +636,19 @@ try:
                pos = (pos + 1) & (rb->nr_entries - 1);
        }
 
-       read = to_read;
-       pblk_rl_out(&pblk->rl, user_io, gc_io);
+       if (pad) {
+               if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
+                       pr_err("pblk: could not pad page in write bio\n");
+                       return NVM_IO_ERR;
+               }
+       }
+
 #ifdef CONFIG_NVM_DEBUG
        atomic_long_add(pad, &((struct pblk *)
                        (container_of(rb, struct pblk, rwb)))->padded_writes);
 #endif
-out:
-       return read;
+
+       return NVM_IO_OK;
 }
 
 /*
@@ -623,15 +657,17 @@ out:
  * be directed to disk.
  */
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
-                       u64 pos, int bio_iter)
+                       struct ppa_addr ppa, int bio_iter)
 {
+       struct pblk *pblk = container_of(rb, struct pblk, rwb);
        struct pblk_rb_entry *entry;
        struct pblk_w_ctx *w_ctx;
+       struct ppa_addr l2p_ppa;
+       u64 pos = pblk_addr_to_cacheline(ppa);
        void *data;
        int flags;
        int ret = 1;
 
-       spin_lock(&rb->w_lock);
 
 #ifdef CONFIG_NVM_DEBUG
        /* Caller must ensure that the access will not cause an overflow */
@@ -641,8 +677,14 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
        w_ctx = &entry->w_ctx;
        flags = READ_ONCE(w_ctx->flags);
 
+       spin_lock(&rb->w_lock);
+       spin_lock(&pblk->trans_lock);
+       l2p_ppa = pblk_trans_map_get(pblk, lba);
+       spin_unlock(&pblk->trans_lock);
+
        /* Check if the entry has been overwritten or is scheduled to be */
-       if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
+       if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
+                                               flags & PBLK_WRITABLE_ENTRY) {
                ret = 0;
                goto out;
        }
index 4a12f14..4e5c48f 100644
@@ -34,8 +34,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
        BUG_ON(!pblk_addr_in_cache(ppa));
 #endif
 
-       return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
-                                       pblk_addr_to_cacheline(ppa), bio_iter);
+       return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
 }
 
 static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -76,6 +75,9 @@ retry:
                        }
                        WARN_ON(test_and_set_bit(i, read_bitmap));
                        advanced_bio = 1;
+#ifdef CONFIG_NVM_DEBUG
+                       atomic_long_inc(&pblk->cache_reads);
+#endif
                } else {
                        /* Read from media non-cached sectors */
                        rqd->ppa_list[j++] = p;
@@ -85,6 +87,11 @@ retry:
                        bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
        }
 
+       if (pblk_io_aligned(pblk, nr_secs))
+               rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+       else
+               rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
 #ifdef CONFIG_NVM_DEBUG
        atomic_long_add(nr_secs, &pblk->inflight_reads);
 #endif
@@ -94,8 +101,6 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
 {
        int err;
 
-       rqd->flags = pblk_set_read_mode(pblk);
-
        err = pblk_submit_io(pblk, rqd);
        if (err)
                return NVM_IO_ERR;
@@ -107,27 +112,27 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 {
        struct pblk *pblk = rqd->private;
        struct nvm_tgt_dev *dev = pblk->dev;
-       struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+       struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
        struct bio *bio = rqd->bio;
 
        if (rqd->error)
                pblk_log_read_err(pblk, rqd);
 #ifdef CONFIG_NVM_DEBUG
        else
-               WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+               WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
 #endif
 
-       if (rqd->nr_ppas > 1)
-               nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+       nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 
        bio_put(bio);
-       if (r_ctx->orig_bio) {
+       if (r_ctx->private) {
+               struct bio *orig_bio = r_ctx->private;
+
 #ifdef CONFIG_NVM_DEBUG
-               WARN_ONCE(r_ctx->orig_bio->bi_error,
-                                               "pblk: corrupted read bio\n");
+               WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
 #endif
-               bio_endio(r_ctx->orig_bio);
-               bio_put(r_ctx->orig_bio);
+               bio_endio(orig_bio);
+               bio_put(orig_bio);
        }
 
 #ifdef CONFIG_NVM_DEBUG
@@ -136,6 +141,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 #endif
 
        pblk_free_rqd(pblk, rqd, READ);
+       atomic_dec(&pblk->inflight_io);
 }
 
 static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
@@ -173,6 +179,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 
        rqd->bio = new_bio;
        rqd->nr_ppas = nr_holes;
+       rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
        rqd->end_io = NULL;
 
        if (unlikely(nr_secs > 1 && nr_holes == 1)) {
@@ -280,9 +287,14 @@ retry:
                        goto retry;
                }
                WARN_ON(test_and_set_bit(0, read_bitmap));
+#ifdef CONFIG_NVM_DEBUG
+               atomic_long_inc(&pblk->cache_reads);
+#endif
        } else {
                rqd->ppa_addr = ppa;
        }
+
+       rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 }
 
 int pblk_submit_read(struct pblk *pblk, struct bio *bio)
@@ -316,13 +328,16 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
         */
        bio_init_idx = pblk_get_bi_idx(bio);
 
+       rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+                                                       &rqd->dma_meta_list);
+       if (!rqd->meta_list) {
+               pr_err("pblk: not able to allocate ppa list\n");
+               goto fail_rqd_free;
+       }
+
        if (nr_secs > 1) {
-               rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
-                                               &rqd->dma_ppa_list);
-               if (!rqd->ppa_list) {
-                       pr_err("pblk: not able to allocate ppa list\n");
-                       goto fail_rqd_free;
-               }
+               rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
+               rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
 
                pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
        } else {
@@ -332,6 +347,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
        bio_get(bio);
        if (bitmap_full(&read_bitmap, nr_secs)) {
                bio_endio(bio);
+               atomic_inc(&pblk->inflight_io);
                pblk_end_io_read(rqd);
                return NVM_IO_OK;
        }
@@ -339,17 +355,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
        /* All sectors are to be read from the device */
        if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
                struct bio *int_bio = NULL;
-               struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+               struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
 
                /* Clone read bio to deal with read errors internally */
-               int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
+               int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
                if (!int_bio) {
                        pr_err("pblk: could not clone read bio\n");
                        return NVM_IO_ERR;
                }
 
                rqd->bio = int_bio;
-               r_ctx->orig_bio = bio;
+               r_ctx->private = bio;
 
                ret = pblk_submit_read_io(pblk, rqd);
                if (ret) {
@@ -445,7 +461,6 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 {
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
-       struct request_queue *q = dev->q;
        struct bio *bio;
        struct nvm_rq rqd;
        int ret, data_len;
@@ -453,22 +468,19 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 
        memset(&rqd, 0, sizeof(struct nvm_rq));
 
+       rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+                                                       &rqd.dma_meta_list);
+       if (!rqd.meta_list)
+               return NVM_IO_ERR;
+
        if (nr_secs > 1) {
-               rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
-                                                       &rqd.dma_ppa_list);
-               if (!rqd.ppa_list)
-                       return NVM_IO_ERR;
+               rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+               rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
 
                *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
                                                                nr_secs);
-               if (*secs_to_gc == 1) {
-                       struct ppa_addr ppa;
-
-                       ppa = rqd.ppa_list[0];
-                       nvm_dev_dma_free(dev->parent, rqd.ppa_list,
-                                                       rqd.dma_ppa_list);
-                       rqd.ppa_addr = ppa;
-               }
+               if (*secs_to_gc == 1)
+                       rqd.ppa_addr = rqd.ppa_list[0];
        } else {
                *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
        }
@@ -477,7 +489,8 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
                goto out;
 
        data_len = (*secs_to_gc) * geo->sec_size;
-       bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
+       bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
+                                               PBLK_KMALLOC_META, GFP_KERNEL);
        if (IS_ERR(bio)) {
                pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
                goto err_free_dma;
@@ -490,6 +503,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
        rqd.end_io = pblk_end_io_sync;
        rqd.private = &wait;
        rqd.nr_ppas = *secs_to_gc;
+       rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
        rqd.bio = bio;
 
        ret = pblk_submit_read_io(pblk, &rqd);
@@ -503,6 +517,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
                                msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
                pr_err("pblk: GC read I/O timed out\n");
        }
+       atomic_dec(&pblk->inflight_io);
 
        if (rqd.error) {
                atomic_long_inc(&pblk->read_failed_gc);
@@ -518,12 +533,10 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
 #endif
 
 out:
-       if (rqd.nr_ppas > 1)
-               nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+       nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
        return NVM_IO_OK;
 
 err_free_dma:
-       if (rqd.nr_ppas > 1)
-               nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+       nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
        return NVM_IO_ERR;
 }
index f8f8508..0e48d3e 100644
@@ -120,18 +120,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
        return 0;
 }
 
-__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
 {
        u32 crc;
 
-       crc = pblk_calc_emeta_crc(pblk, emeta);
-       if (le32_to_cpu(emeta->crc) != crc)
+       crc = pblk_calc_emeta_crc(pblk, emeta_buf);
+       if (le32_to_cpu(emeta_buf->crc) != crc)
                return NULL;
 
-       if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
+       if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
                return NULL;
 
-       return pblk_line_emeta_to_lbas(emeta);
+       return emeta_to_lbas(pblk, emeta_buf);
 }
 
 static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
@@ -139,19 +139,20 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_line_meta *lm = &pblk->lm;
-       struct line_emeta *emeta = line->emeta;
+       struct pblk_emeta *emeta = line->emeta;
+       struct line_emeta *emeta_buf = emeta->buf;
        __le64 *lba_list;
        int data_start;
        int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
        int i;
 
-       lba_list = pblk_recov_get_lba_list(pblk, emeta);
+       lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
        if (!lba_list)
                return 1;
 
        data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
-       nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
-       nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
+       nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
+       nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
 
        for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
                struct ppa_addr ppa;
@@ -169,7 +170,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
                        if (test_and_set_bit(i, line->invalid_bitmap))
                                WARN_ONCE(1, "pblk: rec. double invalidate:\n");
                        else
-                               line->vsc--;
+                               le32_add_cpu(line->vsc, -1);
                        spin_unlock(&line->lock);
 
                        continue;
@@ -181,7 +182,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 
        if (nr_valid_lbas != nr_lbas)
                pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
-                               line->id, line->emeta->nr_valid_lbas, nr_lbas);
+                               line->id, emeta_buf->nr_valid_lbas, nr_lbas);
 
        line->left_msecs = 0;
 
@@ -195,7 +196,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
        struct pblk_line_meta *lm = &pblk->lm;
        int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
 
-       return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
+       return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
                                nr_bb * geo->sec_per_blk;
 }
 
@@ -240,7 +241,7 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
        r_ptr_int = r_ptr;
 
 next_read_rq:
-       memset(rqd, 0, pblk_r_rq_size);
+       memset(rqd, 0, pblk_g_rq_size);
 
        rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
        if (!rq_ppas)
@@ -256,7 +257,6 @@ next_read_rq:
 
        rqd->bio = bio;
        rqd->opcode = NVM_OP_PREAD;
-       rqd->flags = pblk_set_read_mode(pblk);
        rqd->meta_list = meta_list;
        rqd->nr_ppas = rq_ppas;
        rqd->ppa_list = ppa_list;
@@ -265,6 +265,11 @@ next_read_rq:
        rqd->end_io = pblk_end_io_sync;
        rqd->private = &wait;
 
+       if (pblk_io_aligned(pblk, rq_ppas))
+               rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+       else
+               rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
        for (i = 0; i < rqd->nr_ppas; ) {
                struct ppa_addr ppa;
                int pos;
@@ -295,7 +300,7 @@ next_read_rq:
                pr_err("pblk: L2P recovery read timed out\n");
                return -EINTR;
        }
-
+       atomic_dec(&pblk->inflight_io);
        reinit_completion(&wait);
 
        /* At this point, the read should not fail. If it does, it is a problem
@@ -322,47 +327,94 @@ next_read_rq:
        return 0;
 }
 
+static void pblk_recov_complete(struct kref *ref)
+{
+       struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
+
+       complete(&pad_rq->wait);
+}
+
+static void pblk_end_io_recov(struct nvm_rq *rqd)
+{
+       struct pblk_pad_rq *pad_rq = rqd->private;
+       struct pblk *pblk = pad_rq->pblk;
+       struct nvm_tgt_dev *dev = pblk->dev;
+
+       kref_put(&pad_rq->ref, pblk_recov_complete);
+       nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+       pblk_free_rqd(pblk, rqd, WRITE);
+}
+
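
Padding now uses a kref instead of a per-request synchronous wait: the submitter holds one reference, each in-flight pad request takes another, and the completion fires on the final put. A compressed userspace sketch of that choreography, using C11 atomics in place of kref/completion:

#include <stdio.h>
#include <stdatomic.h>

static atomic_int ref = 1;	/* kref_init(): submitter's reference */
static int done;

static void put(void)		/* kref_put(..., pblk_recov_complete) */
{
	if (atomic_fetch_sub(&ref, 1) == 1)
		done = 1;	/* complete(&pad_rq->wait) */
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		atomic_fetch_add(&ref, 1);	/* kref_get() per pad request */
	for (int i = 0; i < 3; i++)
		put();				/* end_io per pad request */
	put();	/* submitter drops its own reference, then would wait */
	printf("completed: %d\n", done);
	return 0;
}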
 static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
-                             struct pblk_recov_alloc p, int left_ppas)
+                             int left_ppas)
 {
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct ppa_addr *ppa_list;
        struct pblk_sec_meta *meta_list;
+       struct pblk_pad_rq *pad_rq;
        struct nvm_rq *rqd;
        struct bio *bio;
        void *data;
        dma_addr_t dma_ppa_list, dma_meta_list;
-       __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
+       __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
        u64 w_ptr = line->cur_sec;
-       int left_line_ppas = line->left_msecs;
-       int rq_ppas, rq_len;
+       int left_line_ppas, rq_ppas, rq_len;
        int i, j;
        int ret = 0;
-       DECLARE_COMPLETION_ONSTACK(wait);
 
-       ppa_list = p.ppa_list;
-       meta_list = p.meta_list;
-       rqd = p.rqd;
-       data = p.data;
-       dma_ppa_list = p.dma_ppa_list;
-       dma_meta_list = p.dma_meta_list;
+       spin_lock(&line->lock);
+       left_line_ppas = line->left_msecs;
+       spin_unlock(&line->lock);
+
+       pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
+       if (!pad_rq)
+               return -ENOMEM;
+
+       data = vzalloc(pblk->max_write_pgs * geo->sec_size);
+       if (!data) {
+               ret = -ENOMEM;
+               goto free_rq;
+       }
+
+       pad_rq->pblk = pblk;
+       init_completion(&pad_rq->wait);
+       kref_init(&pad_rq->ref);
 
 next_pad_rq:
        rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
-       if (!rq_ppas)
-               rq_ppas = pblk->min_write_pgs;
+       if (rq_ppas < pblk->min_write_pgs) {
+               pr_err("pblk: corrupted pad line %d\n", line->id);
+               ret = -EFAULT;
+               goto free_rq;
+       }
+
        rq_len = rq_ppas * geo->sec_size;
 
+       meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+       if (!meta_list) {
+               ret = -ENOMEM;
+               goto free_data;
+       }
+
+       ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+       dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+       rqd = pblk_alloc_rqd(pblk, WRITE);
+       if (IS_ERR(rqd)) {
+               ret = PTR_ERR(rqd);
+               goto fail_free_meta;
+       }
+       memset(rqd, 0, pblk_w_rq_size);
+
        bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
-       if (IS_ERR(bio))
-               return PTR_ERR(bio);
+       if (IS_ERR(bio)) {
+               ret = PTR_ERR(bio);
+               goto fail_free_rqd;
+       }
 
        bio->bi_iter.bi_sector = 0; /* internal bio */
        bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
-       memset(rqd, 0, pblk_r_rq_size);
-
        rqd->bio = bio;
        rqd->opcode = NVM_OP_PWRITE;
        rqd->flags = pblk_set_progr_mode(pblk, WRITE);
@@ -371,8 +423,8 @@ next_pad_rq:
        rqd->ppa_list = ppa_list;
        rqd->dma_ppa_list = dma_ppa_list;
        rqd->dma_meta_list = dma_meta_list;
-       rqd->end_io = pblk_end_io_sync;
-       rqd->private = &wait;
+       rqd->end_io = pblk_end_io_recov;
+       rqd->private = pad_rq;
 
        for (i = 0; i < rqd->nr_ppas; ) {
                struct ppa_addr ppa;
@@ -390,34 +442,51 @@ next_pad_rq:
 
                for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
                        struct ppa_addr dev_ppa;
+                       __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
 
                        dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
 
                        pblk_map_invalidate(pblk, dev_ppa);
-                       meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
-                       lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
+                       lba_list[w_ptr] = meta_list[i].lba = addr_empty;
                        rqd->ppa_list[i] = dev_ppa;
                }
        }
 
+       kref_get(&pad_rq->ref);
+
        ret = pblk_submit_io(pblk, rqd);
        if (ret) {
                pr_err("pblk: I/O submission failed: %d\n", ret);
-               return ret;
+               goto free_data;
        }
 
-       if (!wait_for_completion_io_timeout(&wait,
-                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
-               pr_err("pblk: L2P recovery write timed out\n");
-       }
-       reinit_completion(&wait);
+       atomic_dec(&pblk->inflight_io);
 
        left_line_ppas -= rq_ppas;
        left_ppas -= rq_ppas;
-       if (left_ppas > 0 && left_line_ppas)
+       if (left_ppas && left_line_ppas)
                goto next_pad_rq;
 
-       return 0;
+       kref_put(&pad_rq->ref, pblk_recov_complete);
+
+       if (!wait_for_completion_io_timeout(&pad_rq->wait,
+                               msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+               pr_err("pblk: pad write timed out\n");
+               ret = -ETIME;
+       }
+
+free_rq:
+       kfree(pad_rq);
+free_data:
+       vfree(data);
+       return ret;
+
+fail_free_rqd:
+       pblk_free_rqd(pblk, rqd, WRITE);
+fail_free_meta:
+       nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+       kfree(pad_rq);
+       return ret;
 }
 
 /* When this function is called, it means that not all upper pages have been
@@ -456,7 +525,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
        rec_round = 0;
 
 next_rq:
-       memset(rqd, 0, pblk_r_rq_size);
+       memset(rqd, 0, pblk_g_rq_size);
 
        rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
        if (!rq_ppas)
@@ -472,7 +541,6 @@ next_rq:
 
        rqd->bio = bio;
        rqd->opcode = NVM_OP_PREAD;
-       rqd->flags = pblk_set_read_mode(pblk);
        rqd->meta_list = meta_list;
        rqd->nr_ppas = rq_ppas;
        rqd->ppa_list = ppa_list;
@@ -481,6 +549,11 @@ next_rq:
        rqd->end_io = pblk_end_io_sync;
        rqd->private = &wait;
 
+       if (pblk_io_aligned(pblk, rq_ppas))
+               rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+       else
+               rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
        for (i = 0; i < rqd->nr_ppas; ) {
                struct ppa_addr ppa;
                int pos;
@@ -510,6 +583,7 @@ next_rq:
                                msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
                pr_err("pblk: L2P recovery read timed out\n");
        }
+       atomic_dec(&pblk->inflight_io);
        reinit_completion(&wait);
 
        /* This should not happen since the read failed during normal recovery,
@@ -544,7 +618,7 @@ next_rq:
                if (pad_secs > line->left_msecs)
                        pad_secs = line->left_msecs;
 
-               ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+               ret = pblk_recov_pad_oob(pblk, line, pad_secs);
                if (ret)
                        pr_err("pblk: OOB padding failed (err:%d)\n", ret);
 
@@ -552,7 +626,6 @@ next_rq:
                if (ret)
                        pr_err("pblk: OOB read failed (err:%d)\n", ret);
 
-               line->left_ssecs = line->left_msecs;
                left_ppas = 0;
        }
 
@@ -591,7 +664,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
        *done = 1;
 
 next_rq:
-       memset(rqd, 0, pblk_r_rq_size);
+       memset(rqd, 0, pblk_g_rq_size);
 
        rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
        if (!rq_ppas)
@@ -607,7 +680,6 @@ next_rq:
 
        rqd->bio = bio;
        rqd->opcode = NVM_OP_PREAD;
-       rqd->flags = pblk_set_read_mode(pblk);
        rqd->meta_list = meta_list;
        rqd->nr_ppas = rq_ppas;
        rqd->ppa_list = ppa_list;
@@ -616,6 +688,11 @@ next_rq:
        rqd->end_io = pblk_end_io_sync;
        rqd->private = &wait;
 
+       if (pblk_io_aligned(pblk, rq_ppas))
+               rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+       else
+               rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
        for (i = 0; i < rqd->nr_ppas; ) {
                struct ppa_addr ppa;
                int pos;
@@ -646,6 +723,7 @@ next_rq:
                                msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
                pr_err("pblk: L2P recovery read timed out\n");
        }
+       atomic_dec(&pblk->inflight_io);
        reinit_completion(&wait);
 
        /* Reached the end of the written line */
@@ -658,7 +736,6 @@ next_rq:
                /* Roll back failed sectors */
                line->cur_sec -= nr_error_bits;
                line->left_msecs += nr_error_bits;
-               line->left_ssecs = line->left_msecs;
                bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
 
                left_ppas = 0;
@@ -770,8 +847,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct pblk_line *line, *tline, *data_line = NULL;
-       struct line_smeta *smeta;
-       struct line_emeta *emeta;
+       struct pblk_smeta *smeta;
+       struct pblk_emeta *emeta;
+       struct line_smeta *smeta_buf;
        int found_lines = 0, recovered_lines = 0, open_lines = 0;
        int is_next = 0;
        int meta_line;
@@ -784,8 +862,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
        spin_lock(&l_mg->free_lock);
        meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
        set_bit(meta_line, &l_mg->meta_bitmap);
-       smeta = l_mg->sline_meta[meta_line].meta;
-       emeta = l_mg->eline_meta[meta_line].meta;
+       smeta = l_mg->sline_meta[meta_line];
+       emeta = l_mg->eline_meta[meta_line];
+       smeta_buf = (struct line_smeta *)smeta;
        spin_unlock(&l_mg->free_lock);
 
        /* Order data lines using their sequence number */
@@ -796,33 +875,33 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 
                memset(smeta, 0, lm->smeta_len);
                line->smeta = smeta;
-               line->lun_bitmap = ((void *)(smeta)) +
+               line->lun_bitmap = ((void *)(smeta_buf)) +
                                                sizeof(struct line_smeta);
 
                /* Lines that cannot be read are assumed as not written here */
                if (pblk_line_read_smeta(pblk, line))
                        continue;
 
-               crc = pblk_calc_smeta_crc(pblk, smeta);
-               if (le32_to_cpu(smeta->crc) != crc)
+               crc = pblk_calc_smeta_crc(pblk, smeta_buf);
+               if (le32_to_cpu(smeta_buf->crc) != crc)
                        continue;
 
-               if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
+               if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
                        continue;
 
-               if (le16_to_cpu(smeta->header.version) != 1) {
+               if (le16_to_cpu(smeta_buf->header.version) != 1) {
                        pr_err("pblk: found incompatible line version %u\n",
-                                       smeta->header.version);
+                                       smeta_buf->header.version);
                        return ERR_PTR(-EINVAL);
                }
 
                /* The first valid instance uuid is used for initialization */
                if (!valid_uuid) {
-                       memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
+                       memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
                        valid_uuid = 1;
                }
 
-               if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
+               if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
                        pr_debug("pblk: ignore line %u due to uuid mismatch\n",
                                        i);
                        continue;
@@ -830,9 +909,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 
                /* Update line metadata */
                spin_lock(&line->lock);
-               line->id = le32_to_cpu(line->smeta->header.id);
-               line->type = le16_to_cpu(line->smeta->header.type);
-               line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
+               line->id = le32_to_cpu(smeta_buf->header.id);
+               line->type = le16_to_cpu(smeta_buf->header.type);
+               line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
                spin_unlock(&line->lock);
 
                /* Update general metadata */
@@ -848,7 +927,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
                pblk_recov_line_add_ordered(&recov_list, line);
                found_lines++;
                pr_debug("pblk: recovering data line %d, seq:%llu\n",
-                                               line->id, smeta->seq_nr);
+                                               line->id, smeta_buf->seq_nr);
        }
 
        if (!found_lines) {
@@ -868,15 +947,15 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 
                recovered_lines++;
                /* Calculate where emeta starts based on the line bb */
-               off = lm->sec_per_line - lm->emeta_sec;
+               off = lm->sec_per_line - lm->emeta_sec[0];
                nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
                off -= nr_bb * geo->sec_per_pl;
 
-               memset(emeta, 0, lm->emeta_len);
-               line->emeta = emeta;
                line->emeta_ssec = off;
+               line->emeta = emeta;
+               memset(line->emeta->buf, 0, lm->emeta_len[0]);
 
-               if (pblk_line_read_emeta(pblk, line)) {
+               if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
                        pblk_recov_l2p_from_oob(pblk, line);
                        goto next;
                }
@@ -941,58 +1020,26 @@ out:
 }
 
 /*
- * Pad until smeta can be read on current data line
+ * Pad current line
  */
-void pblk_recov_pad(struct pblk *pblk)
+int pblk_recov_pad(struct pblk *pblk)
 {
-       struct nvm_tgt_dev *dev = pblk->dev;
-       struct nvm_geo *geo = &dev->geo;
        struct pblk_line *line;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-       struct nvm_rq *rqd;
-       struct pblk_recov_alloc p;
-       struct ppa_addr *ppa_list;
-       struct pblk_sec_meta *meta_list;
-       void *data;
-       dma_addr_t dma_ppa_list, dma_meta_list;
+       int left_msecs;
+       int ret = 0;
 
        spin_lock(&l_mg->free_lock);
        line = l_mg->data_line;
+       left_msecs = line->left_msecs;
        spin_unlock(&l_mg->free_lock);
 
-       rqd = pblk_alloc_rqd(pblk, READ);
-       if (IS_ERR(rqd))
-               return;
-
-       meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
-       if (!meta_list)
-               goto free_rqd;
-
-       ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
-       dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
-
-       data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
-       if (!data)
-               goto free_meta_list;
-
-       p.ppa_list = ppa_list;
-       p.meta_list = meta_list;
-       p.rqd = rqd;
-       p.data = data;
-       p.dma_ppa_list = dma_ppa_list;
-       p.dma_meta_list = dma_meta_list;
-
-       if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
-               pr_err("pblk: Tear down padding failed\n");
-               goto free_data;
+       ret = pblk_recov_pad_oob(pblk, line, left_msecs);
+       if (ret) {
+               pr_err("pblk: Tear down padding failed (%d)\n", ret);
+               return ret;
        }
 
-       pblk_line_close(pblk, line);
-
-free_data:
-       kfree(data);
-free_meta_list:
-       nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-free_rqd:
-       pblk_free_rqd(pblk, rqd, READ);
+       pblk_line_close_meta(pblk, line);
+       return ret;
 }
index ab7cbb1..2e6a536 100644
@@ -23,11 +23,35 @@ static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
        mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
 }
 
+int pblk_rl_is_limit(struct pblk_rl *rl)
+{
+       int rb_space;
+
+       rb_space = atomic_read(&rl->rb_space);
+
+       return (rb_space == 0);
+}
+
 int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
 {
        int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
+       int rb_space = atomic_read(&rl->rb_space);
 
-       return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+       if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
+               return NVM_IO_ERR;
+
+       if (rb_user_cnt >= rl->rb_user_max)
+               return NVM_IO_REQUEUE;
+
+       return NVM_IO_OK;
+}
+
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
+{
+       int rb_space = atomic_read(&rl->rb_space);
+
+       if (unlikely(rb_space >= 0))
+               atomic_sub(nr_entries, &rl->rb_space);
 }
 
 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
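
The tri-state return replaces the old boolean check with explicit flow
control. A hedged sketch of the intended caller pattern (the rate-limiter
helpers exist above; the surrounding code is illustrative):

	switch (pblk_rl_user_may_insert(rl, nr_entries)) {
	case NVM_IO_ERR:	/* rb_space exhausted: device at capacity */
		return NVM_IO_ERR;
	case NVM_IO_REQUEUE:	/* user budget spent: back off and retry */
		return NVM_IO_REQUEUE;
	default:		/* NVM_IO_OK */
		pblk_rl_inserted(rl, nr_entries); /* consume rb_space once */
	}
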
@@ -37,7 +61,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
 
        /* If there is no user I/O let GC take over space on the write buffer */
        rb_user_active = READ_ONCE(rl->rb_user_active);
-       return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+       return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
 }
 
 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +101,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
        unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
 
        if (free_blocks >= rl->high) {
-               rl->rb_user_max = max - rl->rb_gc_rsv;
-               rl->rb_gc_max = rl->rb_gc_rsv;
+               rl->rb_user_max = max;
+               rl->rb_gc_max = 0;
                rl->rb_state = PBLK_RL_HIGH;
        } else if (free_blocks < rl->high) {
                int shift = rl->high_pw - rl->rb_windows_pw;
                int user_windows = free_blocks >> shift;
                int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
-               int gc_max;
 
                rl->rb_user_max = user_max;
-               gc_max = max - rl->rb_user_max;
-               rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
-
-               if (free_blocks > rl->low)
-                       rl->rb_state = PBLK_RL_MID;
-               else
-                       rl->rb_state = PBLK_RL_LOW;
+               rl->rb_gc_max = max - user_max;
+
+               if (free_blocks <= rl->rsv_blocks) {
+                       rl->rb_user_max = 0;
+                       rl->rb_gc_max = max;
+               }
+
+               /* In the worst case, we will need to GC lines in the low list
+                * (high valid sector count). If there are lines to GC on the
+                * high or mid lists, those are prioritized first.
+                */
+               rl->rb_state = PBLK_RL_LOW;
        }
 
        return rl->rb_state;
 }
 
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
-{
-       rl->rb_gc_rsv = rl->rb_gc_max = rsv;
-}
-
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 {
        struct pblk *pblk = container_of(rl, struct pblk, rl);
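
To make the new split concrete, a worked example with assumed numbers (none
of these values are from the patch): take rl->high = 1024 (so high_pw = 10),
rb_windows_pw = 4 and PBLK_MAX_REQ_ADDRS_PW = 6. At 512 free blocks:

	shift        = high_pw - rb_windows_pw    = 6
	user_windows = 512 >> 6                   = 8
	rb_user_max  = 8 << PBLK_MAX_REQ_ADDRS_PW = 512 entries
	rb_gc_max    = max - 512

User capacity scales linearly with free blocks and GC takes the remainder,
until free_blocks falls to rsv_blocks and the whole budget flips to GC.
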
@@ -122,11 +145,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 
 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
 {
-       struct pblk *pblk = container_of(rl, struct pblk, rl);
        int blk_in_line = atomic_read(&line->blk_in_line);
-       int ret;
 
        atomic_sub(blk_in_line, &rl->free_blocks);
+}
+
+void pblk_gc_should_kick(struct pblk *pblk)
+{
+       struct pblk_rl *rl = &pblk->rl;
+       int ret;
 
        /* Rates will not change that often - no need to lock update */
        ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +163,16 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
                pblk_gc_should_stop(pblk);
 }
 
-int pblk_rl_gc_thrs(struct pblk_rl *rl)
+int pblk_rl_high_thrs(struct pblk_rl *rl)
 {
        return rl->high;
 }
 
+int pblk_rl_low_thrs(struct pblk_rl *rl)
+{
+       return rl->low;
+}
+
 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
 {
        return rl->rb_user_max;
@@ -161,24 +193,36 @@ void pblk_rl_free(struct pblk_rl *rl)
 
 void pblk_rl_init(struct pblk_rl *rl, int budget)
 {
+       struct pblk *pblk = container_of(rl, struct pblk, rl);
+       struct pblk_line_meta *lm = &pblk->lm;
+       int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
        unsigned int rb_windows;
 
        rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
-       rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
        rl->high_pw = get_count_order(rl->high);
 
+       rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+       if (rl->low < min_blocks)
+               rl->low = min_blocks;
+
+       rl->rsv_blocks = min_blocks;
+
        /* This will always be a power-of-2 */
        rb_windows = budget / PBLK_MAX_REQ_ADDRS;
-       rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+       rl->rb_windows_pw = get_count_order(rb_windows);
 
        /* To start with, all buffer is available to user I/O writers */
        rl->rb_budget = budget;
        rl->rb_user_max = budget;
-       atomic_set(&rl->rb_user_cnt, 0);
        rl->rb_gc_max = 0;
        rl->rb_state = PBLK_RL_HIGH;
+
+       atomic_set(&rl->rb_user_cnt, 0);
        atomic_set(&rl->rb_gc_cnt, 0);
+       atomic_set(&rl->rb_space, -1);
 
        setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
+
        rl->rb_user_active = 0;
+       rl->rb_gc_active = 0;
 }
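
For scale, a worked example with assumed numbers (not from the patch): with
total_blocks = 8192 and blk_per_line = 64, the new init computes

	rl->high       = 8192 / PBLK_USER_HIGH_THRS = 1024  (12.5% free)
	rl->low        = 8192 / PBLK_USER_LOW_THRS  =  819  (10% free)
	rl->rsv_blocks = 64 * PBLK_GC_RSV_LINE      =   64  (one line)

so user writes start being limited at 12.5% free blocks, GC turns aggressive
at 10%, and one full line stays reserved so GC can always make progress.
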
index f0af1d1..95fb434 100644 (file)
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
 
 static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
 {
-       struct nvm_tgt_dev *dev = pblk->dev;
-       struct nvm_geo *geo = &dev->geo;
        int free_blocks, total_blocks;
        int rb_user_max, rb_user_cnt;
-       int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+       int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
 
        free_blocks = atomic_read(&pblk->rl.free_blocks);
        rb_user_max = pblk->rl.rb_user_max;
        rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
        rb_gc_max = pblk->rl.rb_gc_max;
-       rb_gc_rsv = pblk->rl.rb_gc_rsv;
        rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
        rb_budget = pblk->rl.rb_budget;
        rb_state = pblk->rl.rb_state;
 
-       total_blocks = geo->blks_per_lun * geo->nr_luns;
+       total_blocks = pblk->rl.total_blocks;
 
        return snprintf(page, PAGE_SIZE,
-               "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+               "u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
                                rb_user_cnt,
                                rb_user_max,
                                rb_gc_cnt,
                                rb_gc_max,
-                               rb_gc_rsv,
                                rb_state,
                                rb_budget,
                                pblk->rl.low,
@@ -150,11 +146,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
        ssize_t sz = 0;
        int nr_free_lines;
        int cur_data, cur_log;
-       int free_line_cnt = 0, closed_line_cnt = 0;
+       int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
        int d_line_cnt = 0, l_line_cnt = 0;
        int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
-       int free = 0, bad = 0, cor = 0;
-       int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
+       int bad = 0, cor = 0;
+       int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
        int map_weight = 0, meta_weight = 0;
 
        spin_lock(&l_mg->free_lock);
@@ -166,6 +162,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
                free_line_cnt++;
        spin_unlock(&l_mg->free_lock);
 
+       spin_lock(&l_mg->close_lock);
+       list_for_each_entry(line, &l_mg->emeta_list, list)
+               emeta_line_cnt++;
+       spin_unlock(&l_mg->close_lock);
+
        spin_lock(&l_mg->gc_lock);
        list_for_each_entry(line, &l_mg->gc_full_list, list) {
                if (line->type == PBLK_LINETYPE_DATA)
@@ -212,8 +213,6 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
                gc_empty++;
        }
 
-       list_for_each_entry(line, &l_mg->free_list, list)
-               free++;
        list_for_each_entry(line, &l_mg->bad_list, list)
                bad++;
        list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -224,8 +223,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
        if (l_mg->data_line) {
                cur_sec = l_mg->data_line->cur_sec;
                msecs = l_mg->data_line->left_msecs;
-               ssecs = l_mg->data_line->left_ssecs;
-               vsc = l_mg->data_line->vsc;
+               vsc = le32_to_cpu(*l_mg->data_line->vsc);
                sec_in_line = l_mg->data_line->sec_in_line;
                meta_weight = bitmap_weight(&l_mg->meta_bitmap,
                                                        PBLK_DATA_LINES);
@@ -235,17 +233,20 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
        spin_unlock(&l_mg->free_lock);
 
        if (nr_free_lines != free_line_cnt)
-               pr_err("pblk: corrupted free line list\n");
+               pr_err("pblk: corrupted free line list:%d/%d\n",
+                                               nr_free_lines, free_line_cnt);
 
        sz = snprintf(page, PAGE_SIZE - sz,
                "line: nluns:%d, nblks:%d, nsecs:%d\n",
                geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
 
        sz += snprintf(page + sz, PAGE_SIZE - sz,
-               "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
+               "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
                                        cur_data, cur_log,
-                                       free, nr_free_lines, bad, cor,
+                                       nr_free_lines,
+                                       emeta_line_cnt, meta_weight,
                                        closed_line_cnt,
+                                       bad, cor,
                                        d_line_cnt, l_line_cnt,
                                        l_mg->nr_lines);
 
@@ -255,9 +256,10 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
                        atomic_read(&pblk->gc.inflight_gc));
 
        sz += snprintf(page + sz, PAGE_SIZE - sz,
-               "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
-                       cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
-                       map_weight, lm->sec_per_line, meta_weight);
+               "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
+                       cur_data, cur_sec, msecs, vsc, sec_in_line,
+                       map_weight, lm->sec_per_line,
+                       atomic_read(&pblk->inflight_io));
 
        return sz;
 }
@@ -274,7 +276,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
                                        lm->smeta_len, lm->smeta_sec);
        sz += snprintf(page + sz, PAGE_SIZE - sz,
                                "emeta - len:%d, sec:%d, bb_start:%d\n",
-                                       lm->emeta_len, lm->emeta_sec,
+                                       lm->emeta_len[0], lm->emeta_sec[0],
                                        lm->emeta_bb);
        sz += snprintf(page + sz, PAGE_SIZE - sz,
                                "bitmap lengths: sec:%d, blk:%d, lun:%d\n",
@@ -290,6 +292,11 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
        return sz;
 }
 
+static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
+{
+       return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
+}
+
 #ifdef CONFIG_NVM_DEBUG
 static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
 {
@@ -303,52 +310,51 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
                        atomic_long_read(&pblk->padded_wb),
                        atomic_long_read(&pblk->sub_writes),
                        atomic_long_read(&pblk->sync_writes),
-                       atomic_long_read(&pblk->compl_writes),
                        atomic_long_read(&pblk->recov_writes),
                        atomic_long_read(&pblk->recov_gc_writes),
                        atomic_long_read(&pblk->recov_gc_reads),
+                       atomic_long_read(&pblk->cache_reads),
                        atomic_long_read(&pblk->sync_reads));
 }
 #endif
 
-static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
-                                    size_t len)
+static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
+                                  size_t len)
 {
-       struct pblk_gc *gc = &pblk->gc;
        size_t c_len;
-       int value;
+       int force;
 
        c_len = strcspn(page, "\n");
        if (c_len >= len)
                return -EINVAL;
 
-       if (kstrtouint(page, 0, &value))
+       if (kstrtouint(page, 0, &force))
                return -EINVAL;
 
-       spin_lock(&gc->lock);
-       pblk_rl_set_gc_rsc(&pblk->rl, value);
-       spin_unlock(&gc->lock);
+       pblk_gc_sysfs_force(pblk, force);
 
        return len;
 }
 
-static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
-                                  size_t len)
+static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
+                                            const char *page, size_t len)
 {
        size_t c_len;
-       int force;
+       int sec_per_write;
 
        c_len = strcspn(page, "\n");
        if (c_len >= len)
                return -EINVAL;
 
-       if (kstrtouint(page, 0, &force))
+       if (kstrtouint(page, 0, &sec_per_write))
                return -EINVAL;
 
-       if (force < 0 || force > 1)
+       if (sec_per_write < pblk->min_write_pgs
+                               || sec_per_write > pblk->max_write_pgs
+                               || sec_per_write % pblk->min_write_pgs != 0)
                return -EINVAL;
 
-       pblk_gc_sysfs_force(pblk, force);
+       pblk_set_sec_per_write(pblk, sec_per_write);
 
        return len;
 }
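
As an example of the new constraint (assumed controller limits, not from the
patch): with min_write_pgs = 4 and max_write_pgs = 64, writing 16 to the
max_sec_per_write attribute is accepted (4 <= 16 <= 64 and 16 % 4 == 0),
while 10 is rejected with -EINVAL because it is not a multiple of 4.
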
@@ -398,9 +404,9 @@ static struct attribute sys_gc_force = {
        .mode = 0200,
 };
 
-static struct attribute sys_gc_rl_max = {
-       .name = "gc_rl_max",
-       .mode = 0200,
+static struct attribute sys_max_sec_per_write = {
+       .name = "max_sec_per_write",
+       .mode = 0644,
 };
 
 #ifdef CONFIG_NVM_DEBUG
@@ -416,7 +422,7 @@ static struct attribute *pblk_attrs[] = {
        &sys_errors_attr,
        &sys_gc_state,
        &sys_gc_force,
-       &sys_gc_rl_max,
+       &sys_max_sec_per_write,
        &sys_rb_attr,
        &sys_stats_ppaf_attr,
        &sys_lines_attr,
@@ -448,6 +454,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
                return pblk_sysfs_lines(pblk, buf);
        else if (strcmp(attr->name, "lines_info") == 0)
                return pblk_sysfs_lines_info(pblk, buf);
+       else if (strcmp(attr->name, "max_sec_per_write") == 0)
+               return pblk_sysfs_get_sec_per_write(pblk, buf);
 #ifdef CONFIG_NVM_DEBUG
        else if (strcmp(attr->name, "stats") == 0)
                return pblk_sysfs_stats_debug(pblk, buf);
@@ -460,10 +468,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
 {
        struct pblk *pblk = container_of(kobj, struct pblk, kobj);
 
-       if (strcmp(attr->name, "gc_rl_max") == 0)
-               return pblk_sysfs_rate_store(pblk, buf, len);
-       else if (strcmp(attr->name, "gc_force") == 0)
+       if (strcmp(attr->name, "gc_force") == 0)
                return pblk_sysfs_gc_force(pblk, buf, len);
+       else if (strcmp(attr->name, "max_sec_per_write") == 0)
+               return pblk_sysfs_set_sec_per_write(pblk, buf, len);
 
        return 0;
 }
index aef6fd7..d62a8f4 100644 (file)
 
 #include "pblk.h"
 
-static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
-{
-#ifdef CONFIG_NVM_DEBUG
-       atomic_long_inc(&pblk->sync_writes);
-#endif
-
-       /* Counter protected by rb sync lock */
-       line->left_ssecs--;
-       if (!line->left_ssecs)
-               pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-}
-
 static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
                                    struct pblk_c_ctx *c_ctx)
 {
@@ -39,21 +27,14 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
 
        for (i = 0; i < c_ctx->nr_valid; i++) {
                struct pblk_w_ctx *w_ctx;
-               struct ppa_addr p;
-               struct pblk_line *line;
 
                w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
-
-               p = rqd->ppa_list[i];
-               line = &pblk->lines[pblk_dev_ppa_to_line(p)];
-               pblk_sync_line(pblk, line);
-
                while ((original_bio = bio_list_pop(&w_ctx->bios)))
                        bio_endio(original_bio);
        }
 
 #ifdef CONFIG_NVM_DEBUG
-       atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
+       atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
 #endif
 
        ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
@@ -169,7 +150,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
        }
 
        INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
-       queue_work(pblk->kw_wq, &recovery->ws_rec);
+       queue_work(pblk->close_wq, &recovery->ws_rec);
 
 out:
        pblk_complete_write(pblk, rqd, c_ctx);
@@ -186,14 +167,50 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
        }
 #ifdef CONFIG_NVM_DEBUG
        else
-               WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+               WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
 #endif
 
        pblk_complete_write(pblk, rqd, c_ctx);
+       atomic_dec(&pblk->inflight_io);
+}
+
+static void pblk_end_io_write_meta(struct nvm_rq *rqd)
+{
+       struct pblk *pblk = rqd->private;
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
+       struct pblk_line *line = m_ctx->private;
+       struct pblk_emeta *emeta = line->emeta;
+       int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
+       struct pblk_lun *rlun = &pblk->luns[pos];
+       int sync;
+
+       up(&rlun->wr_sem);
+
+       if (rqd->error) {
+               pblk_log_write_err(pblk, rqd);
+               pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+       }
+#ifdef CONFIG_NVM_DEBUG
+       else
+               WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
+#endif
+
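+       /* Count the emeta sectors just persisted; the completion that
+        * brings the counter to nr_entries schedules the line close work.
+        */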
+       sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
+       if (sync == emeta->nr_entries)
+               pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
+                                                               pblk->close_wq);
+
+       bio_put(rqd->bio);
+       pblk_free_rqd(pblk, rqd, READ);
+
+       atomic_dec(&pblk->inflight_io);
 }
 
 static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
-                          unsigned int nr_secs)
+                          unsigned int nr_secs,
+                          nvm_end_io_fn(*end_io))
 {
        struct nvm_tgt_dev *dev = pblk->dev;
 
@@ -202,7 +219,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
        rqd->nr_ppas = nr_secs;
        rqd->flags = pblk_set_progr_mode(pblk, WRITE);
        rqd->private = pblk;
-       rqd->end_io = pblk_end_io_write;
+       rqd->end_io = end_io;
 
        rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
                                                        &rqd->dma_meta_list);
@@ -219,11 +236,10 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 }
 
 static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
-                          struct pblk_c_ctx *c_ctx)
+                          struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
 {
        struct pblk_line_meta *lm = &pblk->lm;
-       struct pblk_line *e_line = pblk_line_get_data_next(pblk);
-       struct ppa_addr erase_ppa;
+       struct pblk_line *e_line = pblk_line_get_erase(pblk);
        unsigned int valid = c_ctx->nr_valid;
        unsigned int padded = c_ctx->nr_padded;
        unsigned int nr_secs = valid + padded;
@@ -231,40 +247,23 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
        int ret = 0;
 
        lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
-       if (!lun_bitmap) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       if (!lun_bitmap)
+               return -ENOMEM;
        c_ctx->lun_bitmap = lun_bitmap;
 
-       ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
+       ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
        if (ret) {
                kfree(lun_bitmap);
-               goto out;
+               return ret;
        }
 
-       ppa_set_empty(&erase_ppa);
        if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
                pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
        else
                pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
-                                                       valid, &erase_ppa);
-
-out:
-       if (unlikely(e_line && !ppa_empty(erase_ppa))) {
-               if (pblk_blk_erase_async(pblk, erase_ppa)) {
-                       struct nvm_tgt_dev *dev = pblk->dev;
-                       struct nvm_geo *geo = &dev->geo;
-                       int bit;
-
-                       atomic_inc(&e_line->left_eblks);
-                       bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
-                       WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
-                       up(&pblk->erase_sem);
-               }
-       }
+                                                       valid, erase_ppa);
 
-       return ret;
+       return 0;
 }
 
 int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -280,7 +279,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
 
        c_ctx->lun_bitmap = lun_bitmap;
 
-       ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
+       ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
        if (ret)
                return ret;
 
@@ -311,16 +310,237 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
        return secs_to_sync;
 }
 
+static inline int pblk_valid_meta_ppa(struct pblk *pblk,
+                                     struct pblk_line *meta_line,
+                                     struct ppa_addr *ppa_list, int nr_ppas)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line *data_line;
+       struct ppa_addr ppa, ppa_opt;
+       u64 paddr;
+       int i;
+
+       data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
+       paddr = pblk_lookup_page(pblk, meta_line);
+       ppa = addr_to_gen_ppa(pblk, paddr, 0);
+
+       if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
+               return 1;
+
+       /* Schedule a metadata I/O that is half the distance from the data I/O
+        * with regard to the number of LUNs forming the pblk instance. This
+        * balances LUN conflicts across every I/O.
+        *
+        * When the LUN configuration changes (e.g., due to GC), this distance
+        * can align, which would result in a LUN deadlock. In this case, modify
+        * the distance so that it is no longer optimal, but still allows
+        * metadata I/Os to succeed.
+        */
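+       /* Illustration (assumed geometry, not from this patch): with 8 LUNs
+        * and a distance of 4 LUNs' worth of sectors, data landing on LUN n
+        * schedules metadata on LUN (n + 4) % 8, so the two streams only
+        * contend once the fallback below has degraded the distance.
+        */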
+       ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+       if (unlikely(ppa_opt.ppa == ppa.ppa)) {
+               data_line->meta_distance--;
+               return 0;
+       }
+
+       for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+               if (ppa_list[i].g.ch == ppa_opt.g.ch &&
+                                       ppa_list[i].g.lun == ppa_opt.g.lun)
+                       return 1;
+
+       if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
+               for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+                       if (ppa_list[i].g.ch == ppa.g.ch &&
+                                               ppa_list[i].g.lun == ppa.g.lun)
+                               return 0;
+
+               return 1;
+       }
+
+       return 0;
+}
+
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_emeta *emeta = meta_line->emeta;
+       struct pblk_g_ctx *m_ctx;
+       struct pblk_lun *rlun;
+       struct bio *bio;
+       struct nvm_rq *rqd;
+       void *data;
+       u64 paddr;
+       int rq_ppas = pblk->min_write_pgs;
+       int id = meta_line->id;
+       int rq_len;
+       int i, j;
+       int ret;
+
+       rqd = pblk_alloc_rqd(pblk, READ);
+       if (IS_ERR(rqd)) {
+               pr_err("pblk: cannot allocate write req.\n");
+               return PTR_ERR(rqd);
+       }
+       m_ctx = nvm_rq_to_pdu(rqd);
+       m_ctx->private = meta_line;
+
+       rq_len = rq_ppas * geo->sec_size;
+       data = ((void *)emeta->buf) + emeta->mem;
+
+       bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+                                       l_mg->emeta_alloc_type, GFP_KERNEL);
+       if (IS_ERR(bio)) {
+               ret = PTR_ERR(bio);
+               goto fail_free_rqd;
+       }
+       bio->bi_iter.bi_sector = 0; /* internal bio */
+       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+       rqd->bio = bio;
+
+       ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
+       if (ret)
+               goto fail_free_bio;
+
+       for (i = 0; i < rqd->nr_ppas; ) {
+               spin_lock(&meta_line->lock);
+               paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
+               spin_unlock(&meta_line->lock);
+               for (j = 0; j < rq_ppas; j++, i++, paddr++)
+                       rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
+       }
+
+       rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
+       ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+       if (ret) {
+               pr_err("pblk: lun semaphore timed out (%d)\n", ret);
+               goto fail_free_bio;
+       }
+
+       emeta->mem += rq_len;
+       if (emeta->mem >= lm->emeta_len[0]) {
+               spin_lock(&l_mg->close_lock);
+               list_del(&meta_line->list);
+               WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
+                               "pblk: corrupt meta line %d\n", meta_line->id);
+               spin_unlock(&l_mg->close_lock);
+       }
+
+       ret = pblk_submit_io(pblk, rqd);
+       if (ret) {
+               pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+               goto fail_rollback;
+       }
+
+       return NVM_IO_OK;
+
+fail_rollback:
+       spin_lock(&l_mg->close_lock);
+       pblk_dealloc_page(pblk, meta_line, rq_ppas);
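+       /* put the meta line back on the emeta list for a later attempt */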
+       list_add(&meta_line->list, &l_mg->emeta_list);
+       spin_unlock(&l_mg->close_lock);
+fail_free_bio:
+       if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
+               bio_put(bio);
+fail_free_rqd:
+       pblk_free_rqd(pblk, rqd, READ);
+       return ret;
+}
+
+static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
+                              int prev_n)
+{
+       struct pblk_line_meta *lm = &pblk->lm;
+       struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+       struct pblk_line *meta_line;
+
+       spin_lock(&l_mg->close_lock);
+retry:
+       if (list_empty(&l_mg->emeta_list)) {
+               spin_unlock(&l_mg->close_lock);
+               return 0;
+       }
+       meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
+       if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line))
+               goto retry;
+       spin_unlock(&l_mg->close_lock);
+
+       if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
+               return 0;
+
+       return pblk_submit_meta_io(pblk, meta_line);
+}
+
+static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
+{
+       struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+       struct ppa_addr erase_ppa;
+       int err;
+
+       ppa_set_empty(&erase_ppa);
+
+       /* Assign lbas to ppas and populate request structure */
+       err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+       if (err) {
+               pr_err("pblk: could not setup write request: %d\n", err);
+               return NVM_IO_ERR;
+       }
+
+       if (likely(ppa_empty(erase_ppa))) {
+               /* Submit metadata write for previous data line */
+               err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
+               if (err) {
+                       pr_err("pblk: metadata I/O submission failed: %d\n", err);
+                       return NVM_IO_ERR;
+               }
+
+               /* Submit data write for current data line */
+               err = pblk_submit_io(pblk, rqd);
+               if (err) {
+                       pr_err("pblk: data I/O submission failed: %d\n", err);
+                       return NVM_IO_ERR;
+               }
+       } else {
+               /* Submit data write for current data line */
+               err = pblk_submit_io(pblk, rqd);
+               if (err) {
+                       pr_err("pblk: data I/O submission failed: %d\n", err);
+                       return NVM_IO_ERR;
+               }
+
+               /* Submit available erase for next data line */
+               if (pblk_blk_erase_async(pblk, erase_ppa)) {
+                       struct pblk_line *e_line = pblk_line_get_erase(pblk);
+                       struct nvm_tgt_dev *dev = pblk->dev;
+                       struct nvm_geo *geo = &dev->geo;
+                       int bit;
+
+                       atomic_inc(&e_line->left_eblks);
+                       bit = pblk_ppa_to_pos(geo, erase_ppa);
+                       WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+               }
+       }
+
+       return NVM_IO_OK;
+}
+
+static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
+{
+       struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+       struct bio *bio = rqd->bio;
+
+       if (c_ctx->nr_padded)
+               pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
+}
+
 static int pblk_submit_write(struct pblk *pblk)
 {
        struct bio *bio;
        struct nvm_rq *rqd;
-       struct pblk_c_ctx *c_ctx;
-       unsigned int pgs_read;
        unsigned int secs_avail, secs_to_sync, secs_to_com;
        unsigned int secs_to_flush;
        unsigned long pos;
-       int err;
 
        /* If there are no sectors in the cache, flushes (bios without data)
         * will be cleared on the cache threads
@@ -338,7 +558,6 @@ static int pblk_submit_write(struct pblk *pblk)
                pr_err("pblk: cannot allocate write req.\n");
                return 1;
        }
-       c_ctx = nvm_rq_to_pdu(rqd);
 
        bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
        if (!bio) {
@@ -358,29 +577,14 @@ static int pblk_submit_write(struct pblk *pblk)
        secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
        pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
 
-       pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
-                                               secs_to_sync, secs_avail);
-       if (!pgs_read) {
+       if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
+                                                               secs_avail)) {
                pr_err("pblk: corrupted write bio\n");
                goto fail_put_bio;
        }
 
-       if (c_ctx->nr_padded)
-               if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
-                       goto fail_put_bio;
-
-       /* Assign lbas to ppas and populate request structure */
-       err = pblk_setup_w_rq(pblk, rqd, c_ctx);
-       if (err) {
-               pr_err("pblk: could not setup write request\n");
-               goto fail_free_bio;
-       }
-
-       err = pblk_submit_io(pblk, rqd);
-       if (err) {
-               pr_err("pblk: I/O submission failed: %d\n", err);
+       if (pblk_submit_io_set(pblk, rqd))
                goto fail_free_bio;
-       }
 
 #ifdef CONFIG_NVM_DEBUG
        atomic_long_add(secs_to_sync, &pblk->sub_writes);
@@ -389,8 +593,7 @@ static int pblk_submit_write(struct pblk *pblk)
        return 0;
 
 fail_free_bio:
-       if (c_ctx->nr_padded)
-               pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+       pblk_free_write_rqd(pblk, rqd);
 fail_put_bio:
        bio_put(bio);
 fail_free_rqd:
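
Taken together, the write path after this change factors as follows (a
summary sketch; all names appear in the hunks above):

	pblk_submit_write()
	  pblk_rb_read_to_bio()       /* pull ring entries, pad to min write */
	  pblk_submit_io_set()
	    pblk_setup_w_rq()         /* map lbas to ppas, pick erase candidate */
	    pblk_sched_meta_io()      /* emeta for a closing line, if no erase */
	    pblk_submit_io()          /* the data write itself */
	    pblk_blk_erase_async()    /* erase for next line, if one was picked */
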
index 99f3186..1593138 100644 (file)
 #define PBLK_MAX_REQ_ADDRS (64)
 #define PBLK_MAX_REQ_ADDRS_PW (6)
 
+#define PBLK_WS_POOL_SIZE (128)
+#define PBLK_META_POOL_SIZE (128)
+#define PBLK_READ_REQ_POOL_SIZE (1024)
+
+#define PBLK_NR_CLOSE_JOBS (4)
+
 #define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
 
 #define PBLK_COMMAND_TIMEOUT_MS 30000
@@ -72,11 +78,15 @@ enum {
        PBLK_BLK_ST_CLOSED =    0x2,
 };
 
+struct pblk_sec_meta {
+       u64 reserved;
+       __le64 lba;
+};
+
 /* The number of GC lists and the rate-limiter states go together. This way the
  * rate-limiter can dictate how much GC is needed based on resource utilization.
  */
-#define PBLK_NR_GC_LISTS 3
-#define PBLK_MAX_GC_JOBS 32
+#define PBLK_GC_NR_LISTS 3
 
 enum {
        PBLK_RL_HIGH = 1,
@@ -84,14 +94,9 @@ enum {
        PBLK_RL_LOW = 3,
 };
 
-struct pblk_sec_meta {
-       u64 reserved;
-       __le64 lba;
-};
-
 #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
 
-/* write completion context */
+/* write buffer completion context */
 struct pblk_c_ctx {
        struct list_head list;          /* Head for out-of-order completion */
 
@@ -101,9 +106,16 @@ struct pblk_c_ctx {
        unsigned int nr_padded;
 };
 
-/* Read context */
-struct pblk_r_ctx {
-       struct bio *orig_bio;
+/* generic context */
+struct pblk_g_ctx {
+       void *private;
+};
+
+/* Pad context */
+struct pblk_pad_rq {
+       struct pblk *pblk;
+       struct completion wait;
+       struct kref ref;
 };
 
 /* Recovery context */
@@ -195,29 +207,39 @@ struct pblk_lun {
 struct pblk_gc_rq {
        struct pblk_line *line;
        void *data;
-       u64 *lba_list;
+       u64 lba_list[PBLK_MAX_REQ_ADDRS];
        int nr_secs;
        int secs_to_gc;
        struct list_head list;
 };
 
 struct pblk_gc {
+       /* These states are not protected by a lock since (i) they are in the
+        * fast path, and (ii) they are not critical.
+        */
        int gc_active;
        int gc_enabled;
        int gc_forced;
-       int gc_jobs_active;
-       atomic_t inflight_gc;
 
        struct task_struct *gc_ts;
        struct task_struct *gc_writer_ts;
+       struct task_struct *gc_reader_ts;
+
+       struct workqueue_struct *gc_line_reader_wq;
        struct workqueue_struct *gc_reader_wq;
+
        struct timer_list gc_timer;
 
+       struct semaphore gc_sem;
+       atomic_t inflight_gc;
        int w_entries;
+
        struct list_head w_list;
+       struct list_head r_list;
 
        spinlock_t lock;
        spinlock_t w_lock;
+       spinlock_t r_lock;
 };
 
 struct pblk_rl {
@@ -229,10 +251,8 @@ struct pblk_rl {
                                 */
        unsigned int high_pw;   /* High rounded up as a power of 2 */
 
-#define PBLK_USER_HIGH_THRS 2  /* Begin write limit at 50 percent
-                                * available blks
-                                */
-#define PBLK_USER_LOW_THRS 20  /* Aggressive GC at 5% available blocks */
+#define PBLK_USER_HIGH_THRS 8  /* Begin write limit at 12% available blks */
+#define PBLK_USER_LOW_THRS 10  /* Aggressive GC at 10% available blocks */
 
        int rb_windows_pw;      /* Number of rate windows in the write buffer
                                 * given as a power-of-2. This guarantees that
@@ -244,13 +264,19 @@ struct pblk_rl {
                                 */
        int rb_budget;          /* Total number of entries available for I/O */
        int rb_user_max;        /* Max buffer entries available for user I/O */
-       atomic_t rb_user_cnt;   /* User I/O buffer counter */
        int rb_gc_max;          /* Max buffer entries available for GC I/O */
        int rb_gc_rsv;          /* Reserved buffer entries for GC I/O */
        int rb_state;           /* Rate-limiter current state */
+
+       atomic_t rb_user_cnt;   /* User I/O buffer counter */
        atomic_t rb_gc_cnt;     /* GC I/O buffer counter */
+       atomic_t rb_space;      /* Space limit in case of reaching capacity */
+
+       int rsv_blocks;         /* Reserved blocks for GC */
 
        int rb_user_active;
+       int rb_gc_active;
+
        struct timer_list u_timer;
 
        unsigned long long nr_secs;
@@ -258,8 +284,6 @@ struct pblk_rl {
        atomic_t free_blocks;
 };
 
-#define PBLK_LINE_NR_LUN_BITMAP 2
-#define PBLK_LINE_NR_SEC_BITMAP 2
 #define PBLK_LINE_EMPTY (~0U)
 
 enum {
@@ -310,16 +334,19 @@ struct line_smeta {
        __le32 window_wr_lun;   /* Number of parallel LUNs to write */
 
        __le32 rsvd[2];
+
+       __le64 lun_bitmap[];
 };
 
 /*
- * Metadata Layout:
- *     1. struct pblk_emeta
- *     2. nr_lbas u64 forming lba list
- *     3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
- *     4. nr_luns bits (u64 format) forming line bad block bitmap
- *
- *     3. and 4. will be part of FTL log
+ * Metadata layout in media:
+ *     First sector:
+ *             1. struct line_emeta
+ *             2. bad block bitmap (u64 * window_wr_lun)
+ *     Mid sectors (start at lbas_sector):
+ *             3. nr_lbas (u64) forming lba list
+ *     Last sectors (start at vsc_sector):
+ *             4. u32 valid sector count (vsc) for all lines (~0U: free line)
  */
 struct line_emeta {
        struct line_header header;
@@ -339,6 +366,23 @@ struct line_emeta {
        __le32 next_id;         /* Line id for next line */
        __le64 nr_lbas;         /* Number of lbas mapped in line */
        __le64 nr_valid_lbas;   /* Number of valid lbas mapped in line */
+       __le64 bb_bitmap[];     /* Updated bad block bitmap for line */
+};
+
+struct pblk_emeta {
+       struct line_emeta *buf;         /* emeta buffer in media format */
+       int mem;                        /* Write offset - points to next
+                                        * writable entry in memory
+                                        */
+       atomic_t sync;                  /* Synced - backpointer that signals the
+                                        * last entry that has been successfully
+                                        * persisted to media
+                                        */
+       unsigned int nr_entries;        /* Number of emeta entries */
+};
+
+struct pblk_smeta {
+       struct line_smeta *buf;         /* smeta buffer in persistent format */
 };
 
 struct pblk_line {
@@ -355,9 +399,12 @@ struct pblk_line {
 
        unsigned long *lun_bitmap;      /* Bitmap for LUNs mapped in line */
 
-       struct line_smeta *smeta;       /* Start metadata */
-       struct line_emeta *emeta;       /* End metadata */
+       struct pblk_smeta *smeta;       /* Start metadata */
+       struct pblk_emeta *emeta;       /* End metadata */
+
        int meta_line;                  /* Metadata line id */
+       int meta_distance;              /* Distance between data and metadata */
+
        u64 smeta_ssec;                 /* Sector where smeta starts */
        u64 emeta_ssec;                 /* Sector where emeta starts */
 
@@ -374,9 +421,10 @@ struct pblk_line {
        atomic_t left_seblks;           /* Blocks left for sync erasing */
 
        int left_msecs;                 /* Sectors left for mapping */
-       int left_ssecs;                 /* Sectors left to sync */
        unsigned int cur_sec;           /* Sector map pointer */
-       unsigned int vsc;               /* Valid sector count in line */
+       unsigned int nr_valid_lbas;     /* Number of valid lbas in line */
+
+       __le32 *vsc;                    /* Valid sector count in line */
 
        struct kref ref;                /* Write buffer L2P references */
 
@@ -385,13 +433,15 @@ struct pblk_line {
 
 #define PBLK_DATA_LINES 4
 
-enum{
+enum {
        PBLK_KMALLOC_META = 1,
        PBLK_VMALLOC_META = 2,
 };
 
-struct pblk_line_metadata {
-       void *meta;
+enum {
+       PBLK_EMETA_TYPE_HEADER = 1,     /* struct line_emeta first sector */
+       PBLK_EMETA_TYPE_LLBA = 2,       /* lba list - type: __le64 */
+       PBLK_EMETA_TYPE_VSC = 3,        /* vsc list - type: __le32 */
 };
 
 struct pblk_line_mgmt {
@@ -404,7 +454,7 @@ struct pblk_line_mgmt {
        struct list_head bad_list;      /* Full lines bad */
 
        /* GC lists - use gc_lock */
-       struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+       struct list_head *gc_lists[PBLK_GC_NR_LISTS];
        struct list_head gc_high_list;  /* Full lines ready to GC, high isc */
        struct list_head gc_mid_list;   /* Full lines ready to GC, mid isc */
        struct list_head gc_low_list;   /* Full lines ready to GC, low isc */
@@ -417,13 +467,16 @@ struct pblk_line_mgmt {
        struct pblk_line *log_next;     /* Next FTL log line */
        struct pblk_line *data_next;    /* Next data line */
 
+       struct list_head emeta_list;    /* Lines queued to schedule emeta */
+
+       __le32 *vsc_list;               /* Valid sector counts for all lines */
+
        /* Metadata allocation type: VMALLOC | KMALLOC */
-       int smeta_alloc_type;
        int emeta_alloc_type;
 
        /* Pre-allocated metadata for data lines */
-       struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
-       struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
+       struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
+       struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
        unsigned long meta_bitmap;
 
        /* Helpers for fast bitmap calculations */
@@ -434,25 +487,40 @@ struct pblk_line_mgmt {
        unsigned long l_seq_nr;         /* Log line unique sequence number */
 
        spinlock_t free_lock;
+       spinlock_t close_lock;
        spinlock_t gc_lock;
 };
 
 struct pblk_line_meta {
        unsigned int smeta_len;         /* Total length for smeta */
-       unsigned int smeta_sec;         /* Sectors needed for smeta*/
-       unsigned int emeta_len;         /* Total length for emeta */
-       unsigned int emeta_sec;         /* Sectors needed for emeta*/
+       unsigned int smeta_sec;         /* Sectors needed for smeta */
+
+       unsigned int emeta_len[4];      /* Lengths for emeta:
+                                        *  [0]: Total length
+                                        *  [1]: struct line_emeta length
+                                        *  [2]: L2P portion length
+                                        *  [3]: vsc list length
+                                        */
+       unsigned int emeta_sec[4];      /* Sectors needed for emeta. Same layout
+                                        * as emeta_len
+                                        */
+
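+       /* Illustration (assumed sizes, not from this patch): for a line
+        * with N mapped lbas on a target with L lines,
+        *   emeta_len[1] ~ sizeof(struct line_emeta) plus its bb bitmap,
+        *   emeta_len[2] ~ N * sizeof(__le64)  (L2P portion),
+        *   emeta_len[3] ~ L * sizeof(__le32)  (vsc list),
+        * and emeta_len[0] is their sum rounded up to full sectors.
+        */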
        unsigned int emeta_bb;          /* Boundary for bb that affects emeta */
+
+       unsigned int vsc_list_len;      /* Length for vsc list */
        unsigned int sec_bitmap_len;    /* Length for sector bitmap in line */
        unsigned int blk_bitmap_len;    /* Length for block bitmap in line */
        unsigned int lun_bitmap_len;    /* Length for lun bitmap in line */
 
        unsigned int blk_per_line;      /* Number of blocks in a full line */
        unsigned int sec_per_line;      /* Number of sectors in a line */
+       unsigned int dsec_per_line;     /* Number of data sectors in a line */
        unsigned int min_blk_line;      /* Min. number of good blocks in line */
 
        unsigned int mid_thrs;          /* Threshold for GC mid list */
        unsigned int high_thrs;         /* Threshold for GC high list */
+
+       unsigned int meta_distance;     /* Distance between data and metadata */
 };
 
 struct pblk_addr_format {
@@ -470,6 +538,13 @@ struct pblk_addr_format {
        u8      sec_offset;
 };
 
+enum {
+       PBLK_STATE_RUNNING = 0,
+       PBLK_STATE_STOPPING = 1,
+       PBLK_STATE_RECOVERING = 2,
+       PBLK_STATE_STOPPED = 3,
+};
+
 struct pblk {
        struct nvm_tgt_dev *dev;
        struct gendisk *disk;
@@ -487,6 +562,8 @@ struct pblk {
 
        struct pblk_rb rwb;
 
+       int state;                      /* pblk line state */
+
        int min_write_pgs; /* Minimum amount of pages required by controller */
        int max_write_pgs; /* Maximum amount of pages supported by controller */
        int pgs_in_buffer; /* Number of pages that need to be held in buffer to
@@ -499,7 +576,7 @@ struct pblk {
        /* pblk provisioning values. Used by rate limiter */
        struct pblk_rl rl;
 
-       struct semaphore erase_sem;
+       int sec_per_write;
 
        unsigned char instance_uuid[16];
 #ifdef CONFIG_NVM_DEBUG
@@ -511,8 +588,8 @@ struct pblk {
        atomic_long_t req_writes;       /* Sectors stored on write buffer */
        atomic_long_t sub_writes;       /* Sectors submitted from buffer */
        atomic_long_t sync_writes;      /* Sectors synced to media */
-       atomic_long_t compl_writes;     /* Sectors completed in write bio */
        atomic_long_t inflight_reads;   /* Inflight sector read requests */
+       atomic_long_t cache_reads;      /* Read requests that hit the cache */
        atomic_long_t sync_reads;       /* Completed sector read requests */
        atomic_long_t recov_writes;     /* Sectors submitted from recovery */
        atomic_long_t recov_gc_writes;  /* Sectors submitted from write GC */
@@ -528,6 +605,8 @@ struct pblk {
        atomic_long_t write_failed;
        atomic_long_t erase_failed;
 
+       atomic_t inflight_io;           /* General inflight I/O counter */
+
        struct task_struct *writer_ts;
 
        /* Simple translation map of logical addresses to physical addresses.
@@ -542,11 +621,13 @@ struct pblk {
        mempool_t *page_pool;
        mempool_t *line_ws_pool;
        mempool_t *rec_pool;
-       mempool_t *r_rq_pool;
+       mempool_t *g_rq_pool;
        mempool_t *w_rq_pool;
        mempool_t *line_meta_pool;
 
-       struct workqueue_struct *kw_wq;
+       struct workqueue_struct *close_wq;
+       struct workqueue_struct *bb_wq;
+
        struct timer_list wtimer;
 
        struct pblk_gc gc;
@@ -559,7 +640,7 @@ struct pblk_line_ws {
        struct work_struct ws;
 };
 
-#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
+#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
 #define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
 
 /*
@@ -579,18 +660,17 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
                            struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
                            unsigned int pos);
 struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
+void pblk_rb_flush(struct pblk_rb *rb);
 
 void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
-                                struct pblk_c_ctx *c_ctx,
-                                unsigned int pos,
-                                unsigned int nr_entries,
-                                unsigned int count);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+                                struct bio *bio, unsigned int pos,
+                                unsigned int nr_entries, unsigned int count);
 unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
                                      struct list_head *list,
                                      unsigned int max);
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
-                       u64 pos, int bio_iter);
+                       struct ppa_addr ppa, int bio_iter);
 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
 
 unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -601,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
 unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
 
 unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
 unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
 
 int pblk_rb_tear_down_check(struct pblk_rb *rb);
@@ -612,40 +693,50 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
  * pblk core
  */
 struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
 int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
                        struct pblk_c_ctx *c_ctx);
 void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
-void pblk_flush_writer(struct pblk *pblk);
+void pblk_wait_for_meta(struct pblk *pblk);
 struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
 void pblk_discard(struct pblk *pblk, struct bio *bio);
 void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
 void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
 int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
 struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
                              unsigned int nr_secs, unsigned int len,
-                             gfp_t gfp_mask);
+                             int alloc_type, gfp_t gfp_mask);
 struct pblk_line *pblk_line_get(struct pblk *pblk);
 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
+void pblk_line_replace_data(struct pblk *pblk);
 int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
 struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
 int pblk_line_is_full(struct pblk_line *line);
 void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close_ws(struct work_struct *work);
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_close_meta_sync(struct pblk *pblk);
+void pblk_line_close_ws(struct work_struct *work);
+void pblk_pipeline_stop(struct pblk *pblk);
 void pblk_line_mark_bb(struct work_struct *work);
 void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
-                     void (*work)(struct work_struct *));
+                     void (*work)(struct work_struct *),
+                     struct workqueue_struct *wq);
 u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
 int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+                        void *emeta_buf);
 int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
 void pblk_line_put(struct kref *ref);
 struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
                   unsigned long secs_to_flush);
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -656,11 +747,11 @@ void pblk_end_bio_sync(struct bio *bio);
 void pblk_end_io_sync(struct nvm_rq *rqd);
 int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
                       int nr_pages);
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
-                            u64 paddr);
 void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
                         int nr_pages);
 void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+                          u64 paddr);
 void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
 void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
                           struct ppa_addr ppa);
@@ -702,6 +793,7 @@ void pblk_write_should_kick(struct pblk *pblk);
 /*
  * pblk read path
  */
+extern struct bio_set *pblk_bio_set;
 int pblk_submit_read(struct pblk *pblk, struct bio *bio);
 int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
                        unsigned int nr_secs, unsigned int *secs_to_gc,
@@ -711,7 +803,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
  */
 void pblk_submit_rec(struct work_struct *work);
 struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
-void pblk_recov_pad(struct pblk *pblk);
+int pblk_recov_pad(struct pblk *pblk);
 __le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
 int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
                        struct pblk_rec_ctx *recovery, u64 *comp_bits,
@@ -720,33 +812,40 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
 /*
  * pblk gc
  */
-#define PBLK_GC_TRIES 3
+#define PBLK_GC_MAX_READERS 8  /* Max number of outstanding GC reader jobs */
+#define PBLK_GC_W_QD 128       /* Queue depth for inflight GC write I/Os */
+#define PBLK_GC_L_QD 4         /* Queue depth for inflight GC lines */
+#define PBLK_GC_RSV_LINE 1     /* Reserved lines for GC */
 
 int pblk_gc_init(struct pblk *pblk);
 void pblk_gc_exit(struct pblk *pblk);
 void pblk_gc_should_start(struct pblk *pblk);
 void pblk_gc_should_stop(struct pblk *pblk);
-int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_should_kick(struct pblk *pblk);
+void pblk_gc_kick(struct pblk *pblk);
 void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
                              int *gc_active);
-void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+int pblk_gc_sysfs_force(struct pblk *pblk, int force);
 
 /*
  * pblk rate limiter
  */
 void pblk_rl_init(struct pblk_rl *rl, int budget);
 void pblk_rl_free(struct pblk_rl *rl);
-int pblk_rl_gc_thrs(struct pblk_rl *rl);
+int pblk_rl_high_thrs(struct pblk_rl *rl);
+int pblk_rl_low_thrs(struct pblk_rl *rl);
 unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
 int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left);
+int pblk_rl_is_limit(struct pblk_rl *rl);
 
 /*
  * pblk sysfs
@@ -774,9 +873,30 @@ static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
        return c_ctx - sizeof(struct nvm_rq);
 }
 
-static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
+static inline void *emeta_to_bb(struct line_emeta *emeta)
+{
+       return emeta->bb_bitmap;
+}
+
+static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
+{
+       return ((void *)emeta + pblk->lm.emeta_len[1]);
+}
+
+static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
 {
-       return (emeta) + 1;
+       return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
+}
+
+static inline int pblk_line_vsc(struct pblk_line *line)
+{
+       int vsc;
+
+       spin_lock(&line->lock);
+       vsc = le32_to_cpu(*line->vsc);
+       spin_unlock(&line->lock);
+
+       return vsc;
 }
 
 #define NVM_MEM_PAGE_WRITE (8)
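
The emeta accessors above suggest that line end metadata is now laid out in
segments described by pblk->lm.emeta_len[]: the CRC hunk below uses
emeta_len[0] as the total length, emeta_to_lbas() places the LBA list at
offset emeta_len[1], and the valid-sector-count (vsc) array sits
emeta_len[2] bytes past the LBA list. A hedged sketch of walking that
layout (field meanings inferred from these helpers, not stated by the
diff):

    /* Sketch: walk the segmented emeta layout implied by the helpers
     * above; lbas[] and vsc[] hold per-sector entries for the line.
     */
    static void emeta_walk_example(struct pblk *pblk,
                                   struct line_emeta *emeta)
    {
            __le64 *lbas = emeta_to_lbas(pblk, emeta);
            __le32 *vsc = emeta_to_vsc(pblk, emeta);

            pr_debug("pblk: first lba %llu, first vsc %u\n",
                     (unsigned long long)le64_to_cpu(lbas[0]),
                     le32_to_cpu(vsc[0]));
    }
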
@@ -917,6 +1037,14 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
        ppa_addr->ppa = ADDR_EMPTY;
 }
 
+static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
+{
+       if (lppa.ppa == rppa.ppa)
+               return true;
+
+       return false;
+}
+
 static inline int pblk_addr_in_cache(struct ppa_addr ppa)
 {
        return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
@@ -964,11 +1092,11 @@ static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
 }
 
 static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
-                                           struct line_smeta *smeta)
+                                           struct line_header *header)
 {
        u32 crc = ~(u32)0;
 
-       crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc),
+       crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
                                sizeof(struct line_header) - sizeof(crc));
 
        return crc;
@@ -996,7 +1124,7 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
 
        crc = crc32_le(crc, (unsigned char *)emeta +
                                sizeof(struct line_header) + sizeof(crc),
-                               lm->emeta_len -
+                               lm->emeta_len[0] -
                                sizeof(struct line_header) - sizeof(crc));
 
        return crc;
@@ -1016,9 +1144,27 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
        return flags;
 }
 
-static inline int pblk_set_read_mode(struct pblk *pblk)
+enum {
+       PBLK_READ_RANDOM        = 0,
+       PBLK_READ_SEQUENTIAL    = 1,
+};
+
+static inline int pblk_set_read_mode(struct pblk *pblk, int type)
+{
+       struct nvm_tgt_dev *dev = pblk->dev;
+       struct nvm_geo *geo = &dev->geo;
+       int flags;
+
+       flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+       if (type == PBLK_READ_SEQUENTIAL)
+               flags |= geo->plane_mode >> 1;
+
+       return flags;
+}
+
+static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
 {
-       return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+       return !(nr_secs % pblk->min_write_pgs);
 }
 
 #ifdef CONFIG_NVM_DEBUG
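
pblk_set_read_mode() used to hardwire single-plane access; it now takes a
hint so sequential reads can exploit the device's plane mode
(geo->plane_mode >> 1 apparently converts NVM_PLANE_DOUBLE/QUAD into the
matching NVM_IO_*_ACCESS flag value). pblk_io_aligned() gives callers a
cheap test for whether a request is a whole multiple of the minimal write
unit. A hedged sketch of a caller (assumed usage, not from this diff):

    /* Sketch: request sequential (multi-plane) access only when the
     * I/O is aligned to the device's minimal write unit.
     */
    static int pblk_example_read_flags(struct pblk *pblk, int nr_secs)
    {
            if (pblk_io_aligned(pblk, nr_secs))
                    return pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);

            return pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
    }
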
index cf0e28a..267f01a 100644 (file)
@@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio)
 {
        struct completion *waiting = bio->bi_private;
 
-       if (bio->bi_error)
-               pr_err("nvm: gc request failed (%u).\n", bio->bi_error);
+       if (bio->bi_status)
+               pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
 
        complete(waiting);
 }
@@ -359,7 +359,7 @@ try:
                        goto finished;
                }
                wait_for_completion_io(&wait);
-               if (bio->bi_error) {
+               if (bio->bi_status) {
                        rrpc_inflight_laddr_release(rrpc, rqd);
                        goto finished;
                }
@@ -385,7 +385,7 @@ try:
                wait_for_completion_io(&wait);
 
                rrpc_inflight_laddr_release(rrpc, rqd);
-               if (bio->bi_error)
+               if (bio->bi_status)
                        goto finished;
 
                bio_reset(bio);
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
        struct nvm_rq *rqd;
        int err;
 
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        if (bio_op(bio) == REQ_OP_DISCARD) {
                rrpc_discard(rrpc, bio);
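
From this point on, most hunks are mechanical fallout of two block layer
API changes in this series: struct bio's int bi_error (a negative errno)
is replaced by a blk_status_t bi_status carrying BLK_STS_* codes, with
blk_status_to_errno()/errno_to_blk_status() converting at boundaries that
still speak errnos; and blk_queue_split() loses its bio_set argument, the
queue's own split bioset being used internally. A minimal sketch of the
new completion idiom, under those assumptions:

    /* Sketch: read bi_status on completion and convert to an errno
     * only where an int error code is still required.
     */
    static void example_endio(struct bio *bio)
    {
            if (bio->bi_status)
                    pr_err("example: I/O failed, errno %d\n",
                           blk_status_to_errno(bio->bi_status));

            bio_put(bio);
    }
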
index c3ea03c..dee542f 100644 (file)
@@ -849,10 +849,11 @@ static inline void wake_up_allocators(struct cache_set *c)
 
 /* Forward declarations */
 
-void bch_count_io_errors(struct cache *, int, const char *);
+void bch_count_io_errors(struct cache *, blk_status_t, const char *);
 void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
-                             int, const char *);
-void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
+                             blk_status_t, const char *);
+void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
+               const char *);
 void bch_bbio_free(struct bio *, struct cache_set *);
 struct bio *bch_bbio_alloc(struct cache_set *);
 
index 450d0e8..866dcf7 100644 (file)
@@ -307,7 +307,7 @@ static void bch_btree_node_read(struct btree *b)
        bch_submit_bbio(bio, b->c, &b->key, 0);
        closure_sync(&cl);
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                set_btree_node_io_error(b);
 
        bch_bbio_free(bio, b->c);
@@ -374,10 +374,10 @@ static void btree_node_write_endio(struct bio *bio)
        struct closure *cl = bio->bi_private;
        struct btree *b = container_of(cl, struct btree, io);
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                set_btree_node_io_error(b);
 
-       bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree");
+       bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree");
        closure_put(cl);
 }
 
index 06f5505..35a5a72 100644 (file)
@@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
        struct bio_vec bv, cbv;
        struct bvec_iter iter, citer = { 0 };
 
-       check = bio_clone(bio, GFP_NOIO);
+       check = bio_clone_kmalloc(bio, GFP_NOIO);
        if (!check)
                return;
        check->bi_opf = REQ_OP_READ;
index db45a88..6a9b850 100644 (file)
@@ -50,7 +50,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
 
 /* IO errors */
 
-void bch_count_io_errors(struct cache *ca, int error, const char *m)
+void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
 {
        /*
         * The halflife of an error is:
@@ -103,7 +103,7 @@ void bch_count_io_errors(struct cache *ca, int error, const char *m)
 }
 
 void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
-                             int error, const char *m)
+                             blk_status_t error, const char *m)
 {
        struct bbio *b = container_of(bio, struct bbio, bio);
        struct cache *ca = PTR_CACHE(c, &b->key, 0);
@@ -132,7 +132,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
 }
 
 void bch_bbio_endio(struct cache_set *c, struct bio *bio,
-                   int error, const char *m)
+                   blk_status_t error, const char *m)
 {
        struct closure *cl = bio->bi_private;
 
index 1198e53..0352d05 100644 (file)
@@ -549,7 +549,7 @@ static void journal_write_endio(struct bio *bio)
 {
        struct journal_write *w = bio->bi_private;
 
-       cache_set_err_on(bio->bi_error, w->c, "journal io error");
+       cache_set_err_on(bio->bi_status, w->c, "journal io error");
        closure_put(&w->c->journal.io);
 }
 
index 13b8a90..f633b30 100644 (file)
@@ -63,14 +63,14 @@ static void read_moving_endio(struct bio *bio)
        struct moving_io *io = container_of(bio->bi_private,
                                            struct moving_io, cl);
 
-       if (bio->bi_error)
-               io->op.error = bio->bi_error;
+       if (bio->bi_status)
+               io->op.status = bio->bi_status;
        else if (!KEY_DIRTY(&b->key) &&
                 ptr_stale(io->op.c, &b->key, 0)) {
-               io->op.error = -EINTR;
+               io->op.status = BLK_STS_IOERR;
        }
 
-       bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move");
+       bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
 }
 
 static void moving_init(struct moving_io *io)
@@ -92,7 +92,7 @@ static void write_moving(struct closure *cl)
        struct moving_io *io = container_of(cl, struct moving_io, cl);
        struct data_insert_op *op = &io->op;
 
-       if (!op->error) {
+       if (!op->status) {
                moving_init(io);
 
                io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
index 709c9cc..019b3df 100644 (file)
@@ -81,7 +81,7 @@ static void bch_data_insert_keys(struct closure *cl)
        if (ret == -ESRCH) {
                op->replace_collision = true;
        } else if (ret) {
-               op->error               = -ENOMEM;
+               op->status              = BLK_STS_RESOURCE;
                op->insert_data_done    = true;
        }
 
@@ -178,17 +178,17 @@ static void bch_data_insert_endio(struct bio *bio)
        struct closure *cl = bio->bi_private;
        struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                /* TODO: We could try to recover from this. */
                if (op->writeback)
-                       op->error = bio->bi_error;
+                       op->status = bio->bi_status;
                else if (!op->replace)
                        set_closure_fn(cl, bch_data_insert_error, op->wq);
                else
                        set_closure_fn(cl, NULL, NULL);
        }
 
-       bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache");
+       bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
 }
 
 static void bch_data_insert_start(struct closure *cl)
@@ -488,15 +488,15 @@ static void bch_cache_read_endio(struct bio *bio)
         * from the backing device.
         */
 
-       if (bio->bi_error)
-               s->iop.error = bio->bi_error;
+       if (bio->bi_status)
+               s->iop.status = bio->bi_status;
        else if (!KEY_DIRTY(&b->key) &&
                 ptr_stale(s->iop.c, &b->key, 0)) {
                atomic_long_inc(&s->iop.c->cache_read_races);
-               s->iop.error = -EINTR;
+               s->iop.status = BLK_STS_IOERR;
        }
 
-       bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache");
+       bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
 }
 
 /*
@@ -593,9 +593,9 @@ static void request_endio(struct bio *bio)
 {
        struct closure *cl = bio->bi_private;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                struct search *s = container_of(cl, struct search, cl);
-               s->iop.error = bio->bi_error;
+               s->iop.status = bio->bi_status;
                /* Only cache read errors are recoverable */
                s->recoverable = false;
        }
@@ -611,7 +611,7 @@ static void bio_complete(struct search *s)
                                    &s->d->disk->part0, s->start_time);
 
                trace_bcache_request_end(s->d, s->orig_bio);
-               s->orig_bio->bi_error = s->iop.error;
+               s->orig_bio->bi_status = s->iop.status;
                bio_endio(s->orig_bio);
                s->orig_bio = NULL;
        }
@@ -664,7 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
        s->iop.inode            = d->id;
        s->iop.write_point      = hash_long((unsigned long) current, 16);
        s->iop.write_prio       = 0;
-       s->iop.error            = 0;
+       s->iop.status           = 0;
        s->iop.flags            = 0;
        s->iop.flush_journal    = op_is_flush(bio->bi_opf);
        s->iop.wq               = bcache_wq;
@@ -707,7 +707,7 @@ static void cached_dev_read_error(struct closure *cl)
                /* Retry from the backing device: */
                trace_bcache_read_retry(s->orig_bio);
 
-               s->iop.error = 0;
+               s->iop.status = 0;
                do_bio_hook(s, s->orig_bio);
 
                /* XXX: invalidate cache */
@@ -767,7 +767,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
                                  !s->cache_miss, s->iop.bypass);
        trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
 
-       if (s->iop.error)
+       if (s->iop.status)
                continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
        else if (s->iop.bio || verify(dc, &s->bio.bio))
                continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
index 1ff3687..7689176 100644 (file)
@@ -10,7 +10,7 @@ struct data_insert_op {
        unsigned                inode;
        uint16_t                write_point;
        uint16_t                write_prio;
-       short                   error;
+       blk_status_t            status;
 
        union {
                uint16_t        flags;
index e57353e..8352fad 100644 (file)
@@ -271,7 +271,7 @@ static void write_super_endio(struct bio *bio)
 {
        struct cache *ca = bio->bi_private;
 
-       bch_count_io_errors(ca, bio->bi_error, "writing superblock");
+       bch_count_io_errors(ca, bio->bi_status, "writing superblock");
        closure_put(&ca->set->sb_write);
 }
 
@@ -321,7 +321,7 @@ static void uuid_endio(struct bio *bio)
        struct closure *cl = bio->bi_private;
        struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
 
-       cache_set_err_on(bio->bi_error, c, "accessing uuids");
+       cache_set_err_on(bio->bi_status, c, "accessing uuids");
        bch_bbio_free(bio, c);
        closure_put(cl);
 }
@@ -494,7 +494,7 @@ static void prio_endio(struct bio *bio)
 {
        struct cache *ca = bio->bi_private;
 
-       cache_set_err_on(bio->bi_error, ca->set, "accessing priorities");
+       cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
        bch_bbio_free(bio, ca->set);
        closure_put(&ca->prio);
 }
@@ -782,7 +782,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
 
        minor *= BCACHE_MINORS;
 
-       if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+       if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+                                          BIOSET_NEED_BVECS |
+                                          BIOSET_NEED_RESCUER)) ||
            !(d->disk = alloc_disk(BCACHE_MINORS))) {
                ida_simple_remove(&bcache_minor, minor);
                return -ENOMEM;
@@ -1516,7 +1518,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
                                sizeof(struct bbio) + sizeof(struct bio_vec) *
                                bucket_pages(c))) ||
            !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
-           !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+           !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+                                          BIOSET_NEED_BVECS |
+                                          BIOSET_NEED_RESCUER)) ||
            !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
            !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
                                                WQ_MEM_RECLAIM, 0)) ||
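
bioset_create() gains a flags argument in this series: BIOSET_NEED_BVECS
requests the integrated biovec pool (needed by callers that clone or
build multi-page bios), and BIOSET_NEED_RESCUER sets up the rescuer
workqueue that protects stacking drivers from deadlocks when bios are
resubmitted from inside generic_make_request(). A hedged sketch of the
new call, with a hypothetical front-pad structure:

    /* Sketch: a driver-private container embedding the bio, so the
     * bioset's front pad covers the fields before it.
     */
    struct example_bbio {
            void *driver_data;      /* hypothetical per-bio state */
            struct bio bio;         /* must be last */
    };

    static struct bio_set *example_create_bioset(void)
    {
            return bioset_create(4, offsetof(struct example_bbio, bio),
                                 BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
    }
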
index 6ac2e48..42c66e7 100644 (file)
@@ -167,7 +167,7 @@ static void dirty_endio(struct bio *bio)
        struct keybuf_key *w = bio->bi_private;
        struct dirty_io *io = w->private;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                SET_KEY_DIRTY(&w->key, false);
 
        closure_put(&io->cl);
@@ -195,7 +195,7 @@ static void read_dirty_endio(struct bio *bio)
        struct dirty_io *io = w->private;
 
        bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
-                           bio->bi_error, "reading dirty data from cache");
+                           bio->bi_status, "reading dirty data from cache");
 
        dirty_endio(bio);
 }
index ae7da2c..82d2738 100644 (file)
@@ -229,7 +229,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
 EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
 
 void dm_cell_error(struct dm_bio_prison *prison,
-                  struct dm_bio_prison_cell *cell, int error)
+                  struct dm_bio_prison_cell *cell, blk_status_t error)
 {
        struct bio_list bios;
        struct bio *bio;
@@ -238,7 +238,7 @@ void dm_cell_error(struct dm_bio_prison *prison,
        dm_cell_release(prison, cell, &bios);
 
        while ((bio = bio_list_pop(&bios))) {
-               bio->bi_error = error;
+               bio->bi_status = error;
                bio_endio(bio);
        }
 }
index cddd4ac..cec52ac 100644 (file)
@@ -91,7 +91,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
                               struct dm_bio_prison_cell *cell,
                               struct bio_list *inmates);
 void dm_cell_error(struct dm_bio_prison *prison,
-                  struct dm_bio_prison_cell *cell, int error);
+                  struct dm_bio_prison_cell *cell, blk_status_t error);
 
 /*
  * Visits the cell and then releases.  Guarantees no new inmates are
index 840c149..850ff6c 100644 (file)
@@ -145,8 +145,8 @@ struct dm_buffer {
        enum data_mode data_mode;
        unsigned char list_mode;                /* LIST_* */
        unsigned hold_count;
-       int read_error;
-       int write_error;
+       blk_status_t read_error;
+       blk_status_t write_error;
        unsigned long state;
        unsigned long last_accessed;
        struct dm_bufio_client *c;
@@ -555,7 +555,7 @@ static void dmio_complete(unsigned long error, void *context)
 {
        struct dm_buffer *b = context;
 
-       b->bio.bi_error = error ? -EIO : 0;
+       b->bio.bi_status = error ? BLK_STS_IOERR : 0;
        b->bio.bi_end_io(&b->bio);
 }
 
@@ -588,7 +588,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 
        r = dm_io(&io_req, 1, &region, NULL);
        if (r) {
-               b->bio.bi_error = r;
+               b->bio.bi_status = errno_to_blk_status(r);
                end_io(&b->bio);
        }
 }
@@ -596,7 +596,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 static void inline_endio(struct bio *bio)
 {
        bio_end_io_t *end_fn = bio->bi_private;
-       int error = bio->bi_error;
+       blk_status_t status = bio->bi_status;
 
        /*
         * Reset the bio to free any attached resources
@@ -604,7 +604,7 @@ static void inline_endio(struct bio *bio)
         */
        bio_reset(bio);
 
-       bio->bi_error = error;
+       bio->bi_status = status;
        end_fn(bio);
 }
 
@@ -685,11 +685,12 @@ static void write_endio(struct bio *bio)
 {
        struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 
-       b->write_error = bio->bi_error;
-       if (unlikely(bio->bi_error)) {
+       b->write_error = bio->bi_status;
+       if (unlikely(bio->bi_status)) {
                struct dm_bufio_client *c = b->c;
-               int error = bio->bi_error;
-               (void)cmpxchg(&c->async_write_error, 0, error);
+
+               (void)cmpxchg(&c->async_write_error, 0,
+                               blk_status_to_errno(bio->bi_status));
        }
 
        BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -1063,7 +1064,7 @@ static void read_endio(struct bio *bio)
 {
        struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 
-       b->read_error = bio->bi_error;
+       b->read_error = bio->bi_status;
 
        BUG_ON(!test_bit(B_READING, &b->state));
 
@@ -1107,7 +1108,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
        wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 
        if (b->read_error) {
-               int error = b->read_error;
+               int error = blk_status_to_errno(b->read_error);
 
                dm_bufio_release(b);
 
@@ -1257,7 +1258,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
  */
 int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
 {
-       int a, f;
+       blk_status_t a;
+       int f;
        unsigned long buffers_processed = 0;
        struct dm_buffer *b, *tmp;
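
Note the boundary dm-bufio keeps here: the per-buffer read_error and
write_error fields become blk_status_t, but clients of dm-bufio still see
classic errnos, so async_write_error stays an int and the first failing
status is converted with blk_status_to_errno() before being latched. A
small sketch of that latch, assuming the same first-error-wins semantics:

    /* Sketch: record only the first asynchronous failure, as an errno
     * for the client-facing side of the API.
     */
    static void example_record_error(int *first_errno, blk_status_t status)
    {
            if (status)
                    (void)cmpxchg(first_errno, 0,
                                  blk_status_to_errno(status));
    }
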
 
index d682a05..c5ea03f 100644 (file)
@@ -119,7 +119,7 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
  */
 struct continuation {
        struct work_struct ws;
-       int input;
+       blk_status_t input;
 };
 
 static inline void init_continuation(struct continuation *k,
@@ -145,7 +145,7 @@ struct batcher {
        /*
         * The operation that everyone is waiting for.
         */
-       int (*commit_op)(void *context);
+       blk_status_t (*commit_op)(void *context);
        void *commit_context;
 
        /*
@@ -171,8 +171,7 @@ struct batcher {
 static void __commit(struct work_struct *_ws)
 {
        struct batcher *b = container_of(_ws, struct batcher, commit_work);
-
-       int r;
+       blk_status_t r;
        unsigned long flags;
        struct list_head work_items;
        struct work_struct *ws, *tmp;
@@ -205,7 +204,7 @@ static void __commit(struct work_struct *_ws)
 
        while ((bio = bio_list_pop(&bios))) {
                if (r) {
-                       bio->bi_error = r;
+                       bio->bi_status = r;
                        bio_endio(bio);
                } else
                        b->issue_op(bio, b->issue_context);
@@ -213,7 +212,7 @@ static void __commit(struct work_struct *_ws)
 }
 
 static void batcher_init(struct batcher *b,
-                        int (*commit_op)(void *),
+                        blk_status_t (*commit_op)(void *),
                         void *commit_context,
                         void (*issue_op)(struct bio *bio, void *),
                         void *issue_context,
@@ -955,7 +954,7 @@ static void writethrough_endio(struct bio *bio)
 
        dm_unhook_bio(&pb->hook_info, bio);
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                bio_endio(bio);
                return;
        }
@@ -1220,7 +1219,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
        struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
 
        if (read_err || write_err)
-               mg->k.input = -EIO;
+               mg->k.input = BLK_STS_IOERR;
 
        queue_continuation(mg->cache->wq, &mg->k);
 }
@@ -1266,8 +1265,8 @@ static void overwrite_endio(struct bio *bio)
 
        dm_unhook_bio(&pb->hook_info, bio);
 
-       if (bio->bi_error)
-               mg->k.input = bio->bi_error;
+       if (bio->bi_status)
+               mg->k.input = bio->bi_status;
 
        queue_continuation(mg->cache->wq, &mg->k);
 }
@@ -1323,8 +1322,10 @@ static void mg_complete(struct dm_cache_migration *mg, bool success)
                if (mg->overwrite_bio) {
                        if (success)
                                force_set_dirty(cache, cblock);
+                       else if (mg->k.input)
+                               mg->overwrite_bio->bi_status = mg->k.input;
                        else
-                               mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO);
+                               mg->overwrite_bio->bi_status = BLK_STS_IOERR;
                        bio_endio(mg->overwrite_bio);
                } else {
                        if (success)
@@ -1504,7 +1505,7 @@ static void mg_copy(struct work_struct *ws)
                r = copy(mg, is_policy_promote);
                if (r) {
                        DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
-                       mg->k.input = -EIO;
+                       mg->k.input = BLK_STS_IOERR;
                        mg_complete(mg, false);
                }
        }
@@ -1907,12 +1908,12 @@ static int commit(struct cache *cache, bool clean_shutdown)
 /*
  * Used by the batcher.
  */
-static int commit_op(void *context)
+static blk_status_t commit_op(void *context)
 {
        struct cache *cache = context;
 
        if (dm_cache_changed_this_transaction(cache->cmd))
-               return commit(cache, false);
+               return errno_to_blk_status(commit(cache, false));
 
        return 0;
 }
@@ -2018,7 +2019,7 @@ static void requeue_deferred_bios(struct cache *cache)
        bio_list_init(&cache->deferred_bios);
 
        while ((bio = bio_list_pop(&bios))) {
-               bio->bi_error = DM_ENDIO_REQUEUE;
+               bio->bi_status = BLK_STS_DM_REQUEUE;
                bio_endio(bio);
        }
 }
@@ -2820,7 +2821,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
        return r;
 }
 
-static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int cache_end_io(struct dm_target *ti, struct bio *bio,
+               blk_status_t *error)
 {
        struct cache *cache = ti->private;
        unsigned long flags;
@@ -2838,7 +2840,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
        bio_drop_shared_lock(cache, bio);
        accounted_complete(cache, bio);
 
-       return 0;
+       return DM_ENDIO_DONE;
 }
 
 static int write_dirty_bitset(struct cache *cache)
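
The batcher in dm-cache collects bios that must wait for a metadata
commit; with commit_op() returning blk_status_t, a failed commit can be
stamped straight into each waiting bio's bi_status, while the
errno-returning commit() helper is adapted with errno_to_blk_status(). A
hedged sketch of that adaptation (example_commit is hypothetical):

    /* Sketch: hypothetical errno-based commit, wrapped for the
     * batcher's blk_status_t contract as commit_op() is above.
     */
    static int example_commit(void *context)
    {
            /* flush metadata; return 0 or a negative errno */
            return 0;
    }

    static blk_status_t example_commit_op(void *context)
    {
            return errno_to_blk_status(example_commit(context));
    }
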
index ebf9e72..9e1b72e 100644 (file)
@@ -71,7 +71,7 @@ struct dm_crypt_io {
        struct convert_context ctx;
 
        atomic_t io_pending;
-       int error;
+       blk_status_t error;
        sector_t sector;
 
        struct rb_node rb_node;
@@ -1292,7 +1292,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
 /*
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
-static int crypt_convert(struct crypt_config *cc,
+static blk_status_t crypt_convert(struct crypt_config *cc,
                         struct convert_context *ctx)
 {
        unsigned int tag_offset = 0;
@@ -1343,13 +1343,13 @@ static int crypt_convert(struct crypt_config *cc,
                 */
                case -EBADMSG:
                        atomic_dec(&ctx->cc_pending);
-                       return -EILSEQ;
+                       return BLK_STS_PROTECTION;
                /*
                 * There was an error while processing the request.
                 */
                default:
                        atomic_dec(&ctx->cc_pending);
-                       return -EIO;
+                       return BLK_STS_IOERR;
                }
        }
 
@@ -1463,7 +1463,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 {
        struct crypt_config *cc = io->cc;
        struct bio *base_bio = io->base_bio;
-       int error = io->error;
+       blk_status_t error = io->error;
 
        if (!atomic_dec_and_test(&io->io_pending))
                return;
@@ -1476,7 +1476,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
        else
                kfree(io->integrity_metadata);
 
-       base_bio->bi_error = error;
+       base_bio->bi_status = error;
        bio_endio(base_bio);
 }
 
@@ -1502,7 +1502,7 @@ static void crypt_endio(struct bio *clone)
        struct dm_crypt_io *io = clone->bi_private;
        struct crypt_config *cc = io->cc;
        unsigned rw = bio_data_dir(clone);
-       int error;
+       blk_status_t error;
 
        /*
         * free the processed pages
@@ -1510,7 +1510,7 @@ static void crypt_endio(struct bio *clone)
        if (rw == WRITE)
                crypt_free_buffer_pages(cc, clone);
 
-       error = clone->bi_error;
+       error = clone->bi_status;
        bio_put(clone);
 
        if (rw == READ && !error) {
@@ -1570,7 +1570,7 @@ static void kcryptd_io_read_work(struct work_struct *work)
 
        crypt_inc_pending(io);
        if (kcryptd_io_read(io, GFP_NOIO))
-               io->error = -ENOMEM;
+               io->error = BLK_STS_RESOURCE;
        crypt_dec_pending(io);
 }
 
@@ -1656,7 +1656,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
        sector_t sector;
        struct rb_node **rbp, *parent;
 
-       if (unlikely(io->error < 0)) {
+       if (unlikely(io->error)) {
                crypt_free_buffer_pages(cc, clone);
                bio_put(clone);
                crypt_dec_pending(io);
@@ -1697,7 +1697,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
        struct bio *clone;
        int crypt_finished;
        sector_t sector = io->sector;
-       int r;
+       blk_status_t r;
 
        /*
         * Prevent io from disappearing until this function completes.
@@ -1707,7 +1707,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 
        clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
        if (unlikely(!clone)) {
-               io->error = -EIO;
+               io->error = BLK_STS_IOERR;
                goto dec;
        }
 
@@ -1718,7 +1718,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 
        crypt_inc_pending(io);
        r = crypt_convert(cc, &io->ctx);
-       if (r < 0)
+       if (r)
                io->error = r;
        crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
 
@@ -1740,7 +1740,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
 static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 {
        struct crypt_config *cc = io->cc;
-       int r = 0;
+       blk_status_t r;
 
        crypt_inc_pending(io);
 
@@ -1748,7 +1748,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
                           io->sector);
 
        r = crypt_convert(cc, &io->ctx);
-       if (r < 0)
+       if (r)
                io->error = r;
 
        if (atomic_dec_and_test(&io->ctx.cc_pending))
@@ -1781,9 +1781,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
        if (error == -EBADMSG) {
                DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
                            (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
-               io->error = -EILSEQ;
+               io->error = BLK_STS_PROTECTION;
        } else if (error < 0)
-               io->error = -EIO;
+               io->error = BLK_STS_IOERR;
 
        crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
 
@@ -2677,7 +2677,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                goto bad;
        }
 
-       cc->bs = bioset_create(MIN_IOS, 0);
+       cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
+                                           BIOSET_NEED_RESCUER));
        if (!cc->bs) {
                ti->error = "Cannot allocate crypt bioset";
                goto bad;
@@ -2795,10 +2796,10 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
         * and is aligned to this size as defined in IO hints.
         */
        if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0))
-               return -EIO;
+               return DM_MAPIO_KILL;
 
        if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1)))
-               return -EIO;
+               return DM_MAPIO_KILL;
 
        io = dm_per_bio_data(bio, cc->per_bio_data_size);
        crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
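
dm-crypt now reports block status codes directly: an authenticated
encryption tag mismatch (-EBADMSG from the crypto layer) becomes
BLK_STS_PROTECTION, other crypto failures BLK_STS_IOERR, allocation
failures BLK_STS_RESOURCE, and misaligned bios are rejected from
crypt_map() with DM_MAPIO_KILL rather than a raw -EIO. A sketch of the
mapping inferred from the hunks above:

    /* Sketch: crypto-layer return code to blk_status_t, as
     * crypt_convert() appears to map them.
     */
    static blk_status_t example_map_crypto_err(int err)
    {
            switch (err) {
            case 0:
                    return BLK_STS_OK;
            case -EBADMSG:          /* AEAD integrity check failed */
                    return BLK_STS_PROTECTION;
            default:
                    return BLK_STS_IOERR;
            }
    }
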
index 13305a1..3d04d5c 100644 (file)
@@ -321,7 +321,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
                if (bio_data_dir(bio) == READ) {
                        if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
                            !test_bit(ERROR_WRITES, &fc->flags))
-                               return -EIO;
+                               return DM_MAPIO_KILL;
                        goto map_bio;
                }
 
@@ -349,7 +349,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
                /*
                 * By default, error all I/O.
                 */
-               return -EIO;
+               return DM_MAPIO_KILL;
        }
 
 map_bio:
@@ -358,12 +358,13 @@ map_bio:
        return DM_MAPIO_REMAPPED;
 }
 
-static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+               blk_status_t *error)
 {
        struct flakey_c *fc = ti->private;
        struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
 
-       if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+       if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
                if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
                    all_corrupt_bio_flags_match(bio, fc)) {
                        /*
@@ -377,11 +378,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
                         * Error read during the down_interval if drop_writes
                         * and error_writes were not configured.
                         */
-                       return -EIO;
+                       *error = BLK_STS_IOERR;
                }
        }
 
-       return error;
+       return DM_ENDIO_DONE;
 }
 
 static void flakey_status(struct dm_target *ti, status_type_t type,
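
dm-flakey illustrates the new contract for bio-based targets: .map()
returns a DM_MAPIO_* code (DM_MAPIO_KILL to fail the bio, instead of a
negative errno), and .end_io() receives a blk_status_t *error that it may
overwrite, returning DM_ENDIO_DONE when core dm should complete the bio
with whatever *error now holds. A minimal hedged sketch of a target
implementing both hooks (all "example" names are hypothetical):

    #include <linux/device-mapper.h>

    struct example_ctx {
            struct dm_dev *dev;             /* hypothetical backing device */
    };

    static int example_map(struct dm_target *ti, struct bio *bio)
    {
            struct example_ctx *ec = ti->private;

            if (!ec->dev)
                    return DM_MAPIO_KILL;   /* core dm fails the bio */

            bio->bi_bdev = ec->dev->bdev;
            return DM_MAPIO_REMAPPED;
    }

    static int example_end_io(struct dm_target *ti, struct bio *bio,
                              blk_status_t *error)
    {
            if (*error == BLK_STS_NOTSUPP)
                    *error = BLK_STS_OK;    /* swallow unsupported ops */

            return DM_ENDIO_DONE;           /* complete with *error */
    }
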
index 7910bfe..1b224aa 100644 (file)
@@ -246,7 +246,7 @@ struct dm_integrity_io {
        unsigned metadata_offset;
 
        atomic_t in_flight;
-       int bi_error;
+       blk_status_t bi_status;
 
        struct completion *completion;
 
@@ -1105,18 +1105,21 @@ static void schedule_autocommit(struct dm_integrity_c *ic)
 static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
 {
        struct bio *bio;
-       spin_lock_irq(&ic->endio_wait.lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ic->endio_wait.lock, flags);
        bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
        bio_list_add(&ic->flush_bio_list, bio);
-       spin_unlock_irq(&ic->endio_wait.lock);
+       spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
+
        queue_work(ic->commit_wq, &ic->commit_work);
 }
 
 static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
 {
        int r = dm_integrity_failed(ic);
-       if (unlikely(r) && !bio->bi_error)
-               bio->bi_error = r;
+       if (unlikely(r) && !bio->bi_status)
+               bio->bi_status = errno_to_blk_status(r);
        bio_endio(bio);
 }
 
@@ -1124,7 +1127,7 @@ static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *di
 {
        struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 
-       if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic)))
+       if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
                submit_flush_bio(ic, dio);
        else
                do_endio(ic, bio);
@@ -1143,9 +1146,9 @@ static void dec_in_flight(struct dm_integrity_io *dio)
 
                bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
 
-               if (unlikely(dio->bi_error) && !bio->bi_error)
-                       bio->bi_error = dio->bi_error;
-               if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
+               if (unlikely(dio->bi_status) && !bio->bi_status)
+                       bio->bi_status = dio->bi_status;
+               if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
                        dio->range.logical_sector += dio->range.n_sectors;
                        bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
                        INIT_WORK(&dio->work, integrity_bio_wait);
@@ -1319,7 +1322,7 @@ skip_io:
        dec_in_flight(dio);
        return;
 error:
-       dio->bi_error = r;
+       dio->bi_status = errno_to_blk_status(r);
        dec_in_flight(dio);
 }
 
@@ -1332,7 +1335,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
        sector_t area, offset;
 
        dio->ic = ic;
-       dio->bi_error = 0;
+       dio->bi_status = 0;
 
        if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
                submit_flush_bio(ic, dio);
@@ -1353,13 +1356,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
                DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
                      (unsigned long long)dio->range.logical_sector, bio_sectors(bio),
                      (unsigned long long)ic->provided_data_sectors);
-               return -EIO;
+               return DM_MAPIO_KILL;
        }
        if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
                DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
                      ic->sectors_per_block,
                      (unsigned long long)dio->range.logical_sector, bio_sectors(bio));
-               return -EIO;
+               return DM_MAPIO_KILL;
        }
 
        if (ic->sectors_per_block > 1) {
@@ -1369,7 +1372,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
                        if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
                                DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
                                        bv.bv_offset, bv.bv_len, ic->sectors_per_block);
-                               return -EIO;
+                               return DM_MAPIO_KILL;
                        }
                }
        }
@@ -1384,18 +1387,18 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
                                wanted_tag_size *= ic->tag_size;
                        if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
                                DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
-                               return -EIO;
+                               return DM_MAPIO_KILL;
                        }
                }
        } else {
                if (unlikely(bip != NULL)) {
                        DMERR("Unexpected integrity data when using internal hash");
-                       return -EIO;
+                       return DM_MAPIO_KILL;
                }
        }
 
        if (unlikely(ic->mode == 'R') && unlikely(dio->write))
-               return -EIO;
+               return DM_MAPIO_KILL;
 
        get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
        dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
@@ -3040,6 +3043,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
                ti->error = "The device is too small";
                goto bad;
        }
+       if (ti->len > ic->provided_data_sectors) {
+               r = -EINVAL;
+               ti->error = "Not enough provided sectors for requested mapping size";
+               goto bad;
+       }
 
        if (!buffer_sectors)
                buffer_sectors = 1;
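
Two independent fixes ride along in dm-integrity: submit_flush_bio()
switches to spin_lock_irqsave() because it can be reached from bio
completion context (via do_endio_flush() above), where interrupts may
already be disabled and the unconditional spin_unlock_irq() would wrongly
re-enable them; and the constructor now rejects tables longer than the
provided_data_sectors the device can actually back. A sketch of the
irqsave idiom for contrast:

    /* Sketch: the irqsave form preserves the caller's interrupt state,
     * so the same helper is safe from process and completion context.
     */
    static void example_queue_bio(spinlock_t *lock, struct bio_list *list,
                                  struct bio *bio)
    {
            unsigned long flags;

            spin_lock_irqsave(lock, flags);
            bio_list_add(list, bio);
            spin_unlock_irqrestore(lock, flags);
    }
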
index 3702e50..2503960 100644 (file)
@@ -58,7 +58,8 @@ struct dm_io_client *dm_io_client_create(void)
        if (!client->pool)
                goto bad;
 
-       client->bios = bioset_create(min_ios, 0);
+       client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
+                                                 BIOSET_NEED_RESCUER));
        if (!client->bios)
                goto bad;
 
@@ -124,7 +125,7 @@ static void complete_io(struct io *io)
        fn(error_bits, context);
 }
 
-static void dec_count(struct io *io, unsigned int region, int error)
+static void dec_count(struct io *io, unsigned int region, blk_status_t error)
 {
        if (error)
                set_bit(region, &io->error_bits);
@@ -137,9 +138,9 @@ static void endio(struct bio *bio)
 {
        struct io *io;
        unsigned region;
-       int error;
+       blk_status_t error;
 
-       if (bio->bi_error && bio_data_dir(bio) == READ)
+       if (bio->bi_status && bio_data_dir(bio) == READ)
                zero_fill_bio(bio);
 
        /*
@@ -147,7 +148,7 @@ static void endio(struct bio *bio)
         */
        retrieve_io_and_region_from_bio(bio, &io, &region);
 
-       error = bio->bi_error;
+       error = bio->bi_status;
        bio_put(bio);
 
        dec_count(io, region, error);
@@ -317,9 +318,9 @@ static void do_region(int op, int op_flags, unsigned region,
        else if (op == REQ_OP_WRITE_SAME)
                special_cmd_max_sectors = q->limits.max_write_same_sectors;
        if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
-            op == REQ_OP_WRITE_SAME)  &&
-           special_cmd_max_sectors == 0) {
-               dec_count(io, region, -EOPNOTSUPP);
+            op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
+               atomic_inc(&io->count);
+               dec_count(io, region, BLK_STS_NOTSUPP);
                return;
        }
 
index 4dfe386..a1da0eb 100644 (file)
@@ -150,10 +150,10 @@ static void log_end_io(struct bio *bio)
 {
        struct log_writes_c *lc = bio->bi_private;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                unsigned long flags;
 
-               DMERR("Error writing log block, error=%d", bio->bi_error);
+               DMERR("Error writing log block, error=%d", bio->bi_status);
                spin_lock_irqsave(&lc->blocks_lock, flags);
                lc->logging_enabled = false;
                spin_unlock_irqrestore(&lc->blocks_lock, flags);
@@ -586,7 +586,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
                spin_lock_irq(&lc->blocks_lock);
                lc->logging_enabled = false;
                spin_unlock_irq(&lc->blocks_lock);
-               return -ENOMEM;
+               return DM_MAPIO_KILL;
        }
        INIT_LIST_HEAD(&block->list);
        pb->block = block;
@@ -639,7 +639,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
                        spin_lock_irq(&lc->blocks_lock);
                        lc->logging_enabled = false;
                        spin_unlock_irq(&lc->blocks_lock);
-                       return -ENOMEM;
+                       return DM_MAPIO_KILL;
                }
 
                src = kmap_atomic(bv.bv_page);
@@ -664,7 +664,8 @@ map_bio:
        return DM_MAPIO_REMAPPED;
 }
 
-static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int normal_end_io(struct dm_target *ti, struct bio *bio,
+               blk_status_t *error)
 {
        struct log_writes_c *lc = ti->private;
        struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
@@ -686,7 +687,7 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
                spin_unlock_irqrestore(&lc->blocks_lock, flags);
        }
 
-       return error;
+       return DM_ENDIO_DONE;
 }
 
 /*
index 3df056b..0e8ab5b 100644 (file)
@@ -559,13 +559,13 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
                if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
                        return DM_MAPIO_REQUEUE;
                dm_report_EIO(m);
-               return -EIO;
+               return DM_MAPIO_KILL;
        }
 
        mpio->pgpath = pgpath;
        mpio->nr_bytes = nr_bytes;
 
-       bio->bi_error = 0;
+       bio->bi_status = 0;
        bio->bi_bdev = pgpath->path.dev->bdev;
        bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
 
@@ -621,11 +621,19 @@ static void process_queued_bios(struct work_struct *work)
        blk_start_plug(&plug);
        while ((bio = bio_list_pop(&bios))) {
                r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
-               if (r < 0 || r == DM_MAPIO_REQUEUE) {
-                       bio->bi_error = r;
+               switch (r) {
+               case DM_MAPIO_KILL:
+                       bio->bi_status = BLK_STS_IOERR;
+                       bio_endio(bio);
+                       break;
+               case DM_MAPIO_REQUEUE:
+                       bio->bi_status = BLK_STS_DM_REQUEUE;
                        bio_endio(bio);
-               } else if (r == DM_MAPIO_REMAPPED)
+                       break;
+               case DM_MAPIO_REMAPPED:
                        generic_make_request(bio);
+                       break;
+               }
        }
        blk_finish_plug(&plug);
 }
@@ -1442,22 +1450,15 @@ static void activate_path_work(struct work_struct *work)
        activate_or_offline_path(pgpath);
 }
 
-static int noretry_error(int error)
+static int noretry_error(blk_status_t error)
 {
        switch (error) {
-       case -EBADE:
-               /*
-                * EBADE signals an reservation conflict.
-                * We shouldn't fail the path here as we can communicate with
-                * the target.  We should failover to the next path, but in
-                * doing so we might be causing a ping-pong between paths.
-                * So just return the reservation conflict error.
-                */
-       case -EOPNOTSUPP:
-       case -EREMOTEIO:
-       case -EILSEQ:
-       case -ENODATA:
-       case -ENOSPC:
+       case BLK_STS_NOTSUPP:
+       case BLK_STS_NOSPC:
+       case BLK_STS_TARGET:
+       case BLK_STS_NEXUS:
+       case BLK_STS_MEDIUM:
+       case BLK_STS_RESOURCE:
                return 1;
        }
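
Going by the blk_errors table introduced earlier in this series, the new
status list corresponds to the old errnos as follows: NOTSUPP to
-EOPNOTSUPP, NOSPC to -ENOSPC, TARGET to -EREMOTEIO, NEXUS to -EBADE (the
reservation conflict the deleted comment explains), and MEDIUM to
-ENODATA. The -EILSEQ case (now BLK_STS_PROTECTION) drops out of the
list, while BLK_STS_RESOURCE (roughly -ENOMEM) is newly treated as
non-retryable. A hedged sketch of that correspondence:

    /* Sketch: the old-errno view of the new switch; the mapping is
     * inferred from the blk_errors table, not stated in this hunk.
     */
    static int example_noretry_errno(blk_status_t error)
    {
            switch (blk_status_to_errno(error)) {
            case -EOPNOTSUPP:       /* BLK_STS_NOTSUPP */
            case -ENOSPC:           /* BLK_STS_NOSPC */
            case -EREMOTEIO:        /* BLK_STS_TARGET */
            case -EBADE:            /* BLK_STS_NEXUS: reservation conflict */
            case -ENODATA:          /* BLK_STS_MEDIUM */
            case -ENOMEM:           /* BLK_STS_RESOURCE */
                    return 1;
            }

            return 0;
    }
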
 
@@ -1466,7 +1467,7 @@ static int noretry_error(int error)
 }
 
 static int multipath_end_io(struct dm_target *ti, struct request *clone,
-                           int error, union map_info *map_context)
+                           blk_status_t error, union map_info *map_context)
 {
        struct dm_mpath_io *mpio = get_mpio(map_context);
        struct pgpath *pgpath = mpio->pgpath;
@@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 
                if (atomic_read(&m->nr_valid_paths) == 0 &&
                    !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-                       if (error == -EIO)
+                       if (error == BLK_STS_IOERR)
                                dm_report_EIO(m);
                        /* complete with the original error */
                        r = DM_ENDIO_DONE;
@@ -1510,24 +1511,26 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
        return r;
 }
 
-static int do_end_io_bio(struct multipath *m, struct bio *clone,
-                        int error, struct dm_mpath_io *mpio)
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+               blk_status_t *error)
 {
+       struct multipath *m = ti->private;
+       struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+       struct pgpath *pgpath = mpio->pgpath;
        unsigned long flags;
+       int r = DM_ENDIO_DONE;
 
-       if (!error)
-               return 0;       /* I/O complete */
-
-       if (noretry_error(error))
-               return error;
+       if (!*error || noretry_error(*error))
+               goto done;
 
-       if (mpio->pgpath)
-               fail_path(mpio->pgpath);
+       if (pgpath)
+               fail_path(pgpath);
 
        if (atomic_read(&m->nr_valid_paths) == 0 &&
            !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
                dm_report_EIO(m);
-               return -EIO;
+               *error = BLK_STS_IOERR;
+               goto done;
        }
 
        /* Queue for the daemon to resubmit */
@@ -1539,23 +1542,11 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
        if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
                queue_work(kmultipathd, &m->process_queued_bios);
 
-       return DM_ENDIO_INCOMPLETE;
-}
-
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
-{
-       struct multipath *m = ti->private;
-       struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
-       struct pgpath *pgpath;
-       struct path_selector *ps;
-       int r;
-
-       BUG_ON(!mpio);
-
-       r = do_end_io_bio(m, clone, error, mpio);
-       pgpath = mpio->pgpath;
+       r = DM_ENDIO_INCOMPLETE;
+done:
        if (pgpath) {
-               ps = &pgpath->pg->ps;
+               struct path_selector *ps = &pgpath->pg->ps;
+
                if (ps->type->end_io)
                        ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
        }
index 7d89322..b4b75da 100644 (file)
@@ -1927,7 +1927,7 @@ struct dm_raid_superblock {
        /********************************************************************
         * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
         *
-        * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist
+        * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
         */
 
        __le32 flags; /* Flags defining array states for reshaping */
@@ -2092,6 +2092,11 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
        sb->layout = cpu_to_le32(mddev->layout);
        sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
 
+       /********************************************************************
+        * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
+        *
+        * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
+        */
        sb->new_level = cpu_to_le32(mddev->new_level);
        sb->new_layout = cpu_to_le32(mddev->new_layout);
        sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);
@@ -2438,8 +2443,14 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
        mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
 
        if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
-               /* Retrieve device size stored in superblock to be prepared for shrink */
-               rdev->sectors = le64_to_cpu(sb->sectors);
+               /*
+                * Retrieve rdev size stored in superblock to be prepared for shrink.
+                * Check that the extended superblock members are present,
+                * otherwise the size will not be set!
+                */
+               if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
+                       rdev->sectors = le64_to_cpu(sb->sectors);
+
                rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
                if (rdev->recovery_offset == MaxSector)
                        set_bit(In_sync, &rdev->flags);
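
The dm-raid fix is self-contained: sb->sectors lives in the v1.9.0
extension area of the superblock, so reading it from a pre-1.9.0
superblock would import stale bytes and corrupt the rdev size. Every
access to the extended members must therefore be gated on the
compat_features flag, as sketched here:

    /* Sketch: trust extended superblock members only once
     * compat_features advertises the v1.9.0 area.
     */
    static bool example_sb_has_v190(struct dm_raid_superblock *sb)
    {
            return le32_to_cpu(sb->compat_features) &
                   FEATURE_FLAG_SUPPORTS_V190;
    }
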
index e61c450..a4fbd91 100644 (file)
@@ -145,6 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
 
 struct dm_raid1_bio_record {
        struct mirror *m;
+       /* if details->bi_bdev == NULL, details were not saved */
        struct dm_bio_details details;
        region_t write_region;
 };
@@ -490,9 +491,9 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio)
                 * If device is suspended, complete the bio.
                 */
                if (dm_noflush_suspending(ms->ti))
-                       bio->bi_error = DM_ENDIO_REQUEUE;
+                       bio->bi_status = BLK_STS_DM_REQUEUE;
                else
-                       bio->bi_error = -EIO;
+                       bio->bi_status = BLK_STS_IOERR;
 
                bio_endio(bio);
                return;
@@ -626,7 +627,7 @@ static void write_callback(unsigned long error, void *context)
         * degrade the array.
         */
        if (bio_op(bio) == REQ_OP_DISCARD) {
-               bio->bi_error = -EOPNOTSUPP;
+               bio->bi_status = BLK_STS_NOTSUPP;
                bio_endio(bio);
                return;
        }
@@ -1198,6 +1199,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
        struct dm_raid1_bio_record *bio_record =
          dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
 
+       bio_record->details.bi_bdev = NULL;
+
        if (rw == WRITE) {
                /* Save region for mirror_end_io() handler */
                bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
@@ -1207,14 +1210,14 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
 
        r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
        if (r < 0 && r != -EWOULDBLOCK)
-               return r;
+               return DM_MAPIO_KILL;
 
        /*
         * If region is not in-sync queue the bio.
         */
        if (!r || (r == -EWOULDBLOCK)) {
                if (bio->bi_opf & REQ_RAHEAD)
-                       return -EWOULDBLOCK;
+                       return DM_MAPIO_KILL;
 
                queue_bio(ms, bio, rw);
                return DM_MAPIO_SUBMITTED;
@@ -1226,7 +1229,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
         */
        m = choose_mirror(ms, bio->bi_iter.bi_sector);
        if (unlikely(!m))
-               return -EIO;
+               return DM_MAPIO_KILL;
 
        dm_bio_record(&bio_record->details, bio);
        bio_record->m = m;
@@ -1236,7 +1239,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
        return DM_MAPIO_REMAPPED;
 }
 
-static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio,
+               blk_status_t *error)
 {
        int rw = bio_data_dir(bio);
        struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1252,16 +1256,26 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
                if (!(bio->bi_opf & REQ_PREFLUSH) &&
                    bio_op(bio) != REQ_OP_DISCARD)
                        dm_rh_dec(ms->rh, bio_record->write_region);
-               return error;
+               return DM_ENDIO_DONE;
        }
 
-       if (error == -EOPNOTSUPP)
-               return error;
+       if (*error == BLK_STS_NOTSUPP)
+               goto out;
+
+       if (bio->bi_opf & REQ_RAHEAD)
+               goto out;
 
-       if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
-               return error;
+       if (unlikely(*error)) {
+               if (!bio_record->details.bi_bdev) {
+                       /*
+                        * There wasn't enough memory to record necessary
+                        * information for a retry or there was no other
+                        * mirror in-sync.
+                        */
+                       DMERR_LIMIT("Mirror read failed.");
+                       return DM_ENDIO_DONE;
+               }
 
-       if (unlikely(error)) {
                m = bio_record->m;
 
                DMERR("Mirror read failed from %s. Trying alternative device.",
@@ -1277,7 +1291,8 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
                        bd = &bio_record->details;
 
                        dm_bio_restore(bd, bio);
-                       bio->bi_error = 0;
+                       bio_record->details.bi_bdev = NULL;
+                       bio->bi_status = 0;
 
                        queue_bio(ms, bio, rw);
                        return DM_ENDIO_INCOMPLETE;
@@ -1285,7 +1300,10 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
                DMERR("All replicated volumes dead, failing I/O");
        }
 
-       return error;
+out:
+       bio_record->details.bi_bdev = NULL;
+
+       return DM_ENDIO_DONE;
 }
 
 static void mirror_presuspend(struct dm_target *ti)
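
dm-raid1 adopts details.bi_bdev == NULL as a sentinel meaning "no bio
details were recorded", so mirror_end_io() no longer restores stale state
when a read was never remapped through the record path, and each record
is consumed exactly once. A hedged sketch of that protocol:

    /* Sketch: restore recorded bio details for a retry, honouring the
     * NULL-bi_bdev sentinel the hunks above introduce.
     */
    static bool example_try_restore(struct dm_bio_details *details,
                                    struct bio *bio)
    {
            if (!details->bi_bdev)
                    return false;           /* nothing recorded, no retry */

            dm_bio_restore(details, bio);
            details->bi_bdev = NULL;        /* consume the record */
            bio->bi_status = 0;
            return true;
    }
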
index b639fa7..c6ebc5b 100644 (file)
@@ -71,7 +71,7 @@ static void dm_old_start_queue(struct request_queue *q)
 
 static void dm_mq_start_queue(struct request_queue *q)
 {
-       blk_mq_start_stopped_hw_queues(q, true);
+       blk_mq_unquiesce_queue(q);
        blk_mq_kick_requeue_list(q);
 }
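
dm_mq_start_queue() now pairs with blk_mq_quiesce_queue() instead of the
stopped-hardware-queue mechanism: quiesce drains in-flight ->queue_rq()
calls and blocks new dispatch, and unquiesce lets dispatch resume. A
sketch of the pairing this series standardizes on:

    /* Sketch: temporarily block blk-mq dispatch around a
     * reconfiguration, then resume it.
     */
    static void example_pause_resume(struct request_queue *q)
    {
            blk_mq_quiesce_queue(q);        /* drain and block ->queue_rq */
            /* ... reconfigure the device ... */
            blk_mq_unquiesce_queue(q);      /* allow dispatch again */
    }
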
 
@@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone)
        struct dm_rq_target_io *tio = info->tio;
        struct bio *bio = info->orig;
        unsigned int nr_bytes = info->orig->bi_iter.bi_size;
-       int error = clone->bi_error;
+       blk_status_t error = clone->bi_status;
 
        bio_put(clone);
 
@@ -158,7 +158,7 @@ static void end_clone_bio(struct bio *clone)
         * Do not use blk_end_request() here, because it may complete
         * the original request before the clone, and break the ordering.
         */
-       blk_update_request(tio->orig, 0, nr_bytes);
+       blk_update_request(tio->orig, BLK_STS_OK, nr_bytes);
 }
 
 static struct dm_rq_target_io *tio_from_request(struct request *rq)
@@ -216,7 +216,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
  * Must be called without clone's queue lock held,
  * see end_clone_request() for more details.
  */
-static void dm_end_request(struct request *clone, int error)
+static void dm_end_request(struct request *clone, blk_status_t error)
 {
        int rw = rq_data_dir(clone);
        struct dm_rq_target_io *tio = clone->end_io_data;
@@ -285,7 +285,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
        rq_completed(md, rw, false);
 }
 
-static void dm_done(struct request *clone, int error, bool mapped)
+static void dm_done(struct request *clone, blk_status_t error, bool mapped)
 {
        int r = DM_ENDIO_DONE;
        struct dm_rq_target_io *tio = clone->end_io_data;
@@ -298,7 +298,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
                        r = rq_end_io(tio->ti, clone, error, &tio->info);
        }
 
-       if (unlikely(error == -EREMOTEIO)) {
+       if (unlikely(error == BLK_STS_TARGET)) {
                if (req_op(clone) == REQ_OP_WRITE_SAME &&
                    !clone->q->limits.max_write_same_sectors)
                        disable_write_same(tio->md);
@@ -358,7 +358,7 @@ static void dm_softirq_done(struct request *rq)
  * Complete the clone and the original request with the error status
  * through softirq context.
  */
-static void dm_complete_request(struct request *rq, int error)
+static void dm_complete_request(struct request *rq, blk_status_t error)
 {
        struct dm_rq_target_io *tio = tio_from_request(rq);
 
@@ -375,7 +375,7 @@ static void dm_complete_request(struct request *rq, int error)
  * Target's rq_end_io() function isn't called.
  * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
  */
-static void dm_kill_unmapped_request(struct request *rq, int error)
+static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
 {
        rq->rq_flags |= RQF_FAILED;
        dm_complete_request(rq, error);
@@ -384,7 +384,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
 /*
  * Called with the clone's queue lock held (in the case of .request_fn)
  */
-static void end_clone_request(struct request *clone, int error)
+static void end_clone_request(struct request *clone, blk_status_t error)
 {
        struct dm_rq_target_io *tio = clone->end_io_data;
 
@@ -401,7 +401,7 @@ static void end_clone_request(struct request *clone, int error)
 
 static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
 {
-       int r;
+       blk_status_t r;
 
        if (blk_queue_io_stat(clone->q))
                clone->rq_flags |= RQF_IO_STAT;
@@ -506,7 +506,7 @@ static int map_request(struct dm_rq_target_io *tio)
                break;
        case DM_MAPIO_KILL:
                /* The target wants to complete the I/O */
-               dm_kill_unmapped_request(rq, -EIO);
+               dm_kill_unmapped_request(rq, BLK_STS_IOERR);
                break;
        default:
                DMWARN("unimplemented target map return value: %d", r);
@@ -727,7 +727,7 @@ static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
        return __dm_rq_init_rq(set->driver_data, rq);
 }
 
-static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
                          const struct blk_mq_queue_data *bd)
 {
        struct request *rq = bd->rq;
@@ -744,7 +744,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
        }
 
        if (ti->type->busy && ti->type->busy(ti))
-               return BLK_MQ_RQ_QUEUE_BUSY;
+               return BLK_STS_RESOURCE;
 
        dm_start_request(md, rq);
 
@@ -762,10 +762,10 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
                rq_end_stats(md, rq);
                rq_completed(md, rq_data_dir(rq), false);
                blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
-               return BLK_MQ_RQ_QUEUE_BUSY;
+               return BLK_STS_RESOURCE;
        }
 
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 }
 
 static const struct blk_mq_ops dm_mq_ops = {
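
Note: ->queue_rq() now returns blk_status_t instead of the old
BLK_MQ_RQ_QUEUE_* dispatch codes; the assumed one-to-one mapping is:

	/* BLK_MQ_RQ_QUEUE_OK    -> BLK_STS_OK       (request dispatched)
	 * BLK_MQ_RQ_QUEUE_BUSY  -> BLK_STS_RESOURCE (out of resources, retried later)
	 * BLK_MQ_RQ_QUEUE_ERROR -> BLK_STS_IOERR    (request failed)
	 */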
index f0020d2..9813922 100644 (file)
@@ -24,7 +24,7 @@ struct dm_rq_target_io {
        struct dm_target *ti;
        struct request *orig, *clone;
        struct kthread_work work;
-       int error;
+       blk_status_t error;
        union map_info info;
        struct dm_stats_aux stats_aux;
        unsigned long duration_jiffies;
index e152d98..1ba4104 100644 (file)
@@ -1590,7 +1590,7 @@ static void full_bio_end_io(struct bio *bio)
 {
        void *callback_data = bio->bi_private;
 
-       dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0);
+       dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
 }
 
 static void start_full_bio(struct dm_snap_pending_exception *pe,
@@ -1690,7 +1690,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
        /* Full snapshots are not usable */
        /* To get here the table must be live so s->active is always set. */
        if (!s->valid)
-               return -EIO;
+               return DM_MAPIO_KILL;
 
        /* FIXME: should only take write lock if we need
         * to copy an exception */
@@ -1698,7 +1698,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 
        if (!s->valid || (unlikely(s->snapshot_overflowed) &&
            bio_data_dir(bio) == WRITE)) {
-               r = -EIO;
+               r = DM_MAPIO_KILL;
                goto out_unlock;
        }
 
@@ -1723,7 +1723,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 
                        if (!s->valid || s->snapshot_overflowed) {
                                free_pending_exception(pe);
-                               r = -EIO;
+                               r = DM_MAPIO_KILL;
                                goto out_unlock;
                        }
 
@@ -1741,7 +1741,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
                                        DMERR("Snapshot overflowed: Unable to allocate exception.");
                                } else
                                        __invalidate_snapshot(s, -ENOMEM);
-                               r = -EIO;
+                               r = DM_MAPIO_KILL;
                                goto out_unlock;
                        }
                }
@@ -1851,14 +1851,15 @@ out_unlock:
        return r;
 }
 
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
+               blk_status_t *error)
 {
        struct dm_snapshot *s = ti->private;
 
        if (is_bio_tracked(bio))
                stop_tracking_chunk(s, bio);
 
-       return 0;
+       return DM_ENDIO_DONE;
 }
 
 static void snapshot_merge_presuspend(struct dm_target *ti)
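
Note: ->map() hooks may no longer return raw errnos; DM_MAPIO_KILL is the
replacement for -EIO and tells the dm core to fail the bio with
BLK_STS_IOERR (see the __map_bio() switch in the dm.c hunk below). A
trivial always-failing map, sketched for illustration (compare
io_err_map() further down):

	static int example_map(struct dm_target *ti, struct bio *bio)
	{
		return DM_MAPIO_KILL;	/* core ends the bio with BLK_STS_IOERR */
	}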
index 7515248..11621a0 100644 (file)
@@ -375,20 +375,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
        }
 }
 
-static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio,
+               blk_status_t *error)
 {
        unsigned i;
        char major_minor[16];
        struct stripe_c *sc = ti->private;
 
-       if (!error)
-               return 0; /* I/O complete */
+       if (!*error)
+               return DM_ENDIO_DONE; /* I/O complete */
 
-       if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
-               return error;
+       if (bio->bi_opf & REQ_RAHEAD)
+               return DM_ENDIO_DONE;
 
-       if (error == -EOPNOTSUPP)
-               return error;
+       if (*error == BLK_STS_NOTSUPP)
+               return DM_ENDIO_DONE;
 
        memset(major_minor, 0, sizeof(major_minor));
        sprintf(major_minor, "%d:%d",
@@ -409,7 +410,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
                                schedule_work(&sc->trigger_event);
                }
 
-       return error;
+       return DM_ENDIO_DONE;
 }
 
 static int stripe_iterate_devices(struct dm_target *ti,
index b242b75..c0d7e60 100644 (file)
@@ -128,7 +128,7 @@ static void io_err_dtr(struct dm_target *tt)
 
 static int io_err_map(struct dm_target *tt, struct bio *bio)
 {
-       return -EIO;
+       return DM_MAPIO_KILL;
 }
 
 static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
index 17ad50d..9dec2f8 100644 (file)
@@ -383,8 +383,8 @@ static void end_discard(struct discard_op *op, int r)
         * Even if r is set, there could be sub discards in flight that we
         * need to wait for.
         */
-       if (r && !op->parent_bio->bi_error)
-               op->parent_bio->bi_error = r;
+       if (r && !op->parent_bio->bi_status)
+               op->parent_bio->bi_status = errno_to_blk_status(r);
        bio_endio(op->parent_bio);
 }
 
@@ -450,22 +450,20 @@ static void cell_release_no_holder(struct pool *pool,
 }
 
 static void cell_error_with_code(struct pool *pool,
-                                struct dm_bio_prison_cell *cell, int error_code)
+               struct dm_bio_prison_cell *cell, blk_status_t error_code)
 {
        dm_cell_error(pool->prison, cell, error_code);
        dm_bio_prison_free_cell(pool->prison, cell);
 }
 
-static int get_pool_io_error_code(struct pool *pool)
+static blk_status_t get_pool_io_error_code(struct pool *pool)
 {
-       return pool->out_of_data_space ? -ENOSPC : -EIO;
+       return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
 }
 
 static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
 {
-       int error = get_pool_io_error_code(pool);
-
-       cell_error_with_code(pool, cell, error);
+       cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
 }
 
 static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
@@ -475,7 +473,7 @@ static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
 
 static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
 {
-       cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
+       cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
 }
 
 /*----------------------------------------------------------------*/
@@ -555,17 +553,18 @@ static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
        bio_list_init(master);
 }
 
-static void error_bio_list(struct bio_list *bios, int error)
+static void error_bio_list(struct bio_list *bios, blk_status_t error)
 {
        struct bio *bio;
 
        while ((bio = bio_list_pop(bios))) {
-               bio->bi_error = error;
+               bio->bi_status = error;
                bio_endio(bio);
        }
 }
 
-static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
+static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
+               blk_status_t error)
 {
        struct bio_list bios;
        unsigned long flags;
@@ -608,11 +607,11 @@ static void requeue_io(struct thin_c *tc)
        __merge_bio_list(&bios, &tc->retry_on_resume_list);
        spin_unlock_irqrestore(&tc->lock, flags);
 
-       error_bio_list(&bios, DM_ENDIO_REQUEUE);
+       error_bio_list(&bios, BLK_STS_DM_REQUEUE);
        requeue_deferred_cells(tc);
 }
 
-static void error_retry_list_with_code(struct pool *pool, int error)
+static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
 {
        struct thin_c *tc;
 
@@ -624,9 +623,7 @@ static void error_retry_list_with_code(struct pool *pool, int error)
 
 static void error_retry_list(struct pool *pool)
 {
-       int error = get_pool_io_error_code(pool);
-
-       error_retry_list_with_code(pool, error);
+       error_retry_list_with_code(pool, get_pool_io_error_code(pool));
 }
 
 /*
@@ -774,7 +771,7 @@ struct dm_thin_new_mapping {
         */
        atomic_t prepare_actions;
 
-       int err;
+       blk_status_t status;
        struct thin_c *tc;
        dm_block_t virt_begin, virt_end;
        dm_block_t data_block;
@@ -814,7 +811,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
 {
        struct dm_thin_new_mapping *m = context;
 
-       m->err = read_err || write_err ? -EIO : 0;
+       m->status = read_err || write_err ? BLK_STS_IOERR : 0;
        complete_mapping_preparation(m);
 }
 
@@ -825,7 +822,7 @@ static void overwrite_endio(struct bio *bio)
 
        bio->bi_end_io = m->saved_bi_end_io;
 
-       m->err = bio->bi_error;
+       m->status = bio->bi_status;
        complete_mapping_preparation(m);
 }
 
@@ -925,7 +922,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
        struct bio *bio = m->bio;
        int r;
 
-       if (m->err) {
+       if (m->status) {
                cell_error(pool, m->cell);
                goto out;
        }
@@ -1094,6 +1091,19 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
                return;
        }
 
+       /*
+        * Increment the unmapped blocks.  This prevents a race between the
+        * passdown io and reallocation of freed blocks.
+        */
+       r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+       if (r) {
+               metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+               bio_io_error(m->bio);
+               cell_defer_no_holder(tc, m->cell);
+               mempool_free(m, pool->mapping_pool);
+               return;
+       }
+
        discard_parent = bio_alloc(GFP_NOIO, 1);
        if (!discard_parent) {
                DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
@@ -1114,19 +1124,6 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
                        end_discard(&op, r);
                }
        }
-
-       /*
-        * Increment the unmapped blocks.  This prevents a race between the
-        * passdown io and reallocation of freed blocks.
-        */
-       r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
-       if (r) {
-               metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
-               bio_io_error(m->bio);
-               cell_defer_no_holder(tc, m->cell);
-               mempool_free(m, pool->mapping_pool);
-               return;
-       }
 }
 
 static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
@@ -1495,7 +1492,7 @@ static void retry_on_resume(struct bio *bio)
        spin_unlock_irqrestore(&tc->lock, flags);
 }
 
-static int should_error_unserviceable_bio(struct pool *pool)
+static blk_status_t should_error_unserviceable_bio(struct pool *pool)
 {
        enum pool_mode m = get_pool_mode(pool);
 
@@ -1503,27 +1500,27 @@ static int should_error_unserviceable_bio(struct pool *pool)
        case PM_WRITE:
                /* Shouldn't get here */
                DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
-               return -EIO;
+               return BLK_STS_IOERR;
 
        case PM_OUT_OF_DATA_SPACE:
-               return pool->pf.error_if_no_space ? -ENOSPC : 0;
+               return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
 
        case PM_READ_ONLY:
        case PM_FAIL:
-               return -EIO;
+               return BLK_STS_IOERR;
        default:
                /* Shouldn't get here */
                DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
-               return -EIO;
+               return BLK_STS_IOERR;
        }
 }
 
 static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
 {
-       int error = should_error_unserviceable_bio(pool);
+       blk_status_t error = should_error_unserviceable_bio(pool);
 
        if (error) {
-               bio->bi_error = error;
+               bio->bi_status = error;
                bio_endio(bio);
        } else
                retry_on_resume(bio);
@@ -1533,7 +1530,7 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
 {
        struct bio *bio;
        struct bio_list bios;
-       int error;
+       blk_status_t error;
 
        error = should_error_unserviceable_bio(pool);
        if (error) {
@@ -2071,7 +2068,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
        unsigned count = 0;
 
        if (tc->requeue_mode) {
-               error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE);
+               error_thin_bio_list(tc, &tc->deferred_bio_list,
+                               BLK_STS_DM_REQUEUE);
                return;
        }
 
@@ -2322,7 +2320,7 @@ static void do_no_space_timeout(struct work_struct *ws)
        if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
                pool->pf.error_if_no_space = true;
                notify_of_pool_mode_change_to_oods(pool);
-               error_retry_list_with_code(pool, -ENOSPC);
+               error_retry_list_with_code(pool, BLK_STS_NOSPC);
        }
 }
 
@@ -2624,7 +2622,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
        thin_hook_bio(tc, bio);
 
        if (tc->requeue_mode) {
-               bio->bi_error = DM_ENDIO_REQUEUE;
+               bio->bi_status = BLK_STS_DM_REQUEUE;
                bio_endio(bio);
                return DM_MAPIO_SUBMITTED;
        }
@@ -4177,7 +4175,8 @@ static int thin_map(struct dm_target *ti, struct bio *bio)
        return thin_bio_map(ti, bio);
 }
 
-static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
+static int thin_endio(struct dm_target *ti, struct bio *bio,
+               blk_status_t *err)
 {
        unsigned long flags;
        struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -4212,7 +4211,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
        if (h->cell)
                cell_defer_no_holder(h->tc, h->cell);
 
-       return 0;
+       return DM_ENDIO_DONE;
 }
 
 static void thin_presuspend(struct dm_target *ti)
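
Note: the thin-pool conversion leans on the errno <-> blk_status_t
translation helpers; the correspondences relied on above are:

	blk_status_t s = errno_to_blk_status(-EIO);	/* BLK_STS_IOERR */
	/* -ENOSPC     -> BLK_STS_NOSPC
	 * -EOPNOTSUPP -> BLK_STS_NOTSUPP
	 * -EREMOTEIO  -> BLK_STS_TARGET
	 * DM_ENDIO_REQUEUE, the old in-band sentinel -> BLK_STS_DM_REQUEUE
	 */

The passdown hunk also moves dm_pool_inc_data_range() ahead of issuing the
discard, so the data blocks' reference counts are raised before the
passdown I/O is in flight and freed blocks cannot race with reallocation.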
index 1ec9b2c..b46705e 100644 (file)
@@ -538,13 +538,13 @@ static int verity_verify_io(struct dm_verity_io *io)
 /*
  * End one "io" structure with a given error.
  */
-static void verity_finish_io(struct dm_verity_io *io, int error)
+static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
 {
        struct dm_verity *v = io->v;
        struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
 
        bio->bi_end_io = io->orig_bi_end_io;
-       bio->bi_error = error;
+       bio->bi_status = status;
 
        verity_fec_finish_io(io);
 
@@ -555,15 +555,15 @@ static void verity_work(struct work_struct *w)
 {
        struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
 
-       verity_finish_io(io, verity_verify_io(io));
+       verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
 }
 
 static void verity_end_io(struct bio *bio)
 {
        struct dm_verity_io *io = bio->bi_private;
 
-       if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
-               verity_finish_io(io, bio->bi_error);
+       if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
+               verity_finish_io(io, bio->bi_status);
                return;
        }
 
@@ -643,17 +643,17 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
        if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
            ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
                DMERR_LIMIT("unaligned io");
-               return -EIO;
+               return DM_MAPIO_KILL;
        }
 
        if (bio_end_sector(bio) >>
            (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
                DMERR_LIMIT("io out of range");
-               return -EIO;
+               return DM_MAPIO_KILL;
        }
 
        if (bio_data_dir(bio) == WRITE)
-               return -EIO;
+               return DM_MAPIO_KILL;
 
        io = dm_per_bio_data(bio, ti->per_io_data_size);
        io->v = v;
index b616f11..b65ca8d 100644 (file)
@@ -39,7 +39,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
        case REQ_OP_READ:
                if (bio->bi_opf & REQ_RAHEAD) {
                        /* readahead of null bytes only wastes buffer cache */
-                       return -EIO;
+                       return DM_MAPIO_KILL;
                }
                zero_fill_bio(bio);
                break;
@@ -47,7 +47,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
                /* writes get silently dropped */
                break;
        default:
-               return -EIO;
+               return DM_MAPIO_KILL;
        }
 
        bio_endio(bio);
index 37ccd73..4029460 100644 (file)
@@ -63,7 +63,7 @@ static struct workqueue_struct *deferred_remove_workqueue;
  */
 struct dm_io {
        struct mapped_device *md;
-       int error;
+       blk_status_t status;
        atomic_t io_count;
        struct bio *bio;
        unsigned long start_time;
@@ -768,23 +768,24 @@ static int __noflush_suspending(struct mapped_device *md)
  * Decrements the number of outstanding ios that a bio has been
  * cloned into, completing the original io if necc.
  */
-static void dec_pending(struct dm_io *io, int error)
+static void dec_pending(struct dm_io *io, blk_status_t error)
 {
        unsigned long flags;
-       int io_error;
+       blk_status_t io_error;
        struct bio *bio;
        struct mapped_device *md = io->md;
 
        /* Push-back supersedes any I/O errors */
        if (unlikely(error)) {
                spin_lock_irqsave(&io->endio_lock, flags);
-               if (!(io->error > 0 && __noflush_suspending(md)))
-                       io->error = error;
+               if (!(io->status == BLK_STS_DM_REQUEUE &&
+                               __noflush_suspending(md)))
+                       io->status = error;
                spin_unlock_irqrestore(&io->endio_lock, flags);
        }
 
        if (atomic_dec_and_test(&io->io_count)) {
-               if (io->error == DM_ENDIO_REQUEUE) {
+               if (io->status == BLK_STS_DM_REQUEUE) {
                        /*
                         * Target requested pushing back the I/O.
                         */
@@ -793,16 +794,16 @@ static void dec_pending(struct dm_io *io, int error)
                                bio_list_add_head(&md->deferred, io->bio);
                        else
                                /* noflush suspend was interrupted. */
-                               io->error = -EIO;
+                               io->status = BLK_STS_IOERR;
                        spin_unlock_irqrestore(&md->deferred_lock, flags);
                }
 
-               io_error = io->error;
+               io_error = io->status;
                bio = io->bio;
                end_io_acct(io);
                free_io(md, io);
 
-               if (io_error == DM_ENDIO_REQUEUE)
+               if (io_error == BLK_STS_DM_REQUEUE)
                        return;
 
                if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
@@ -814,7 +815,7 @@ static void dec_pending(struct dm_io *io, int error)
                        queue_io(md, bio);
                } else {
                        /* done with normal IO or empty flush */
-                       bio->bi_error = io_error;
+                       bio->bi_status = io_error;
                        bio_endio(bio);
                }
        }
@@ -838,31 +839,13 @@ void disable_write_zeroes(struct mapped_device *md)
 
 static void clone_endio(struct bio *bio)
 {
-       int error = bio->bi_error;
-       int r = error;
+       blk_status_t error = bio->bi_status;
        struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
        struct dm_io *io = tio->io;
        struct mapped_device *md = tio->io->md;
        dm_endio_fn endio = tio->ti->type->end_io;
 
-       if (endio) {
-               r = endio(tio->ti, bio, error);
-               if (r < 0 || r == DM_ENDIO_REQUEUE)
-                       /*
-                        * error and requeue request are handled
-                        * in dec_pending().
-                        */
-                       error = r;
-               else if (r == DM_ENDIO_INCOMPLETE)
-                       /* The target will handle the io */
-                       return;
-               else if (r) {
-                       DMWARN("unimplemented target endio return value: %d", r);
-                       BUG();
-               }
-       }
-
-       if (unlikely(r == -EREMOTEIO)) {
+       if (unlikely(error == BLK_STS_TARGET)) {
                if (bio_op(bio) == REQ_OP_WRITE_SAME &&
                    !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
                        disable_write_same(md);
@@ -871,6 +854,23 @@ static void clone_endio(struct bio *bio)
                        disable_write_zeroes(md);
        }
 
+       if (endio) {
+               int r = endio(tio->ti, bio, &error);
+               switch (r) {
+               case DM_ENDIO_REQUEUE:
+                       error = BLK_STS_DM_REQUEUE;
+                       /*FALLTHRU*/
+               case DM_ENDIO_DONE:
+                       break;
+               case DM_ENDIO_INCOMPLETE:
+                       /* The target will handle the io */
+                       return;
+               default:
+                       DMWARN("unimplemented target endio return value: %d", r);
+                       BUG();
+               }
+       }
+
        free_tio(tio);
        dec_pending(io, error);
 }
@@ -1036,7 +1036,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
 
                while ((bio = bio_list_pop(&list))) {
                        struct bio_set *bs = bio->bi_pool;
-                       if (unlikely(!bs) || bs == fs_bio_set) {
+                       if (unlikely(!bs) || bs == fs_bio_set ||
+                           !bs->rescue_workqueue) {
                                bio_list_add(&current->bio_list[i], bio);
                                continue;
                        }
@@ -1084,18 +1085,24 @@ static void __map_bio(struct dm_target_io *tio)
        r = ti->type->map(ti, clone);
        dm_offload_end(&o);
 
-       if (r == DM_MAPIO_REMAPPED) {
+       switch (r) {
+       case DM_MAPIO_SUBMITTED:
+               break;
+       case DM_MAPIO_REMAPPED:
                /* the bio has been remapped so dispatch it */
-
                trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
                                      tio->io->bio->bi_bdev->bd_dev, sector);
-
                generic_make_request(clone);
-       } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
-               /* error the io and bail out, or requeue it if needed */
-               dec_pending(tio->io, r);
+               break;
+       case DM_MAPIO_KILL:
+               dec_pending(tio->io, BLK_STS_IOERR);
+               free_tio(tio);
+               break;
+       case DM_MAPIO_REQUEUE:
+               dec_pending(tio->io, BLK_STS_DM_REQUEUE);
                free_tio(tio);
-       } else if (r != DM_MAPIO_SUBMITTED) {
+               break;
+       default:
                DMWARN("unimplemented target map return value: %d", r);
                BUG();
        }
@@ -1360,7 +1367,7 @@ static void __split_and_process_bio(struct mapped_device *md,
        ci.map = map;
        ci.md = md;
        ci.io = alloc_io(md);
-       ci.io->error = 0;
+       ci.io->status = 0;
        atomic_set(&ci.io->io_count, 1);
        ci.io->bio = bio;
        ci.io->md = md;
@@ -1527,7 +1534,6 @@ void dm_init_normal_md_queue(struct mapped_device *md)
         * Initialize aspects of queue that aren't relevant for blk-mq
         */
        md->queue->backing_dev_info->congested_fn = dm_any_congested;
-       blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 }
 
 static void cleanup_mapped_device(struct mapped_device *md)
@@ -2654,7 +2660,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
                BUG();
        }
 
-       pools->bs = bioset_create_nobvec(pool_size, front_pad);
+       pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
        if (!pools->bs)
                goto out;
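
Note: bioset_create() now takes a flags word in place of the old
bioset_create()/bioset_create_nobvec() split; the assumed semantics are:

	bs = bioset_create(pool_size, front_pad, 0);			/* clone/split only, no bvec pool */
	bs = bioset_create(pool_size, front_pad, BIOSET_NEED_BVECS);	/* old bioset_create() behaviour */
	bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);	/* rescuer workqueue for stacking drivers */

The md/raid bio_split pools below pass 0, since they only split existing
bios and never attach pages.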
 
index 87edc34..31bcbfb 100644 (file)
@@ -185,7 +185,7 @@ static int start_readonly;
 static bool create_on_open = true;
 
 /* bio_clone_mddev
- * like bio_clone, but with a local bio set
+ * like bio_clone_bioset, but with a local bio set
  */
 
 struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
@@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
        unsigned int sectors;
        int cpu;
 
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        if (mddev == NULL || mddev->pers == NULL) {
                bio_io_error(bio);
@@ -273,7 +273,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
        }
        if (mddev->ro == 1 && unlikely(rw == WRITE)) {
                if (bio_sectors(bio) != 0)
-                       bio->bi_error = -EROFS;
+                       bio->bi_status = BLK_STS_IOERR;
                bio_endio(bio);
                return BLK_QC_T_NONE;
        }
@@ -719,8 +719,8 @@ static void super_written(struct bio *bio)
        struct md_rdev *rdev = bio->bi_private;
        struct mddev *mddev = rdev->mddev;
 
-       if (bio->bi_error) {
-               pr_err("md: super_written gets error=%d\n", bio->bi_error);
+       if (bio->bi_status) {
+               pr_err("md: super_written gets error=%d\n", bio->bi_status);
                md_error(mddev, rdev);
                if (!test_bit(Faulty, &rdev->flags)
                    && (bio->bi_opf & MD_FAILFAST)) {
@@ -801,7 +801,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 
        submit_bio_wait(bio);
 
-       ret = !bio->bi_error;
+       ret = !bio->bi_status;
        bio_put(bio);
        return ret;
 }
@@ -825,7 +825,7 @@ fail:
        return -EINVAL;
 }
 
-static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
 {
        return  sb1->set_uuid0 == sb2->set_uuid0 &&
                sb1->set_uuid1 == sb2->set_uuid1 &&
@@ -833,7 +833,7 @@ static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
                sb1->set_uuid3 == sb2->set_uuid3;
 }
 
-static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
 {
        int ret;
        mdp_super_t *tmp1, *tmp2;
@@ -1025,12 +1025,12 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
        } else {
                __u64 ev1, ev2;
                mdp_super_t *refsb = page_address(refdev->sb_page);
-               if (!uuid_equal(refsb, sb)) {
+               if (!md_uuid_equal(refsb, sb)) {
                        pr_warn("md: %s has different UUID to %s\n",
                                b, bdevname(refdev->bdev,b2));
                        goto abort;
                }
-               if (!sb_equal(refsb, sb)) {
+               if (!md_sb_equal(refsb, sb)) {
                        pr_warn("md: %s has same UUID but different superblock to %s\n",
                                b, bdevname(refdev->bdev, b2));
                        goto abort;
@@ -5428,7 +5428,7 @@ int md_run(struct mddev *mddev)
        }
 
        if (mddev->bio_set == NULL) {
-               mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
+               mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
                if (!mddev->bio_set)
                        return -ENOMEM;
        }
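
Note: two unrelated md.c adjustments ride along here. blk_queue_split()
lost its bio_set argument, the queue now splitting from its own internal
bio set, so the assumed new signature is:

	void blk_queue_split(struct request_queue *q, struct bio **bio);

And the private uuid_equal()/sb_equal() helpers gain an md_ prefix,
presumably to stay clear of the generic uuid_equal() that <linux/uuid.h>
now exports.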
index e95d521..68d036e 100644 (file)
@@ -73,12 +73,12 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
  * operation and are ready to return a success/failure code to the buffer
  * cache layer.
  */
-static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
+static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
 {
        struct bio *bio = mp_bh->master_bio;
        struct mpconf *conf = mp_bh->mddev->private;
 
-       bio->bi_error = err;
+       bio->bi_status = status;
        bio_endio(bio);
        mempool_free(mp_bh, conf->pool);
 }
@@ -89,7 +89,7 @@ static void multipath_end_request(struct bio *bio)
        struct mpconf *conf = mp_bh->mddev->private;
        struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
 
-       if (!bio->bi_error)
+       if (!bio->bi_status)
                multipath_end_bh_io(mp_bh, 0);
        else if (!(bio->bi_opf & REQ_RAHEAD)) {
                /*
@@ -102,7 +102,7 @@ static void multipath_end_request(struct bio *bio)
                        (unsigned long long)bio->bi_iter.bi_sector);
                multipath_reschedule_retry(mp_bh);
        } else
-               multipath_end_bh_io(mp_bh, bio->bi_error);
+               multipath_end_bh_io(mp_bh, bio->bi_status);
        rdev_dec_pending(rdev, conf->mddev);
 }
 
@@ -347,7 +347,7 @@ static void multipathd(struct md_thread *thread)
                        pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
                               bdevname(bio->bi_bdev,b),
                               (unsigned long long)bio->bi_iter.bi_sector);
-                       multipath_end_bh_io(mp_bh, -EIO);
+                       multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
                } else {
                        pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
                               bdevname(bio->bi_bdev,b),
index e1a7e3d..98ca2c1 100644 (file)
@@ -277,7 +277,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
        struct r1conf *conf = r1_bio->mddev->private;
 
        if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
-               bio->bi_error = -EIO;
+               bio->bi_status = BLK_STS_IOERR;
 
        bio_endio(bio);
        /*
@@ -335,7 +335,7 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
 
 static void raid1_end_read_request(struct bio *bio)
 {
-       int uptodate = !bio->bi_error;
+       int uptodate = !bio->bi_status;
        struct r1bio *r1_bio = bio->bi_private;
        struct r1conf *conf = r1_bio->mddev->private;
        struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -426,12 +426,12 @@ static void raid1_end_write_request(struct bio *bio)
        struct md_rdev *rdev = conf->mirrors[mirror].rdev;
        bool discard_error;
 
-       discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+       discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
 
        /*
         * 'one mirror IO has finished' event handler:
         */
-       if (bio->bi_error && !discard_error) {
+       if (bio->bi_status && !discard_error) {
                set_bit(WriteErrorSeen, &rdev->flags);
                if (!test_and_set_bit(WantReplacement, &rdev->flags))
                        set_bit(MD_RECOVERY_NEEDED, &
@@ -802,7 +802,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
                bio->bi_next = NULL;
                bio->bi_bdev = rdev->bdev;
                if (test_bit(Faulty, &rdev->flags)) {
-                       bio->bi_error = -EIO;
+                       bio->bi_status = BLK_STS_IOERR;
                        bio_endio(bio);
                } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
                                    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1856,7 +1856,7 @@ static void end_sync_read(struct bio *bio)
         * or re-read if the read failed.
         * We don't do much here, just schedule handling by raid1d
         */
-       if (!bio->bi_error)
+       if (!bio->bi_status)
                set_bit(R1BIO_Uptodate, &r1_bio->state);
 
        if (atomic_dec_and_test(&r1_bio->remaining))
@@ -1865,7 +1865,7 @@ static void end_sync_read(struct bio *bio)
 
 static void end_sync_write(struct bio *bio)
 {
-       int uptodate = !bio->bi_error;
+       int uptodate = !bio->bi_status;
        struct r1bio *r1_bio = get_resync_r1bio(bio);
        struct mddev *mddev = r1_bio->mddev;
        struct r1conf *conf = mddev->private;
@@ -2058,7 +2058,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
                idx ++;
        }
        set_bit(R1BIO_Uptodate, &r1_bio->state);
-       bio->bi_error = 0;
+       bio->bi_status = 0;
        return 1;
 }
 
@@ -2082,16 +2082,16 @@ static void process_checks(struct r1bio *r1_bio)
        for (i = 0; i < conf->raid_disks * 2; i++) {
                int j;
                int size;
-               int error;
+               blk_status_t status;
                struct bio_vec *bi;
                struct bio *b = r1_bio->bios[i];
                struct resync_pages *rp = get_resync_pages(b);
                if (b->bi_end_io != end_sync_read)
                        continue;
                /* fixup the bio for reuse, but preserve the status */
-               error = b->bi_error;
+               status = b->bi_status;
                bio_reset(b);
-               b->bi_error = error;
+               b->bi_status = status;
                b->bi_vcnt = vcnt;
                b->bi_iter.bi_size = r1_bio->sectors << 9;
                b->bi_iter.bi_sector = r1_bio->sector +
@@ -2113,7 +2113,7 @@ static void process_checks(struct r1bio *r1_bio)
        }
        for (primary = 0; primary < conf->raid_disks * 2; primary++)
                if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-                   !r1_bio->bios[primary]->bi_error) {
+                   !r1_bio->bios[primary]->bi_status) {
                        r1_bio->bios[primary]->bi_end_io = NULL;
                        rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
                        break;
@@ -2123,7 +2123,7 @@ static void process_checks(struct r1bio *r1_bio)
                int j;
                struct bio *pbio = r1_bio->bios[primary];
                struct bio *sbio = r1_bio->bios[i];
-               int error = sbio->bi_error;
+               blk_status_t status = sbio->bi_status;
                struct page **ppages = get_resync_pages(pbio)->pages;
                struct page **spages = get_resync_pages(sbio)->pages;
                struct bio_vec *bi;
@@ -2132,12 +2132,12 @@ static void process_checks(struct r1bio *r1_bio)
                if (sbio->bi_end_io != end_sync_read)
                        continue;
                /* Now we can 'fixup' the error value */
-               sbio->bi_error = 0;
+               sbio->bi_status = 0;
 
                bio_for_each_segment_all(bi, sbio, j)
                        page_len[j] = bi->bv_len;
 
-               if (!error) {
+               if (!status) {
                        for (j = vcnt; j-- ; ) {
                                if (memcmp(page_address(ppages[j]),
                                           page_address(spages[j]),
@@ -2149,7 +2149,7 @@ static void process_checks(struct r1bio *r1_bio)
                if (j >= 0)
                        atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
                if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-                             && !error)) {
+                             && !status)) {
                        /* No need to write to this device. */
                        sbio->bi_end_io = NULL;
                        rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2400,11 +2400,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
                struct bio *bio = r1_bio->bios[m];
                if (bio->bi_end_io == NULL)
                        continue;
-               if (!bio->bi_error &&
+               if (!bio->bi_status &&
                    test_bit(R1BIO_MadeGood, &r1_bio->state)) {
                        rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
                }
-               if (bio->bi_error &&
+               if (bio->bi_status &&
                    test_bit(R1BIO_WriteError, &r1_bio->state)) {
                        if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
                                md_error(conf->mddev, rdev);
@@ -2955,7 +2955,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
        if (!conf->r1bio_pool)
                goto abort;
 
-       conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+       conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
        if (!conf->bio_split)
                goto abort;
 
index 797ed60..57a250f 100644 (file)
@@ -336,7 +336,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
        struct r10conf *conf = r10_bio->mddev->private;
 
        if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
-               bio->bi_error = -EIO;
+               bio->bi_status = BLK_STS_IOERR;
 
        bio_endio(bio);
        /*
@@ -389,7 +389,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
 
 static void raid10_end_read_request(struct bio *bio)
 {
-       int uptodate = !bio->bi_error;
+       int uptodate = !bio->bi_status;
        struct r10bio *r10_bio = bio->bi_private;
        int slot, dev;
        struct md_rdev *rdev;
@@ -477,7 +477,7 @@ static void raid10_end_write_request(struct bio *bio)
        struct bio *to_put = NULL;
        bool discard_error;
 
-       discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+       discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
 
        dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 
@@ -491,7 +491,7 @@ static void raid10_end_write_request(struct bio *bio)
        /*
         * this branch is our 'one mirror IO has finished' event handler:
         */
-       if (bio->bi_error && !discard_error) {
+       if (bio->bi_status && !discard_error) {
                if (repl)
                        /* Never record new bad blocks to replacement,
                         * just fail it.
@@ -913,7 +913,7 @@ static void flush_pending_writes(struct r10conf *conf)
                        bio->bi_next = NULL;
                        bio->bi_bdev = rdev->bdev;
                        if (test_bit(Faulty, &rdev->flags)) {
-                               bio->bi_error = -EIO;
+                               bio->bi_status = BLK_STS_IOERR;
                                bio_endio(bio);
                        } else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
                                            !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1098,7 +1098,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
                bio->bi_next = NULL;
                bio->bi_bdev = rdev->bdev;
                if (test_bit(Faulty, &rdev->flags)) {
-                       bio->bi_error = -EIO;
+                       bio->bi_status = BLK_STS_IOERR;
                        bio_endio(bio);
                } else if (unlikely((bio_op(bio) ==  REQ_OP_DISCARD) &&
                                    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1888,7 +1888,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
 {
        struct r10conf *conf = r10_bio->mddev->private;
 
-       if (!bio->bi_error)
+       if (!bio->bi_status)
                set_bit(R10BIO_Uptodate, &r10_bio->state);
        else
                /* The write handler will notice the lack of
@@ -1972,7 +1972,7 @@ static void end_sync_write(struct bio *bio)
        else
                rdev = conf->mirrors[d].rdev;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                if (repl)
                        md_error(mddev, rdev);
                else {
@@ -2021,7 +2021,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 
        /* find the first device with a block */
        for (i=0; i<conf->copies; i++)
-               if (!r10_bio->devs[i].bio->bi_error)
+               if (!r10_bio->devs[i].bio->bi_status)
                        break;
 
        if (i == conf->copies)
@@ -2050,7 +2050,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
                tpages = get_resync_pages(tbio)->pages;
                d = r10_bio->devs[i].devnum;
                rdev = conf->mirrors[d].rdev;
-               if (!r10_bio->devs[i].bio->bi_error) {
+               if (!r10_bio->devs[i].bio->bi_status) {
                        /* We know that the bi_io_vec layout is the same for
                         * both 'first' and 'i', so we just compare them.
                         * All vec entries are PAGE_SIZE;
@@ -2633,7 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
                        rdev = conf->mirrors[dev].rdev;
                        if (r10_bio->devs[m].bio == NULL)
                                continue;
-                       if (!r10_bio->devs[m].bio->bi_error) {
+                       if (!r10_bio->devs[m].bio->bi_status) {
                                rdev_clear_badblocks(
                                        rdev,
                                        r10_bio->devs[m].addr,
@@ -2649,7 +2649,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
                        if (r10_bio->devs[m].repl_bio == NULL)
                                continue;
 
-                       if (!r10_bio->devs[m].repl_bio->bi_error) {
+                       if (!r10_bio->devs[m].repl_bio->bi_status) {
                                rdev_clear_badblocks(
                                        rdev,
                                        r10_bio->devs[m].addr,
@@ -2675,7 +2675,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
                                        r10_bio->devs[m].addr,
                                        r10_bio->sectors, 0);
                                rdev_dec_pending(rdev, conf->mddev);
-                       } else if (bio != NULL && bio->bi_error) {
+                       } else if (bio != NULL && bio->bi_status) {
                                fail = true;
                                if (!narrow_write_error(r10_bio, m)) {
                                        md_error(conf->mddev, rdev);
@@ -3267,7 +3267,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                r10_bio->devs[i].repl_bio->bi_end_io = NULL;
 
                        bio = r10_bio->devs[i].bio;
-                       bio->bi_error = -EIO;
+                       bio->bi_status = BLK_STS_IOERR;
                        rcu_read_lock();
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
                        if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
@@ -3309,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
                        /* Need to set up for writing to the replacement */
                        bio = r10_bio->devs[i].repl_bio;
-                       bio->bi_error = -EIO;
+                       bio->bi_status = BLK_STS_IOERR;
 
                        sector = r10_bio->devs[i].addr;
                        bio->bi_next = biolist;
@@ -3375,7 +3375,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
                if (bio->bi_end_io == end_sync_read) {
                        md_sync_acct(bio->bi_bdev, nr_sectors);
-                       bio->bi_error = 0;
+                       bio->bi_status = 0;
                        generic_make_request(bio);
                }
        }
@@ -3552,7 +3552,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
        if (!conf->r10bio_pool)
                goto out;
 
-       conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+       conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
        if (!conf->bio_split)
                goto out;
 
@@ -4397,7 +4397,7 @@ read_more:
        read_bio->bi_end_io = end_reshape_read;
        bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
        read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
-       read_bio->bi_error = 0;
+       read_bio->bi_status = 0;
        read_bio->bi_vcnt = 0;
        read_bio->bi_iter.bi_size = 0;
        r10_bio->master_bio = read_bio;
@@ -4641,7 +4641,7 @@ static void end_reshape_write(struct bio *bio)
                rdev = conf->mirrors[d].rdev;
        }
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                /* FIXME should record badblock */
                md_error(mddev, rdev);
        }
index 0a7af8b..bfa1e90 100644 (file)
@@ -572,7 +572,7 @@ static void r5l_log_endio(struct bio *bio)
        struct r5l_log *log = io->log;
        unsigned long flags;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                md_error(log->rdev->mddev, log->rdev);
 
        bio_put(bio);
@@ -1247,7 +1247,7 @@ static void r5l_log_flush_endio(struct bio *bio)
        unsigned long flags;
        struct r5l_io_unit *io;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                md_error(log->rdev->mddev, log->rdev);
 
        spin_lock_irqsave(&log->io_list_lock, flags);
@@ -3063,7 +3063,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
        if (!log->io_pool)
                goto io_pool;
 
-       log->bs = bioset_create(R5L_POOL_SIZE, 0);
+       log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
        if (!log->bs)
                goto io_bs;
 
index ccce92e..77cce35 100644 (file)
@@ -397,7 +397,7 @@ static void ppl_log_endio(struct bio *bio)
 
        pr_debug("%s: seq: %llu\n", __func__, io->seq);
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                md_error(ppl_conf->mddev, log->rdev);
 
        list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
@@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf)
                goto err;
        }
 
-       ppl_conf->bs = bioset_create(conf->raid_disks, 0);
+       ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
        if (!ppl_conf->bs) {
                ret = -ENOMEM;
                goto err;
index ec0f951..62c965b 100644 (file)
@@ -2476,7 +2476,7 @@ static void raid5_end_read_request(struct bio * bi)
 
        pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
-               bi->bi_error);
+               bi->bi_status);
        if (i == disks) {
                bio_reset(bi);
                BUG();
@@ -2496,7 +2496,7 @@ static void raid5_end_read_request(struct bio * bi)
                s = sh->sector + rdev->new_data_offset;
        else
                s = sh->sector + rdev->data_offset;
-       if (!bi->bi_error) {
+       if (!bi->bi_status) {
                set_bit(R5_UPTODATE, &sh->dev[i].flags);
                if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
                        /* Note that this cannot happen on a
@@ -2613,7 +2613,7 @@ static void raid5_end_write_request(struct bio *bi)
        }
        pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
-               bi->bi_error);
+               bi->bi_status);
        if (i == disks) {
                bio_reset(bi);
                BUG();
@@ -2621,14 +2621,14 @@ static void raid5_end_write_request(struct bio *bi)
        }
 
        if (replacement) {
-               if (bi->bi_error)
+               if (bi->bi_status)
                        md_error(conf->mddev, rdev);
                else if (is_badblock(rdev, sh->sector,
                                     STRIPE_SECTORS,
                                     &first_bad, &bad_sectors))
                        set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
        } else {
-               if (bi->bi_error) {
+               if (bi->bi_status) {
                        set_bit(STRIPE_DEGRADED, &sh->state);
                        set_bit(WriteErrorSeen, &rdev->flags);
                        set_bit(R5_WriteError, &sh->dev[i].flags);
@@ -2649,7 +2649,7 @@ static void raid5_end_write_request(struct bio *bi)
        }
        rdev_dec_pending(rdev, conf->mddev);
 
-       if (sh->batch_head && bi->bi_error && !replacement)
+       if (sh->batch_head && bi->bi_status && !replacement)
                set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
        bio_reset(bi);
@@ -3381,7 +3381,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                        sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 
-                       bi->bi_error = -EIO;
+                       bi->bi_status = BLK_STS_IOERR;
                        md_write_end(conf->mddev);
                        bio_endio(bi);
                        bi = nextbi;
@@ -3403,7 +3403,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                       sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
 
-                       bi->bi_error = -EIO;
+                       bi->bi_status = BLK_STS_IOERR;
                        md_write_end(conf->mddev);
                        bio_endio(bi);
                        bi = bi2;
@@ -3429,7 +3429,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                struct bio *nextbi =
                                        r5_next_bio(bi, sh->dev[i].sector);
 
-                               bi->bi_error = -EIO;
+                               bi->bi_status = BLK_STS_IOERR;
                                bio_endio(bi);
                                bi = nextbi;
                        }
@@ -5154,7 +5154,7 @@ static void raid5_align_endio(struct bio *bi)
        struct mddev *mddev;
        struct r5conf *conf;
        struct md_rdev *rdev;
-       int error = bi->bi_error;
+       blk_status_t error = bi->bi_status;
 
        bio_put(bi);
 
@@ -5731,7 +5731,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
                        release_stripe_plug(mddev, sh);
                } else {
                        /* cannot get stripe for read-ahead, just give-up */
-                       bi->bi_error = -EIO;
+                       bi->bi_status = BLK_STS_IOERR;
                        break;
                }
        }
@@ -6943,7 +6943,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                        goto abort;
        }
 
-       conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+       conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
        if (!conf->bio_split)
                goto abort;
        conf->mddev = mddev;
index 4e25a95..43428ce 100644 (file)
@@ -1,5 +1,6 @@
 config MEDIA_CEC_RC
        bool "HDMI CEC RC integration"
        depends on CEC_CORE && RC_CORE
+       depends on CEC_CORE=m || RC_CORE=y
        ---help---
          Pass on CEC remote control messages to the RC framework.
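
Note: the extra dependency rules out CEC_CORE=y with RC_CORE=m, where the
built-in CEC code would reference symbols from a modular RC core.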
index 0860fb4..999926f 100644 (file)
@@ -271,16 +271,10 @@ static long cec_receive(struct cec_adapter *adap, struct cec_fh *fh,
                        bool block, struct cec_msg __user *parg)
 {
        struct cec_msg msg = {};
-       long err = 0;
+       long err;
 
        if (copy_from_user(&msg, parg, sizeof(msg)))
                return -EFAULT;
-       mutex_lock(&adap->lock);
-       if (!adap->is_configured && fh->mode_follower < CEC_MODE_MONITOR)
-               err = -ENONET;
-       mutex_unlock(&adap->lock);
-       if (err)
-               return err;
 
        err = cec_receive_msg(fh, &msg, block);
        if (err)
index acef4ec..3251cba 100644 (file)
@@ -223,7 +223,7 @@ static void i2c_wr8(struct v4l2_subdev *sd, u16 reg, u8 val)
 static void i2c_wr8_and_or(struct v4l2_subdev *sd, u16 reg,
                u8 mask, u8 val)
 {
-       i2c_wrreg(sd, reg, (i2c_rdreg(sd, reg, 2) & mask) | val, 2);
+       i2c_wrreg(sd, reg, (i2c_rdreg(sd, reg, 1) & mask) | val, 1);
 }
 
 static u16 i2c_rd16(struct v4l2_subdev *sd, u16 reg)
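
Note: i2c_wr8_and_or() performed its 8-bit read-modify-write with 2-byte
accesses, clobbering the byte in the neighbouring register; the fix makes
both the read and the write single-byte.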
index e12ec50..90a5f8f 100644 (file)
@@ -183,9 +183,15 @@ static irqreturn_t sir_interrupt(int irq, void *dev_id)
        static unsigned long delt;
        unsigned long deltintr;
        unsigned long flags;
+       int counter = 0;
        int iir, lsr;
 
        while ((iir = inb(io + UART_IIR) & UART_IIR_ID)) {
+               if (++counter > 256) {
+                       dev_err(&sir_ir_dev->dev, "Trapped in interrupt");
+                       break;
+               }
+
                switch (iir & UART_IIR_ID) { /* FIXME: this needs to be thinned out */
                case UART_IIR_MSI:
                        (void)inb(io + UART_MSR);
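
Note: the sir_ir change bounds the IIR drain loop so a stuck UART cannot
wedge the CPU in interrupt context; 256 iterations is an arbitrary escape
hatch after which the handler logs and bails out.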
index 71bd685..4126552 100644 (file)
@@ -336,6 +336,7 @@ static int rain_connect(struct serio *serio, struct serio_driver *drv)
        serio_set_drvdata(serio, rain);
        INIT_WORK(&rain->work, rain_irq_work_handler);
        mutex_init(&rain->write_lock);
+       spin_lock_init(&rain->buf_lock);
 
        err = serio_open(serio, drv);
        if (err)
index 94afbbf..c0175ea 100644 (file)
@@ -868,7 +868,7 @@ EXPORT_SYMBOL_GPL(vb2_core_create_bufs);
 
 void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no)
 {
-       if (plane_no > vb->num_planes || !vb->planes[plane_no].mem_priv)
+       if (plane_no >= vb->num_planes || !vb->planes[plane_no].mem_priv)
                return NULL;
 
        return call_ptr_memop(vb, vaddr, vb->planes[plane_no].mem_priv);
index 99e651c..22de7f5 100644 (file)
@@ -1921,12 +1921,13 @@ static void msb_io_work(struct work_struct *work)
                spin_lock_irqsave(&msb->q_lock, flags);
 
                if (len)
-                       if (!__blk_end_request(msb->req, 0, len))
+                       if (!__blk_end_request(msb->req, BLK_STS_OK, len))
                                msb->req = NULL;
 
                if (error && msb->req) {
+                       blk_status_t ret = errno_to_blk_status(error);
                        dbg_verbose("IO: ending one sector of the request with error");
-                       if (!__blk_end_request(msb->req, error, msb->page_size))
+                       if (!__blk_end_request(msb->req, ret, msb->page_size))
                                msb->req = NULL;
                }
 
@@ -2014,7 +2015,7 @@ static void msb_submit_req(struct request_queue *q)
                WARN_ON(!msb->io_queue_stopped);
 
                while ((req = blk_fetch_request(q)) != NULL)
-                       __blk_end_request_all(req, -ENODEV);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
                return;
        }
 
index c00d8a2..8897962 100644 (file)
@@ -709,7 +709,8 @@ try_again:
                                               msb->req_sg);
 
                if (!msb->seg_count) {
-                       chunk = __blk_end_request_cur(msb->block_req, -ENOMEM);
+                       chunk = __blk_end_request_cur(msb->block_req,
+                                       BLK_STS_RESOURCE);
                        continue;
                }
 
@@ -776,7 +777,8 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
                if (error && !t_len)
                        t_len = blk_rq_cur_bytes(msb->block_req);
 
-               chunk = __blk_end_request(msb->block_req, error, t_len);
+               chunk = __blk_end_request(msb->block_req,
+                               errno_to_blk_status(error), t_len);
 
                error = mspro_block_issue_req(card, chunk);
 
@@ -838,7 +840,7 @@ static void mspro_block_submit_req(struct request_queue *q)
 
        if (msb->eject) {
                while ((req = blk_fetch_request(q)) != NULL)
-                       __blk_end_request_all(req, -ENODEV);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
 
                return;
        }
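
The memstick conversion shows the other half of the same migration: the legacy request-completion helpers (__blk_end_request() and friends) also take a blk_status_t now, and drivers that keep tracking errnos internally translate at the boundary with errno_to_blk_status(). A hedged sketch of that boundary; my_complete() is a hypothetical helper, not part of this driver:

    #include <linux/blkdev.h>
    #include <linux/blk_types.h>

    /* finish 'bytes' of a request, converting an internal errno to the
     * block layer's status type at the very edge */
    static bool my_complete(struct request *req, int error, unsigned int bytes)
    {
            blk_status_t status = errno_to_blk_status(error);

            /* returns true while part of the request is still pending */
            return __blk_end_request(req, status, bytes);
    }

Note how the patch picks explicit BLK_STS_* codes where the old code passed magic errnos: -ENOMEM becomes BLK_STS_RESOURCE and -ENODEV is folded into BLK_STS_IOERR.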
index 75488e6..8d46e3a 100644 (file)
@@ -245,8 +245,7 @@ static int arizona_poll_reg(struct arizona *arizona,
        int ret;
 
        ret = regmap_read_poll_timeout(arizona->regmap,
-                                      ARIZONA_INTERRUPT_RAW_STATUS_5, val,
-                                      ((val & mask) == target),
+                                      reg, val, ((val & mask) == target),
                                       ARIZONA_REG_POLL_DELAY_US,
                                       timeout_ms * 1000);
        if (ret)
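
This arizona fix is easier to read with the macro's signature in mind: regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) repeatedly reads addr into val until cond is true or timeout_us expires, and the bug was that the helper always polled the hard-coded ARIZONA_INTERRUPT_RAW_STATUS_5 instead of its reg parameter. A small usage sketch against a made-up register and bit:

    #include <linux/regmap.h>

    #define MY_STATUS_REG   0x100           /* hypothetical register */
    #define MY_READY_BIT    BIT(0)          /* hypothetical ready flag */

    static int my_wait_ready(struct regmap *map)
    {
            unsigned int val;

            /* poll MY_STATUS_REG every 1000us, give up after 100ms;
             * returns 0 on success or -ETIMEDOUT */
            return regmap_read_poll_timeout(map, MY_STATUS_REG, val,
                                            (val & MY_READY_BIT),
                                            1000, 100 * 1000);
    }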
index 4472ce1..8c32040 100644 (file)
@@ -45,7 +45,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
        mutex_init(&ctx->mapping_lock);
        ctx->mapping = NULL;
 
-       if (cxl_is_psl8(afu)) {
+       if (cxl_is_power8()) {
                spin_lock_init(&ctx->sste_lock);
 
                /*
@@ -189,7 +189,7 @@ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma)
                if (start + len > ctx->afu->adapter->ps_size)
                        return -EINVAL;
 
-               if (cxl_is_psl9(ctx->afu)) {
+               if (cxl_is_power9()) {
                        /*
                         * Make sure there is a valid problem state
                         * area space for this AFU.
@@ -324,7 +324,7 @@ static void reclaim_ctx(struct rcu_head *rcu)
 {
        struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu);
 
-       if (cxl_is_psl8(ctx->afu))
+       if (cxl_is_power8())
                free_page((u64)ctx->sstp);
        if (ctx->ff_page)
                __free_page(ctx->ff_page);
index c8568ea..a03f8e7 100644 (file)
@@ -357,6 +357,7 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
 #define CXL_PSL9_DSISR_An_PF_RGP  0x0000000000000090ULL  /* PTE not found (Radix Guest (parent)) 0b10010000 */
 #define CXL_PSL9_DSISR_An_PF_HRH  0x0000000000000094ULL  /* PTE not found (HPT/Radix Host)       0b10010100 */
 #define CXL_PSL9_DSISR_An_PF_STEG 0x000000000000009CULL  /* PTE not found (STEG VA)              0b10011100 */
+#define CXL_PSL9_DSISR_An_URTCH   0x00000000000000B4ULL  /* Unsupported Radix Tree Configuration 0b10110100 */
 
 /****** CXL_PSL_TFC_An ******************************************************/
 #define CXL_PSL_TFC_An_A  (1ull << (63-28)) /* Acknowledge non-translation fault */
@@ -844,24 +845,15 @@ static inline bool cxl_is_power8(void)
 
 static inline bool cxl_is_power9(void)
 {
-       /* intermediate solution */
-       if (!cxl_is_power8() &&
-          (cpu_has_feature(CPU_FTRS_POWER9) ||
-           cpu_has_feature(CPU_FTR_POWER9_DD1)))
+       if (pvr_version_is(PVR_POWER9))
                return true;
        return false;
 }
 
-static inline bool cxl_is_psl8(struct cxl_afu *afu)
+static inline bool cxl_is_power9_dd1(void)
 {
-       if (afu->adapter->caia_major == 1)
-               return true;
-       return false;
-}
-
-static inline bool cxl_is_psl9(struct cxl_afu *afu)
-{
-       if (afu->adapter->caia_major == 2)
+       if ((pvr_version_is(PVR_POWER9)) &&
+           cpu_has_feature(CPU_FTR_POWER9_DD1))
                return true;
        return false;
 }
index 5344448..c79e39b 100644 (file)
@@ -187,7 +187,7 @@ static struct mm_struct *get_mem_context(struct cxl_context *ctx)
 
 static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
 {
-       if ((cxl_is_psl8(ctx->afu)) && (dsisr & CXL_PSL_DSISR_An_DS))
+       if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS)))
                return true;
 
        return false;
@@ -195,16 +195,23 @@ static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
 
 static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
 {
-       if ((cxl_is_psl8(ctx->afu)) && (dsisr & CXL_PSL_DSISR_An_DM))
-               return true;
+       u64 crs; /* Translation Checkout Response Status */
 
-       if ((cxl_is_psl9(ctx->afu)) &&
-          ((dsisr & CXL_PSL9_DSISR_An_CO_MASK) &
-               (CXL_PSL9_DSISR_An_PF_SLR | CXL_PSL9_DSISR_An_PF_RGC |
-                CXL_PSL9_DSISR_An_PF_RGP | CXL_PSL9_DSISR_An_PF_HRH |
-                CXL_PSL9_DSISR_An_PF_STEG)))
+       if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM))
                return true;
 
+       if (cxl_is_power9()) {
+               crs = (dsisr & CXL_PSL9_DSISR_An_CO_MASK);
+               if ((crs == CXL_PSL9_DSISR_An_PF_SLR) ||
+                   (crs == CXL_PSL9_DSISR_An_PF_RGC) ||
+                   (crs == CXL_PSL9_DSISR_An_PF_RGP) ||
+                   (crs == CXL_PSL9_DSISR_An_PF_HRH) ||
+                   (crs == CXL_PSL9_DSISR_An_PF_STEG) ||
+                   (crs == CXL_PSL9_DSISR_An_URTCH)) {
+                       return true;
+               }
+       }
+
        return false;
 }
 
index 1703655..c1ba0d4 100644 (file)
@@ -329,8 +329,15 @@ static int __init init_cxl(void)
 
        cxl_debugfs_init();
 
-       if ((rc = register_cxl_calls(&cxl_calls)))
-               goto err;
+       /*
+        * We don't register the callback on P9. The SLB callback is
+        * only used for the PSL8 MMU and CX4.
+        */
+       if (cxl_is_power8()) {
+               rc = register_cxl_calls(&cxl_calls);
+               if (rc)
+                       goto err;
+       }
 
        if (cpu_has_feature(CPU_FTR_HVMODE)) {
                cxl_ops = &cxl_native_ops;
@@ -347,7 +354,8 @@ static int __init init_cxl(void)
 
        return 0;
 err1:
-       unregister_cxl_calls(&cxl_calls);
+       if (cxl_is_power8())
+               unregister_cxl_calls(&cxl_calls);
 err:
        cxl_debugfs_exit();
        cxl_file_exit();
@@ -366,7 +374,8 @@ static void exit_cxl(void)
 
        cxl_debugfs_exit();
        cxl_file_exit();
-       unregister_cxl_calls(&cxl_calls);
+       if (cxl_is_power8())
+               unregister_cxl_calls(&cxl_calls);
        idr_destroy(&cxl_adapter_idr);
 }
 
index 8d6ea97..2b2f889 100644 (file)
@@ -105,11 +105,16 @@ static int native_afu_reset(struct cxl_afu *afu)
                           CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
                           false);
 
-       /* Re-enable any masked interrupts */
-       serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
-       serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
-       cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
-
+       /*
+        * Re-enable any masked interrupts when the AFU is not
+        * activated to avoid side effects after attaching a process
+        * in dedicated mode.
+        */
+       if (afu->current_mode == 0) {
+               serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+               serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
+               cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+       }
 
        return rc;
 }
@@ -139,9 +144,9 @@ int cxl_psl_purge(struct cxl_afu *afu)
 
        pr_devel("PSL purge request\n");
 
-       if (cxl_is_psl8(afu))
+       if (cxl_is_power8())
                trans_fault = CXL_PSL_DSISR_TRANS;
-       if (cxl_is_psl9(afu))
+       if (cxl_is_power9())
                trans_fault = CXL_PSL9_DSISR_An_TF;
 
        if (!cxl_ops->link_ok(afu->adapter, afu)) {
@@ -603,7 +608,7 @@ static u64 calculate_sr(struct cxl_context *ctx)
                if (!test_tsk_thread_flag(current, TIF_32BIT))
                        sr |= CXL_PSL_SR_An_SF;
        }
-       if (cxl_is_psl9(ctx->afu)) {
+       if (cxl_is_power9()) {
                if (radix_enabled())
                        sr |= CXL_PSL_SR_An_XLAT_ror;
                else
@@ -1117,10 +1122,10 @@ static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx,
 
 static bool cxl_is_translation_fault(struct cxl_afu *afu, u64 dsisr)
 {
-       if ((cxl_is_psl8(afu)) && (dsisr & CXL_PSL_DSISR_TRANS))
+       if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_TRANS))
                return true;
 
-       if ((cxl_is_psl9(afu)) && (dsisr & CXL_PSL9_DSISR_An_TF))
+       if ((cxl_is_power9()) && (dsisr & CXL_PSL9_DSISR_An_TF))
                return true;
 
        return false;
@@ -1194,10 +1199,10 @@ static void native_irq_wait(struct cxl_context *ctx)
                if (ph != ctx->pe)
                        return;
                dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An);
-               if (cxl_is_psl8(ctx->afu) &&
+               if (cxl_is_power8() &&
                   ((dsisr & CXL_PSL_DSISR_PENDING) == 0))
                        return;
-               if (cxl_is_psl9(ctx->afu) &&
+               if (cxl_is_power9() &&
                   ((dsisr & CXL_PSL9_DSISR_PENDING) == 0))
                        return;
                /*
index 6dc1ee5..1eb9859 100644 (file)
@@ -436,7 +436,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci
        /* nMMU_ID Defaults to: b'000001001' */
        xsl_dsnctl |= ((u64)0x09 << (63-28));
 
-       if (cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+       if (!(cxl_is_power9_dd1())) {
                /*
                 * Used to identify CAPI packets which should be sorted into
                 * the Non-Blocking queues by the PHB. This field should match
@@ -491,7 +491,7 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci
        cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000003FFFF0000ULL);
 
        /* Disable vc dd1 fix */
-       if ((cxl_is_power9() && cpu_has_feature(CPU_FTR_POWER9_DD1)))
+       if (cxl_is_power9_dd1())
                cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0400000000000001ULL);
 
        return 0;
@@ -1439,8 +1439,7 @@ int cxl_pci_reset(struct cxl *adapter)
         * The adapter is about to be reset, so ignore errors.
         * Not supported on P9 DD1
         */
-       if ((cxl_is_power8()) ||
-           ((cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1))))
+       if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
                cxl_data_cache_flush(adapter);
 
        /* pcie_warm_reset requests a fundamental pci reset which includes a
@@ -1750,7 +1749,6 @@ static const struct cxl_service_layer_ops psl9_ops = {
        .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9,
        .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9,
        .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
-       .err_irq_dump_registers = cxl_native_err_irq_dump_regs,
        .debugfs_stop_trace = cxl_stop_trace_psl9,
        .write_timebase_ctrl = write_timebase_ctrl_psl9,
        .timebase_read = timebase_read_psl9,
@@ -1889,8 +1887,7 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
         * Flush adapter datacache as its about to be removed.
         * Not supported on P9 DD1.
         */
-       if ((cxl_is_power8()) ||
-           ((cxl_is_power9() && !cpu_has_feature(CPU_FTR_POWER9_DD1))))
+       if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
                cxl_data_cache_flush(adapter);
 
        cxl_deconfigure_adapter(adapter);
index 8273b07..6ff94a9 100644 (file)
@@ -1184,9 +1184,10 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
        struct mmc_card *card = md->queue.card;
        unsigned int from, nr, arg;
        int err = 0, type = MMC_BLK_DISCARD;
+       blk_status_t status = BLK_STS_OK;
 
        if (!mmc_can_erase(card)) {
-               err = -EOPNOTSUPP;
+               status = BLK_STS_NOTSUPP;
                goto fail;
        }
 
@@ -1212,10 +1213,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
                if (!err)
                        err = mmc_erase(card, from, nr, arg);
        } while (err == -EIO && !mmc_blk_reset(md, card->host, type));
-       if (!err)
+       if (err)
+               status = BLK_STS_IOERR;
+       else
                mmc_blk_reset_success(md, type);
 fail:
-       blk_end_request(req, err, blk_rq_bytes(req));
+       blk_end_request(req, status, blk_rq_bytes(req));
 }
 
 static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
@@ -1225,9 +1228,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
        struct mmc_card *card = md->queue.card;
        unsigned int from, nr, arg;
        int err = 0, type = MMC_BLK_SECDISCARD;
+       blk_status_t status = BLK_STS_OK;
 
        if (!(mmc_can_secure_erase_trim(card))) {
-               err = -EOPNOTSUPP;
+               status = BLK_STS_NOTSUPP;
                goto out;
        }
 
@@ -1254,8 +1258,10 @@ retry:
        err = mmc_erase(card, from, nr, arg);
        if (err == -EIO)
                goto out_retry;
-       if (err)
+       if (err) {
+               status = BLK_STS_IOERR;
                goto out;
+       }
 
        if (arg == MMC_SECURE_TRIM1_ARG) {
                if (card->quirks & MMC_QUIRK_INAND_CMD38) {
@@ -1270,8 +1276,10 @@ retry:
                err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
                if (err == -EIO)
                        goto out_retry;
-               if (err)
+               if (err) {
+                       status = BLK_STS_IOERR;
                        goto out;
+               }
        }
 
 out_retry:
@@ -1280,7 +1288,7 @@ out_retry:
        if (!err)
                mmc_blk_reset_success(md, type);
 out:
-       blk_end_request(req, err, blk_rq_bytes(req));
+       blk_end_request(req, status, blk_rq_bytes(req));
 }
 
 static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
@@ -1290,10 +1298,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
        int ret = 0;
 
        ret = mmc_flush_cache(card);
-       if (ret)
-               ret = -EIO;
-
-       blk_end_request_all(req, ret);
+       blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
 }
 
 /*
@@ -1641,7 +1646,7 @@ static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
 {
        if (mmc_card_removed(card))
                req->rq_flags |= RQF_QUIET;
-       while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req)));
+       while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req)));
        mmc_queue_req_free(mq, mqrq);
 }
 
@@ -1661,7 +1666,7 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
         */
        if (mmc_card_removed(mq->card)) {
                req->rq_flags |= RQF_QUIET;
-               blk_end_request_all(req, -EIO);
+               blk_end_request_all(req, BLK_STS_IOERR);
                mmc_queue_req_free(mq, mqrq);
                return;
        }
@@ -1743,7 +1748,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
                         */
                        mmc_blk_reset_success(md, type);
 
-                       req_pending = blk_end_request(old_req, 0,
+                       req_pending = blk_end_request(old_req, BLK_STS_OK,
                                                      brq->data.bytes_xfered);
                        /*
                         * If the blk_end_request function returns non-zero even
@@ -1811,7 +1816,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
                         * time, so we only reach here after trying to
                         * read a single sector.
                         */
-                       req_pending = blk_end_request(old_req, -EIO,
+                       req_pending = blk_end_request(old_req, BLK_STS_IOERR,
                                                      brq->data.blksz);
                        if (!req_pending) {
                                mmc_queue_req_free(mq, mq_rq);
@@ -1860,7 +1865,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
        ret = mmc_blk_part_switch(card, md);
        if (ret) {
                if (req) {
-                       blk_end_request_all(req, -EIO);
+                       blk_end_request_all(req, BLK_STS_IOERR);
                }
                goto out;
        }
index 5c37b6b..b659a28 100644 (file)
@@ -133,7 +133,7 @@ static void mmc_request_fn(struct request_queue *q)
        if (!mq) {
                while ((req = blk_fetch_request(q)) != NULL) {
                        req->rq_flags |= RQF_QUIET;
-                       __blk_end_request_all(req, -EIO);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
                }
                return;
        }
@@ -388,7 +388,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
                mmc_queue_setup_discard(mq->queue, card);
 
        if (card->bouncesz) {
-               blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
                blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
                blk_queue_max_segments(mq->queue, card->bouncesz / 512);
                blk_queue_max_segment_size(mq->queue, card->bouncesz);
index 1842ed3..de962c2 100644 (file)
@@ -210,6 +210,15 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc,
        int i;
        bool use_desc_chain_mode = true;
 
+       /*
+        * Broken SDIO with AP6255-based WiFi on Khadas VIM Pro has been
+        * reported. For some strange reason this occurs in descriptor
+        * chain mode only. So let's fall back to bounce buffer mode
+        * for command SD_IO_RW_EXTENDED.
+        */
+       if (mrq->cmd->opcode == SD_IO_RW_EXTENDED)
+               return;
+
        for_each_sg(data->sg, sg, data->sg_len, i)
                /* check for 8 byte alignment */
                if (sg->offset & 7) {
index 92fc3f7..18957fe 100644 (file)
@@ -404,10 +404,9 @@ struct intel_host {
        bool    d3_retune;
 };
 
-const u8 intel_dsm_uuid[] = {
-       0xA5, 0x3E, 0xC1, 0xF6, 0xCD, 0x65, 0x1F, 0x46,
-       0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61,
-};
+static const guid_t intel_dsm_guid =
+       GUID_INIT(0xF6C13EA5, 0x65CD, 0x461F,
+                 0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61);
 
 static int __intel_dsm(struct intel_host *intel_host, struct device *dev,
                       unsigned int fn, u32 *result)
@@ -416,7 +415,7 @@ static int __intel_dsm(struct intel_host *intel_host, struct device *dev,
        int err = 0;
        size_t len;
 
-       obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), intel_dsm_uuid, 0, fn, NULL);
+       obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &intel_dsm_guid, 0, fn, NULL);
        if (!obj)
                return -EOPNOTSUPP;
 
index 6b8d5cd..f336a9b 100644 (file)
@@ -73,7 +73,7 @@ static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
 }
 
 
-static int do_blktrans_request(struct mtd_blktrans_ops *tr,
+static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
                               struct mtd_blktrans_dev *dev,
                               struct request *req)
 {
@@ -84,33 +84,37 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
        nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
        buf = bio_data(req->bio);
 
-       if (req_op(req) == REQ_OP_FLUSH)
-               return tr->flush(dev);
+       if (req_op(req) == REQ_OP_FLUSH) {
+               if (tr->flush(dev))
+                       return BLK_STS_IOERR;
+               return BLK_STS_OK;
+       }
 
        if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
            get_capacity(req->rq_disk))
-               return -EIO;
+               return BLK_STS_IOERR;
 
        switch (req_op(req)) {
        case REQ_OP_DISCARD:
-               return tr->discard(dev, block, nsect);
+               if (tr->discard(dev, block, nsect))
+                       return BLK_STS_IOERR;
+               return BLK_STS_OK;
        case REQ_OP_READ:
                for (; nsect > 0; nsect--, block++, buf += tr->blksize)
                        if (tr->readsect(dev, block, buf))
-                               return -EIO;
+                               return BLK_STS_IOERR;
                rq_flush_dcache_pages(req);
-               return 0;
+               return BLK_STS_OK;
        case REQ_OP_WRITE:
                if (!tr->writesect)
-                       return -EIO;
+                       return BLK_STS_IOERR;
 
                rq_flush_dcache_pages(req);
                for (; nsect > 0; nsect--, block++, buf += tr->blksize)
                        if (tr->writesect(dev, block, buf))
-                               return -EIO;
-               return 0;
+                               return BLK_STS_IOERR;
        default:
-               return -EIO;
+               return BLK_STS_IOERR;
        }
 }
 
@@ -132,7 +136,7 @@ static void mtd_blktrans_work(struct work_struct *work)
        spin_lock_irq(rq->queue_lock);
 
        while (1) {
-               int res;
+               blk_status_t res;
 
                dev->bg_stop = false;
                if (!req && !(req = blk_fetch_request(rq))) {
@@ -178,7 +182,7 @@ static void mtd_blktrans_request(struct request_queue *rq)
 
        if (!dev)
                while ((req = blk_fetch_request(rq)) != NULL)
-                       __blk_end_request_all(req, -ENODEV);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
        else
                queue_work(dev->wq, &dev->work);
 }
@@ -413,6 +417,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
        new->rq->queuedata = new;
        blk_queue_logical_block_size(new->rq, tr->blksize);
 
+       blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
        queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
 
index 5497e65..c3963f8 100644 (file)
@@ -313,10 +313,10 @@ static void ubiblock_do_work(struct work_struct *work)
        ret = ubiblock_read(pdu);
        rq_flush_dcache_pages(req);
 
-       blk_mq_end_request(req, ret);
+       blk_mq_end_request(req, errno_to_blk_status(ret));
 }
 
-static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
                             const struct blk_mq_queue_data *bd)
 {
        struct request *req = bd->rq;
@@ -327,9 +327,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
        case REQ_OP_READ:
                ubi_sgl_init(&pdu->usgl);
                queue_work(dev->wq, &pdu->work);
-               return BLK_MQ_RQ_QUEUE_OK;
+               return BLK_STS_OK;
        default:
-               return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_IOERR;
        }
 
 }
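
The ubiblock hunk reflects another 4.13 interface change: blk-mq's ->queue_rq() now returns blk_status_t directly, retiring the BLK_MQ_RQ_QUEUE_* codes. A skeletal queue_rq under the new contract; my_queue_rq and the surrounding names are hypothetical:

    #include <linux/blk-mq.h>

    static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                                    const struct blk_mq_queue_data *bd)
    {
            struct request *req = bd->rq;

            switch (req_op(req)) {
            case REQ_OP_READ:
            case REQ_OP_WRITE:
                    blk_mq_start_request(req);
                    /* ...kick off the actual I/O here... */
                    return BLK_STS_OK;
            default:
                    return BLK_STS_IOERR;   /* unsupported operation */
            }
    }

    static const struct blk_mq_ops my_mq_ops = {
            .queue_rq = my_queue_rq,
    };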
index 62ee439..53a1cb5 100644 (file)
@@ -756,6 +756,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
        struct net_device *dev = dev_id;
        struct arcnet_local *lp;
        int recbuf, status, diagstatus, didsomething, boguscount;
+       unsigned long flags;
        int retval = IRQ_NONE;
 
        arc_printk(D_DURING, dev, "\n");
@@ -765,7 +766,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
        lp = netdev_priv(dev);
        BUG_ON(!lp);
 
-       spin_lock(&lp->lock);
+       spin_lock_irqsave(&lp->lock, flags);
 
        /* RESET flag was enabled - if device is not running, we must
         * clear it right away (but nothing else).
@@ -774,7 +775,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
                if (lp->hw.status(dev) & RESETflag)
                        lp->hw.command(dev, CFLAGScmd | RESETclear);
                lp->hw.intmask(dev, 0);
-               spin_unlock(&lp->lock);
+               spin_unlock_irqrestore(&lp->lock, flags);
                return retval;
        }
 
@@ -998,7 +999,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
        udelay(1);
        lp->hw.intmask(dev, lp->intmask);
 
-       spin_unlock(&lp->lock);
+       spin_unlock_irqrestore(&lp->lock, flags);
        return retval;
 }
 EXPORT_SYMBOL(arcnet_interrupt);
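
The arcnet change swaps a plain spin_lock() for the irqsave variant inside the interrupt handler. Plain spin_lock() in a handler is only safe while the lock is taken exclusively from hard-IRQ context with interrupts off; once the same lock can be taken where interrupts are enabled (process context, or a forced-threaded handler), the save/restore form is the robust pattern. A generic sketch, with my_priv and my_interrupt hypothetical:

    #include <linux/interrupt.h>
    #include <linux/spinlock.h>

    struct my_priv {
            spinlock_t lock;
            int status;             /* state shared with other contexts */
    };

    static irqreturn_t my_interrupt(int irq, void *dev_id)
    {
            struct my_priv *priv = dev_id;
            unsigned long flags;

            /* save and restore the interrupt state instead of assuming
             * interrupts are already disabled here */
            spin_lock_irqsave(&priv->lock, flags);
            priv->status = 0;       /* ...touch shared state... */
            spin_unlock_irqrestore(&priv->lock, flags);

            return IRQ_HANDLED;
    }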
index 2056878..4fa2e46 100644 (file)
@@ -212,7 +212,7 @@ static int ack_tx(struct net_device *dev, int acked)
        ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */
        ackpkt->soft.cap.mes.ack = acked;
 
-       arc_printk(D_PROTO, dev, "Ackknowledge for cap packet %x.\n",
+       arc_printk(D_PROTO, dev, "Acknowledge for cap packet %x.\n",
                   *((int *)&ackpkt->soft.cap.cookie[0]));
 
        ackskb->protocol = cpu_to_be16(ETH_P_ARCNET);
index 239de38..47f80b8 100644 (file)
@@ -135,6 +135,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
        for (i = 0; i < ci->devcount; i++) {
                struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i];
                struct com20020_dev *card;
+               int dev_id_mask = 0xf;
 
                dev = alloc_arcdev(device);
                if (!dev) {
@@ -166,6 +167,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
                arcnet_outb(0x00, ioaddr, COM20020_REG_W_COMMAND);
                arcnet_inb(ioaddr, COM20020_REG_R_DIAGSTAT);
 
+               SET_NETDEV_DEV(dev, &pdev->dev);
                dev->base_addr = ioaddr;
                dev->dev_addr[0] = node;
                dev->irq = pdev->irq;
@@ -179,8 +181,8 @@ static int com20020pci_probe(struct pci_dev *pdev,
 
                /* Get the dev_id from the PLX rotary coder */
                if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15))
-                       dev->dev_id = 0xc;
-               dev->dev_id ^= inb(priv->misc + ci->rotary) >> 4;
+                       dev_id_mask = 0x3;
+               dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask;
 
                snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i);
 
index 13d9ad4..78043a9 100644 (file)
@@ -246,8 +246,6 @@ int com20020_found(struct net_device *dev, int shared)
                return -ENODEV;
        }
 
-       dev->base_addr = ioaddr;
-
        arc_printk(D_NORMAL, dev, "%s: station %02Xh found at %03lXh, IRQ %d.\n",
                   lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq);
 
index b44a6ae..e5386ab 100644 (file)
@@ -90,10 +90,13 @@ enum ad_link_speed_type {
        AD_LINK_SPEED_100MBPS,
        AD_LINK_SPEED_1000MBPS,
        AD_LINK_SPEED_2500MBPS,
+       AD_LINK_SPEED_5000MBPS,
        AD_LINK_SPEED_10000MBPS,
+       AD_LINK_SPEED_14000MBPS,
        AD_LINK_SPEED_20000MBPS,
        AD_LINK_SPEED_25000MBPS,
        AD_LINK_SPEED_40000MBPS,
+       AD_LINK_SPEED_50000MBPS,
        AD_LINK_SPEED_56000MBPS,
        AD_LINK_SPEED_100000MBPS,
 };
@@ -259,10 +262,13 @@ static inline int __check_agg_selection_timer(struct port *port)
  *     %AD_LINK_SPEED_100MBPS,
  *     %AD_LINK_SPEED_1000MBPS,
  *     %AD_LINK_SPEED_2500MBPS,
+ *     %AD_LINK_SPEED_5000MBPS,
  *     %AD_LINK_SPEED_10000MBPS
+ *     %AD_LINK_SPEED_14000MBPS,
  *     %AD_LINK_SPEED_20000MBPS
  *     %AD_LINK_SPEED_25000MBPS
  *     %AD_LINK_SPEED_40000MBPS
+ *     %AD_LINK_SPEED_50000MBPS
  *     %AD_LINK_SPEED_56000MBPS
  *     %AD_LINK_SPEED_100000MBPS
  */
@@ -296,10 +302,18 @@ static u16 __get_link_speed(struct port *port)
                        speed = AD_LINK_SPEED_2500MBPS;
                        break;
 
+               case SPEED_5000:
+                       speed = AD_LINK_SPEED_5000MBPS;
+                       break;
+
                case SPEED_10000:
                        speed = AD_LINK_SPEED_10000MBPS;
                        break;
 
+               case SPEED_14000:
+                       speed = AD_LINK_SPEED_14000MBPS;
+                       break;
+
                case SPEED_20000:
                        speed = AD_LINK_SPEED_20000MBPS;
                        break;
@@ -312,6 +326,10 @@ static u16 __get_link_speed(struct port *port)
                        speed = AD_LINK_SPEED_40000MBPS;
                        break;
 
+               case SPEED_50000:
+                       speed = AD_LINK_SPEED_50000MBPS;
+                       break;
+
                case SPEED_56000:
                        speed = AD_LINK_SPEED_56000MBPS;
                        break;
@@ -707,9 +725,15 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)
                case AD_LINK_SPEED_2500MBPS:
                        bandwidth = nports * 2500;
                        break;
+               case AD_LINK_SPEED_5000MBPS:
+                       bandwidth = nports * 5000;
+                       break;
                case AD_LINK_SPEED_10000MBPS:
                        bandwidth = nports * 10000;
                        break;
+               case AD_LINK_SPEED_14000MBPS:
+                       bandwidth = nports * 14000;
+                       break;
                case AD_LINK_SPEED_20000MBPS:
                        bandwidth = nports * 20000;
                        break;
@@ -719,6 +743,9 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)
                case AD_LINK_SPEED_40000MBPS:
                        bandwidth = nports * 40000;
                        break;
+               case AD_LINK_SPEED_50000MBPS:
+                       bandwidth = nports * 50000;
+                       break;
                case AD_LINK_SPEED_56000MBPS:
                        bandwidth = nports * 56000;
                        break;
index 2359478..8ab6bdb 100644 (file)
@@ -4192,7 +4192,6 @@ static void bond_destructor(struct net_device *bond_dev)
        struct bonding *bond = netdev_priv(bond_dev);
        if (bond->wq)
                destroy_workqueue(bond->wq);
-       free_netdev(bond_dev);
 }
 
 void bond_setup(struct net_device *bond_dev)
@@ -4212,7 +4211,8 @@ void bond_setup(struct net_device *bond_dev)
        bond_dev->netdev_ops = &bond_netdev_ops;
        bond_dev->ethtool_ops = &bond_ethtool_ops;
 
-       bond_dev->destructor = bond_destructor;
+       bond_dev->needs_free_netdev = true;
+       bond_dev->priv_destructor = bond_destructor;
 
        SET_NETDEV_DEVTYPE(bond_dev, &bond_type);
 
@@ -4736,7 +4736,7 @@ int bond_create(struct net *net, const char *name)
 
        rtnl_unlock();
        if (res < 0)
-               bond_destructor(bond_dev);
+               free_netdev(bond_dev);
        return res;
 }
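
The bonding hunks follow the recent net core change that split the old dev->destructor into two pieces: needs_free_netdev asks the core to call free_netdev() itself after unregistering, while priv_destructor is an optional hook that releases only driver-private state and must not free the netdev. That is why bond_create()'s error path now calls free_netdev() explicitly. A sketch of the new pattern with hypothetical names:

    #include <linux/netdevice.h>

    struct my_priv {
            struct workqueue_struct *wq;
    };

    /* frees private resources only; the core frees the netdev itself */
    static void my_priv_destructor(struct net_device *dev)
    {
            struct my_priv *priv = netdev_priv(dev);

            if (priv->wq)
                    destroy_workqueue(priv->wq);
    }

    static void my_setup(struct net_device *dev)
    {
            dev->needs_free_netdev = true;
            dev->priv_destructor = my_priv_destructor;
    }

Drivers with nothing private to release (caif, vcan, vxcan above) simply set needs_free_netdev and drop the destructor entirely.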
 
index ddabce7..71a7c3b 100644 (file)
@@ -1121,7 +1121,7 @@ static void cfhsi_setup(struct net_device *dev)
        dev->flags = IFF_POINTOPOINT | IFF_NOARP;
        dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ;
        dev->priv_flags |= IFF_NO_QUEUE;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        dev->netdev_ops = &cfhsi_netdevops;
        for (i = 0; i < CFHSI_PRIO_LAST; ++i)
                skb_queue_head_init(&cfhsi->qhead[i]);
index c2dea49..76e1d35 100644 (file)
@@ -428,7 +428,7 @@ static void caifdev_setup(struct net_device *dev)
        dev->flags = IFF_POINTOPOINT | IFF_NOARP;
        dev->mtu = CAIF_MAX_MTU;
        dev->priv_flags |= IFF_NO_QUEUE;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        skb_queue_head_init(&serdev->head);
        serdev->common.link_select = CAIF_LINK_LOW_LATENCY;
        serdev->common.use_frag = true;
index 3a529fb..fc21afe 100644 (file)
@@ -712,7 +712,7 @@ static void cfspi_setup(struct net_device *dev)
        dev->flags = IFF_NOARP | IFF_POINTOPOINT;
        dev->priv_flags |= IFF_NO_QUEUE;
        dev->mtu = SPI_MAX_PAYLOAD_SIZE;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        skb_queue_head_init(&cfspi->qhead);
        skb_queue_head_init(&cfspi->chead);
        cfspi->cfdev.link_select = CAIF_LINK_HIGH_BANDW;
index 6122768..1794ea0 100644 (file)
@@ -617,7 +617,7 @@ static void cfv_netdev_setup(struct net_device *netdev)
        netdev->tx_queue_len = 100;
        netdev->flags = IFF_POINTOPOINT | IFF_NOARP;
        netdev->mtu = CFV_DEF_MTU_SIZE;
-       netdev->destructor = free_netdev;
+       netdev->needs_free_netdev = true;
 }
 
 /* Create debugfs counters for the device */
index 611d16a..ae4ed03 100644 (file)
@@ -391,6 +391,9 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
        can_update_state_error_stats(dev, new_state);
        priv->state = new_state;
 
+       if (!cf)
+               return;
+
        if (unlikely(new_state == CAN_STATE_BUS_OFF)) {
                cf->can_id |= CAN_ERR_BUSOFF;
                return;
index 0d57be5..85268be 100644 (file)
@@ -489,7 +489,7 @@ int peak_canfd_handle_msgs_list(struct peak_canfd_priv *priv,
                                struct pucan_rx_msg *msg_list, int msg_count)
 {
        void *msg_ptr = msg_list;
-       int i, msg_size;
+       int i, msg_size = 0;
 
        for (i = 0; i < msg_count; i++) {
                msg_size = peak_canfd_handle_msg(priv, msg_ptr);
index eb71737..6a6e896 100644 (file)
@@ -417,7 +417,7 @@ static int slc_open(struct net_device *dev)
 static void slc_free_netdev(struct net_device *dev)
 {
        int i = dev->base_addr;
-       free_netdev(dev);
+
        slcan_devs[i] = NULL;
 }
 
@@ -436,7 +436,8 @@ static const struct net_device_ops slc_netdev_ops = {
 static void slc_setup(struct net_device *dev)
 {
        dev->netdev_ops         = &slc_netdev_ops;
-       dev->destructor         = slc_free_netdev;
+       dev->needs_free_netdev  = true;
+       dev->priv_destructor    = slc_free_netdev;
 
        dev->hard_header_len    = 0;
        dev->addr_len           = 0;
@@ -761,8 +762,6 @@ static void __exit slcan_exit(void)
                if (sl->tty) {
                        printk(KERN_ERR "%s: tty discipline still running\n",
                               dev->name);
-                       /* Intentionally leak the control block. */
-                       dev->destructor = NULL;
                }
 
                unregister_netdev(dev);
index eecee7f..afcc131 100644 (file)
@@ -265,6 +265,8 @@ static int gs_cmd_reset(struct gs_usb *gsusb, struct gs_can *gsdev)
                             sizeof(*dm),
                             1000);
 
+       kfree(dm);
+
        return rc;
 }
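
The gs_usb one-liner plugs a leak in gs_cmd_reset(): buffers handed to usb_control_msg() must be heap-allocated (the USB core may DMA from them, so on-stack memory is not allowed), and the allocated mode struct was never freed after the transfer. The general shape of the pattern, hedged and with hypothetical names:

    #include <linux/slab.h>
    #include <linux/usb.h>

    static int my_send_mode(struct usb_device *udev, const void *mode, u16 len)
    {
            void *buf;
            int rc;

            /* usb_control_msg() needs a kmalloc'd buffer, never stack memory */
            buf = kmemdup(mode, len, GFP_KERNEL);
            if (!buf)
                    return -ENOMEM;

            rc = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
                                 0x01,      /* hypothetical bRequest */
                                 USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
                                 0, 0, buf, len, 1000);

            kfree(buf);         /* free on all paths, success or failure */
            return rc < 0 ? rc : 0;
    }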
 
index 57913db..1ca76e0 100644 (file)
@@ -908,8 +908,6 @@ static int peak_usb_probe(struct usb_interface *intf,
        const struct peak_usb_adapter *peak_usb_adapter = NULL;
        int i, err = -ENOMEM;
 
-       usb_dev = interface_to_usbdev(intf);
-
        /* get corresponding PCAN-USB adapter */
        for (i = 0; i < ARRAY_SIZE(peak_usb_adapters_list); i++)
                if (peak_usb_adapters_list[i]->device_id == usb_id_product) {
@@ -920,7 +918,7 @@ static int peak_usb_probe(struct usb_interface *intf,
        if (!peak_usb_adapter) {
                /* should never happen except for bad device_id usage in this file */
                pr_err("%s: didn't find device id. 0x%x in devices list\n",
-                       PCAN_USB_DRIVER_NAME, usb_dev->descriptor.idProduct);
+                       PCAN_USB_DRIVER_NAME, usb_id_product);
                return -ENODEV;
        }
 
index facca33..a8cb332 100644 (file)
@@ -152,7 +152,7 @@ static const struct net_device_ops vcan_netdev_ops = {
 static void vcan_setup(struct net_device *dev)
 {
        dev->type               = ARPHRD_CAN;
-       dev->mtu                = CAN_MTU;
+       dev->mtu                = CANFD_MTU;
        dev->hard_header_len    = 0;
        dev->addr_len           = 0;
        dev->tx_queue_len       = 0;
@@ -163,7 +163,7 @@ static void vcan_setup(struct net_device *dev)
                dev->flags |= IFF_ECHO;
 
        dev->netdev_ops         = &vcan_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
 }
 
 static struct rtnl_link_ops vcan_link_ops __read_mostly = {
index 7fbb247..cfe889e 100644 (file)
@@ -150,13 +150,13 @@ static const struct net_device_ops vxcan_netdev_ops = {
 static void vxcan_setup(struct net_device *dev)
 {
        dev->type               = ARPHRD_CAN;
-       dev->mtu                = CAN_MTU;
+       dev->mtu                = CANFD_MTU;
        dev->hard_header_len    = 0;
        dev->addr_len           = 0;
        dev->tx_queue_len       = 0;
        dev->flags              = (IFF_NOARP|IFF_ECHO);
        dev->netdev_ops         = &vxcan_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
 }
 
 /* forward declaration for rtnl_create_link() */
index 149244a..9905b52 100644 (file)
@@ -328,7 +328,6 @@ static void dummy_free_netdev(struct net_device *dev)
        struct dummy_priv *priv = netdev_priv(dev);
 
        kfree(priv->vfinfo);
-       free_netdev(dev);
 }
 
 static void dummy_setup(struct net_device *dev)
@@ -338,7 +337,8 @@ static void dummy_setup(struct net_device *dev)
        /* Initialize the device structure. */
        dev->netdev_ops = &dummy_netdev_ops;
        dev->ethtool_ops = &dummy_ethtool_ops;
-       dev->destructor = dummy_free_netdev;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = dummy_free_netdev;
 
        /* Fill in device structure with ethernet-generic values. */
        dev->flags |= IFF_NOARP;
index 08d11ce..f5b237e 100644 (file)
@@ -61,6 +61,8 @@
 
 #define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF
 
+#define ENA_REGS_ADMIN_INTR_MASK 1
+
 /*****************************************************************************/
 /*****************************************************************************/
 /*****************************************************************************/
@@ -232,11 +234,9 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu
        tail_masked = admin_queue->sq.tail & queue_size_mask;
 
        /* In case of queue FULL */
-       cnt = admin_queue->sq.tail - admin_queue->sq.head;
+       cnt = atomic_read(&admin_queue->outstanding_cmds);
        if (cnt >= admin_queue->q_depth) {
-               pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n",
-                        admin_queue->sq.tail, admin_queue->sq.head,
-                        admin_queue->q_depth);
+               pr_debug("admin queue is full.\n");
                admin_queue->stats.out_of_space++;
                return ERR_PTR(-ENOSPC);
        }
@@ -508,15 +508,20 @@ static int ena_com_comp_status_to_errno(u8 comp_status)
 static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
                                                     struct ena_com_admin_queue *admin_queue)
 {
-       unsigned long flags;
-       u32 start_time;
+       unsigned long flags, timeout;
        int ret;
 
-       start_time = ((u32)jiffies_to_usecs(jiffies));
+       timeout = jiffies + ADMIN_CMD_TIMEOUT_US;
+
+       while (1) {
+               spin_lock_irqsave(&admin_queue->q_lock, flags);
+               ena_com_handle_admin_completion(admin_queue);
+               spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+               if (comp_ctx->status != ENA_CMD_SUBMITTED)
+                       break;
 
-       while (comp_ctx->status == ENA_CMD_SUBMITTED) {
-               if ((((u32)jiffies_to_usecs(jiffies)) - start_time) >
-                   ADMIN_CMD_TIMEOUT_US) {
+               if (time_is_before_jiffies(timeout)) {
                        pr_err("Wait for completion (polling) timeout\n");
                        /* ENA didn't have any completion */
                        spin_lock_irqsave(&admin_queue->q_lock, flags);
@@ -528,10 +533,6 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c
                        goto err;
                }
 
-               spin_lock_irqsave(&admin_queue->q_lock, flags);
-               ena_com_handle_admin_completion(admin_queue);
-               spin_unlock_irqrestore(&admin_queue->q_lock, flags);
-
                msleep(100);
        }
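
The reworked polling loop above also moves from hand-rolled arithmetic on jiffies_to_usecs() to the wrap-safe jiffies helpers: compute an absolute deadline once, then test it with time_is_before_jiffies(). A condensed sketch of the pattern; my_done() is a hypothetical completion check:

    #include <linux/delay.h>
    #include <linux/jiffies.h>

    static int my_poll(bool (*my_done)(void))
    {
            /* absolute deadline; the time_* helpers handle jiffies wraparound */
            unsigned long timeout = jiffies + msecs_to_jiffies(500);

            while (!my_done()) {
                    if (time_is_before_jiffies(timeout))
                            return -ETIMEDOUT;  /* deadline has passed */
                    msleep(100);
            }
            return 0;
    }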
 
@@ -1455,6 +1456,12 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
 
 void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
 {
+       u32 mask_value = 0;
+
+       if (polling)
+               mask_value = ENA_REGS_ADMIN_INTR_MASK;
+
+       writel(mask_value, ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF);
        ena_dev->admin_queue.polling = polling;
 }
 
index 67b2338..3ee55e2 100644 (file)
@@ -80,7 +80,6 @@ static const struct ena_stats ena_stats_tx_strings[] = {
        ENA_STAT_TX_ENTRY(tx_poll),
        ENA_STAT_TX_ENTRY(doorbells),
        ENA_STAT_TX_ENTRY(prepare_ctx_err),
-       ENA_STAT_TX_ENTRY(missing_tx_comp),
        ENA_STAT_TX_ENTRY(bad_req_id),
 };
 
@@ -94,6 +93,7 @@ static const struct ena_stats ena_stats_rx_strings[] = {
        ENA_STAT_RX_ENTRY(dma_mapping_err),
        ENA_STAT_RX_ENTRY(bad_desc_num),
        ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
+       ENA_STAT_RX_ENTRY(empty_rx_ring),
 };
 
 static const struct ena_stats ena_stats_ena_com_strings[] = {
index 7c1214d..4f16ed3 100644 (file)
@@ -190,6 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter)
                rxr->sgl_size = adapter->max_rx_sgl_size;
                rxr->smoothed_interval =
                        ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+               rxr->empty_rx_queue = 0;
        }
 }
 
@@ -1078,6 +1079,26 @@ inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
        rx_ring->per_napi_bytes = 0;
 }
 
+static inline void ena_unmask_interrupt(struct ena_ring *tx_ring,
+                                       struct ena_ring *rx_ring)
+{
+       struct ena_eth_io_intr_reg intr_reg;
+
+       /* Update intr register: rx intr delay,
+        * tx intr delay and interrupt unmask
+        */
+       ena_com_update_intr_reg(&intr_reg,
+                               rx_ring->smoothed_interval,
+                               tx_ring->smoothed_interval,
+                               true);
+
+       /* It is a shared MSI-X.
+        * Tx and Rx CQ have a pointer to it.
+        * So we use one of them to reach the intr reg
+        */
+       ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+}
+
 static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
                                             struct ena_ring *rx_ring)
 {
@@ -1108,7 +1129,6 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
 {
        struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
        struct ena_ring *tx_ring, *rx_ring;
-       struct ena_eth_io_intr_reg intr_reg;
 
        u32 tx_work_done;
        u32 rx_work_done;
@@ -1149,22 +1169,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
                        if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
                                ena_adjust_intr_moderation(rx_ring, tx_ring);
 
-                       /* Update intr register: rx intr delay,
-                        * tx intr delay and interrupt unmask
-                        */
-                       ena_com_update_intr_reg(&intr_reg,
-                                               rx_ring->smoothed_interval,
-                                               tx_ring->smoothed_interval,
-                                               true);
-
-                       /* It is a shared MSI-X.
-                        * Tx and Rx CQ have pointer to it.
-                        * So we use one of them to reach the intr reg
-                        */
-                       ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+                       ena_unmask_interrupt(tx_ring, rx_ring);
                }
 
-
                ena_update_ring_numa_node(tx_ring, rx_ring);
 
                ret = rx_work_done;
@@ -1485,6 +1492,11 @@ static int ena_up_complete(struct ena_adapter *adapter)
 
        ena_napi_enable_all(adapter);
 
+       /* Enable completion queues interrupt */
+       for (i = 0; i < adapter->num_queues; i++)
+               ena_unmask_interrupt(&adapter->tx_ring[i],
+                                    &adapter->rx_ring[i]);
+
        /* schedule napi in case we had pending packets
         * from the last time we disable napi
         */
@@ -1532,6 +1544,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
                          "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
                          qid, rc);
                ena_com_destroy_io_queue(ena_dev, ena_qid);
+               return rc;
        }
 
        ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
@@ -1596,6 +1609,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
                          "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
                          qid, rc);
                ena_com_destroy_io_queue(ena_dev, ena_qid);
+               return rc;
        }
 
        ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
@@ -1981,6 +1995,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        tx_info->tx_descs = nb_hw_desc;
        tx_info->last_jiffies = jiffies;
+       tx_info->print_once = 0;
 
        tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
                tx_ring->ring_size);
@@ -2550,13 +2565,44 @@ err:
                "Reset attempt failed. Can not reset the device\n");
 }
 
-static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+static int check_missing_comp_in_queue(struct ena_adapter *adapter,
+                                      struct ena_ring *tx_ring)
 {
        struct ena_tx_buffer *tx_buf;
        unsigned long last_jiffies;
+       u32 missed_tx = 0;
+       int i;
+
+       for (i = 0; i < tx_ring->ring_size; i++) {
+               tx_buf = &tx_ring->tx_buffer_info[i];
+               last_jiffies = tx_buf->last_jiffies;
+               if (unlikely(last_jiffies &&
+                            time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
+                       if (!tx_buf->print_once)
+                               netif_notice(adapter, tx_err, adapter->netdev,
+                                            "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
+                                            tx_ring->qid, i);
+
+                       tx_buf->print_once = 1;
+                       missed_tx++;
+
+                       if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
+                               netif_err(adapter, tx_err, adapter->netdev,
+                                         "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+                                         missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
+                               set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+                               return -EIO;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+{
        struct ena_ring *tx_ring;
-       int i, j, budget;
-       u32 missed_tx;
+       int i, budget, rc;
 
        /* Make sure another context isn't resetting the device */
        smp_rmb();
@@ -2572,31 +2618,9 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
        for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
                tx_ring = &adapter->tx_ring[i];
 
-               for (j = 0; j < tx_ring->ring_size; j++) {
-                       tx_buf = &tx_ring->tx_buffer_info[j];
-                       last_jiffies = tx_buf->last_jiffies;
-                       if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
-                               netif_notice(adapter, tx_err, adapter->netdev,
-                                            "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
-                                            tx_ring->qid, j);
-
-                               u64_stats_update_begin(&tx_ring->syncp);
-                               missed_tx = tx_ring->tx_stats.missing_tx_comp++;
-                               u64_stats_update_end(&tx_ring->syncp);
-
-                               /* Clear last jiffies so the lost buffer won't
-                                * be counted twice.
-                                */
-                               tx_buf->last_jiffies = 0;
-
-                               if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
-                                       netif_err(adapter, tx_err, adapter->netdev,
-                                                 "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n",
-                                                 missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
-                                       set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
-                               }
-                       }
-               }
+               rc = check_missing_comp_in_queue(adapter, tx_ring);
+               if (unlikely(rc))
+                       return;
 
                budget--;
                if (!budget)
@@ -2606,6 +2630,58 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)
        adapter->last_monitored_tx_qid = i % adapter->num_queues;
 }
 
+/* trigger napi schedule after 2 consecutive detections */
+#define EMPTY_RX_REFILL 2
+/* For the rare case where the device runs out of Rx descriptors and the
+ * napi handler failed to refill new Rx descriptors (due to a lack of memory
+ * for example).
+ * This case will lead to a deadlock:
+ * The device won't send interrupts since all the new Rx packets will be dropped
+ * The napi handler won't allocate new Rx descriptors, so the device won't
+ * be able to receive new packets.
+ *
+ * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
+ * It is recommended to have at least 512MB, with a minimum of 128MB for
+ * constrained environments.
+ *
+ * When such a situation is detected, reschedule napi.
+ */
+static void check_for_empty_rx_ring(struct ena_adapter *adapter)
+{
+       struct ena_ring *rx_ring;
+       int i, refill_required;
+
+       if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+               return;
+
+       if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
+               return;
+
+       for (i = 0; i < adapter->num_queues; i++) {
+               rx_ring = &adapter->rx_ring[i];
+
+               refill_required =
+                       ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
+               if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
+                       rx_ring->empty_rx_queue++;
+
+                       if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
+                               u64_stats_update_begin(&rx_ring->syncp);
+                               rx_ring->rx_stats.empty_rx_ring++;
+                               u64_stats_update_end(&rx_ring->syncp);
+
+                               netif_err(adapter, drv, adapter->netdev,
+                                         "trigger refill for ring %d\n", i);
+
+                               napi_schedule(rx_ring->napi);
+                               rx_ring->empty_rx_queue = 0;
+                       }
+               } else {
+                       rx_ring->empty_rx_queue = 0;
+               }
+       }
+}
+
 /* Check for keep alive expiration */
 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
 {
@@ -2660,6 +2736,8 @@ static void ena_timer_service(unsigned long data)
 
        check_for_missing_tx_completions(adapter);
 
+       check_for_empty_rx_ring(adapter);
+
        if (debug_area)
                ena_dump_stats_to_buf(adapter, debug_area);
 
@@ -2840,6 +2918,11 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
 {
        int release_bars;
 
+       if (ena_dev->mem_bar)
+               devm_iounmap(&pdev->dev, ena_dev->mem_bar);
+
+       devm_iounmap(&pdev->dev, ena_dev->reg_bar);
+
        release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
        pci_release_selected_regions(pdev, release_bars);
 }
@@ -2927,8 +3010,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_free_ena_dev;
        }
 
-       ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR),
-                                  pci_resource_len(pdev, ENA_REG_BAR));
+       ena_dev->reg_bar = devm_ioremap(&pdev->dev,
+                                       pci_resource_start(pdev, ENA_REG_BAR),
+                                       pci_resource_len(pdev, ENA_REG_BAR));
        if (!ena_dev->reg_bar) {
                dev_err(&pdev->dev, "failed to remap regs bar\n");
                rc = -EFAULT;
@@ -2948,8 +3032,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
 
        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
-               ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR),
-                                             pci_resource_len(pdev, ENA_MEM_BAR));
+               ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
+                                                  pci_resource_start(pdev, ENA_MEM_BAR),
+                                                  pci_resource_len(pdev, ENA_MEM_BAR));
                if (!ena_dev->mem_bar) {
                        rc = -EFAULT;
                        goto err_device_destroy;
index 0e22bce..a4d3d5e 100644 (file)
@@ -45,7 +45,7 @@
 
 #define DRV_MODULE_VER_MAJOR   1
 #define DRV_MODULE_VER_MINOR   1
-#define DRV_MODULE_VER_SUBMINOR 2
+#define DRV_MODULE_VER_SUBMINOR 7
 
 #define DRV_MODULE_NAME                "ena"
 #ifndef DRV_MODULE_VERSION
@@ -146,7 +146,18 @@ struct ena_tx_buffer {
        u32 tx_descs;
        /* num of buffers used by this skb */
        u32 num_of_bufs;
-       /* Save the last jiffies to detect missing tx packets */
+
+       /* Used to limit the number of prints when detecting missing tx packets */
+       u32 print_once;
+       /* Save the last jiffies to detect missing tx packets
+        *
+        * Set to a non-zero value on ena_start_xmit and cleared to zero
+        * by napi and the timer service routine.
+        *
+        * While this value is not protected by a lock,
+        * a given packet is not expected to be handled by ena_start_xmit
+        * and by napi/timer_service at the same time.
+        */
        unsigned long last_jiffies;
        struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
 } ____cacheline_aligned;
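
The last_jiffies protocol documented above is what the missing-completion watchdog keys off: the transmit path stamps the buffer and the completion paths clear it, so a stale non-zero stamp means a tx completion went missing. A minimal sketch of such a check, with the timeout value assumed rather than taken from the driver:

	#include <linux/jiffies.h>

	/* Hedged sketch; tx_buf follows struct ena_tx_buffer above and
	 * the timeout (in jiffies) is a hypothetical parameter.
	 */
	static bool demo_tx_completion_missing(struct ena_tx_buffer *tx_buf,
					       unsigned long timeout)
	{
		/* Zero: already handled by napi or the timer service. */
		if (!tx_buf->last_jiffies)
			return false;

		return time_is_before_jiffies(tx_buf->last_jiffies + timeout);
	}
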
@@ -170,7 +181,6 @@ struct ena_stats_tx {
        u64 napi_comp;
        u64 tx_poll;
        u64 doorbells;
-       u64 missing_tx_comp;
        u64 bad_req_id;
 };
 
@@ -184,6 +194,7 @@ struct ena_stats_rx {
        u64 dma_mapping_err;
        u64 bad_desc_num;
        u64 rx_copybreak_pkt;
+       u64 empty_rx_ring;
 };
 
 struct ena_ring {
@@ -231,6 +242,7 @@ struct ena_ring {
                struct ena_stats_tx tx_stats;
                struct ena_stats_rx rx_stats;
        };
+       int empty_rx_queue;
 } ____cacheline_aligned;
 
 struct ena_stats_dev {
index b8e3d88..a66aee5 100644 (file)
@@ -193,9 +193,6 @@ int hw_atl_utils_hw_get_regs(struct aq_hw_s *self,
                             struct aq_hw_caps_s *aq_hw_caps,
                             u32 *regs_buff);
 
-int hw_atl_utils_hw_get_settings(struct aq_hw_s *self,
-                                struct ethtool_cmd *cmd);
-
 int hw_atl_utils_hw_set_power(struct aq_hw_s *self,
                              unsigned int power_state);
 
index 5f49334..f619c4c 100644 (file)
@@ -3883,15 +3883,26 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
                /* when transmitting in a vf, start bd must hold the ethertype
                 * for fw to enforce it
                 */
+               u16 vlan_tci = 0;
 #ifndef BNX2X_STOP_ON_ERROR
-               if (IS_VF(bp))
+               if (IS_VF(bp)) {
 #endif
-                       tx_start_bd->vlan_or_ethertype =
-                               cpu_to_le16(ntohs(eth->h_proto));
+                       /* Still need to consider inband vlan for the enforced ethertype */
+                       if (__vlan_get_tag(skb, &vlan_tci)) {
+                               tx_start_bd->vlan_or_ethertype =
+                                       cpu_to_le16(ntohs(eth->h_proto));
+                       } else {
+                               tx_start_bd->bd_flags.as_bitfield |=
+                                       (X_ETH_INBAND_VLAN <<
+                                        ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
+                               tx_start_bd->vlan_or_ethertype =
+                                       cpu_to_le16(vlan_tci);
+                       }
 #ifndef BNX2X_STOP_ON_ERROR
-               else
+               } else {
                        /* used by FW for packet accounting */
                        tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
+               }
 #endif
        }
 
index a851f95..349a465 100644 (file)
@@ -12729,7 +12729,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
        } else {
                /* If no mc addresses are required, flush the configuration */
                rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
-               if (rc)
+               if (rc < 0)
                        BNX2X_ERR("Failed to clear multicast configuration %d\n",
                                  rc);
        }
index bdfd53b..9ca994d 100644 (file)
@@ -901,6 +901,8 @@ static void bnx2x_vf_flr(struct bnx2x *bp, struct bnx2x_virtf *vf)
        /* release VF resources */
        bnx2x_vf_free_resc(bp, vf);
 
+       vf->malicious = false;
+
        /* re-open the mailbox */
        bnx2x_vf_enable_mbx(bp, vf->abs_vfid);
        return;
@@ -1822,9 +1824,11 @@ get_vf:
                   vf->abs_vfid, qidx);
                bnx2x_vf_handle_rss_update_eqe(bp, vf);
        case EVENT_RING_OPCODE_VF_FLR:
-       case EVENT_RING_OPCODE_MALICIOUS_VF:
                /* Do nothing for now */
                return 0;
+       case EVENT_RING_OPCODE_MALICIOUS_VF:
+               vf->malicious = true;
+               return 0;
        }
 
        return 0;
@@ -1905,6 +1909,13 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
                        continue;
                }
 
+               if (vf->malicious) {
+                       DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+                              "vf %d malicious so no stats for it\n",
+                              vf->abs_vfid);
+                       continue;
+               }
+
                DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
                       "add addresses for vf %d\n", vf->abs_vfid);
                for_each_vfq(vf, j) {
@@ -3042,7 +3053,7 @@ void bnx2x_vf_pci_dealloc(struct bnx2x *bp)
 {
        BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->vf2pf_mbox_mapping,
                       sizeof(struct bnx2x_vf_mbx_msg));
-       BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->pf2vf_bulletin_mapping,
+       BNX2X_PCI_FREE(bp->pf2vf_bulletin, bp->pf2vf_bulletin_mapping,
                       sizeof(union pf_vf_bulletin));
 }
 
index 888d0b6..53466f6 100644 (file)
@@ -141,6 +141,7 @@ struct bnx2x_virtf {
 #define VF_RESET       3       /* VF FLR'd, pending cleanup */
 
        bool flr_clnup_stage;   /* true during flr cleanup */
+       bool malicious;         /* true if FW indicated so, until FLR */
 
        /* dma */
        dma_addr_t fw_stat_map;
index 03f55da..74e8e21 100644 (file)
@@ -1301,10 +1301,11 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
                cp_cons = NEXT_CMP(cp_cons);
        }
 
-       if (unlikely(agg_bufs > MAX_SKB_FRAGS)) {
+       if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
                bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
-               netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
-                           agg_bufs, (int)MAX_SKB_FRAGS);
+               if (agg_bufs > MAX_SKB_FRAGS)
+                       netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
+                                   agg_bufs, (int)MAX_SKB_FRAGS);
                return NULL;
        }
 
@@ -1562,6 +1563,45 @@ next_rx_no_prod:
        return rc;
 }
 
+/* In netpoll mode, if we are using a combined completion ring, we need to
+ * discard the rx packets and recycle the buffers.
+ */
+static int bnxt_force_rx_discard(struct bnxt *bp, struct bnxt_napi *bnapi,
+                                u32 *raw_cons, u8 *event)
+{
+       struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+       u32 tmp_raw_cons = *raw_cons;
+       struct rx_cmp_ext *rxcmp1;
+       struct rx_cmp *rxcmp;
+       u16 cp_cons;
+       u8 cmp_type;
+
+       cp_cons = RING_CMP(tmp_raw_cons);
+       rxcmp = (struct rx_cmp *)
+                       &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+       tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons);
+       cp_cons = RING_CMP(tmp_raw_cons);
+       rxcmp1 = (struct rx_cmp_ext *)
+                       &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+       if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+               return -EBUSY;
+
+       cmp_type = RX_CMP_TYPE(rxcmp);
+       if (cmp_type == CMP_TYPE_RX_L2_CMP) {
+               rxcmp1->rx_cmp_cfa_code_errors_v2 |=
+                       cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
+       } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
+               struct rx_tpa_end_cmp_ext *tpa_end1;
+
+               tpa_end1 = (struct rx_tpa_end_cmp_ext *)rxcmp1;
+               tpa_end1->rx_tpa_end_cmp_errors_v2 |=
+                       cpu_to_le32(RX_TPA_END_CMP_ERRORS);
+       }
+       return bnxt_rx_pkt(bp, bnapi, raw_cons, event);
+}
+
 #define BNXT_GET_EVENT_PORT(data)      \
        ((data) &                       \
         ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
@@ -1744,7 +1784,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
                        if (unlikely(tx_pkts > bp->tx_wake_thresh))
                                rx_pkts = budget;
                } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
-                       rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+                       if (likely(budget))
+                               rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+                       else
+                               rc = bnxt_force_rx_discard(bp, bnapi, &raw_cons,
+                                                          &event);
                        if (likely(rc >= 0))
                                rx_pkts += rc;
                        else if (rc == -EBUSY)  /* partial completion */
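
The budget test above is the netpoll discriminator: netpoll invokes the NAPI poll handler with a budget of zero, where handing skbs to the stack is unsafe, so rx completions are routed to bnxt_force_rx_discard() and the buffers recycled. The general shape, as a hedged skeleton with hypothetical helpers:

	#include <linux/netdevice.h>

	static void demo_tx_completions(struct napi_struct *napi);
	static int  demo_rx(struct napi_struct *napi, int budget);
	static void demo_rx_discard(struct napi_struct *napi);

	static int demo_napi_poll(struct napi_struct *napi, int budget)
	{
		int work_done = 0;

		demo_tx_completions(napi);	/* safe in any context */

		if (budget)
			work_done = demo_rx(napi, budget); /* normal path */
		else
			demo_rx_discard(napi);	/* netpoll: drop + recycle */

		if (work_done < budget)
			napi_complete_done(napi, work_done);
		return work_done;
	}
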
@@ -6663,12 +6707,11 @@ static void bnxt_poll_controller(struct net_device *dev)
        struct bnxt *bp = netdev_priv(dev);
        int i;
 
-       for (i = 0; i < bp->cp_nr_rings; i++) {
-               struct bnxt_irq *irq = &bp->irq_tbl[i];
+       /* Only process tx rings/combined rings in netpoll mode. */
+       for (i = 0; i < bp->tx_nr_rings; i++) {
+               struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
 
-               disable_irq(irq->vector);
-               irq->handler(irq->vector, bp->bnapi[i]);
-               enable_irq(irq->vector);
+               napi_schedule(&txr->bnapi->napi);
        }
 }
 #endif
index 3ef42db..d46a850 100644 (file)
@@ -374,12 +374,16 @@ struct rx_tpa_end_cmp_ext {
 
        __le32 rx_tpa_end_cmp_errors_v2;
        #define RX_TPA_END_CMP_V2                               (0x1 << 0)
-       #define RX_TPA_END_CMP_ERRORS                           (0x7fff << 1)
+       #define RX_TPA_END_CMP_ERRORS                           (0x3 << 1)
        #define RX_TPA_END_CMPL_ERRORS_SHIFT                     1
 
        u32 rx_tpa_end_cmp_start_opaque;
 };
 
+#define TPA_END_ERRORS(rx_tpa_end_ext)                                 \
+       ((rx_tpa_end_ext)->rx_tpa_end_cmp_errors_v2 &                   \
+        cpu_to_le32(RX_TPA_END_CMP_ERRORS))
+
 #define DB_IDX_MASK                                            0xffffff
 #define DB_IDX_VALID                                           (0x1 << 26)
 #define DB_IRQ_DIS                                             (0x1 << 27)
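
TPA_END_ERRORS() above uses the usual idiom for testing bits in a little-endian hardware field: convert the constant mask with cpu_to_le32(), which the compiler folds, instead of byte-swapping the descriptor word for every packet. A hedged illustration with a hypothetical wrapper:

	#include <asm/byteorder.h>

	/* Both operands keep the __le32 type, so sparse stays quiet and
	 * no runtime swap of the hardware word is needed.
	 */
	static inline bool demo_tpa_has_errors(const struct rx_tpa_end_cmp_ext *p)
	{
		return !!(p->rx_tpa_end_cmp_errors_v2 &
			  cpu_to_le32(RX_TPA_END_CMP_ERRORS));
	}
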
index 77ed2f6..53309f6 100644 (file)
@@ -2171,9 +2171,10 @@ static int cxgb_up(struct adapter *adap)
 {
        int err;
 
+       mutex_lock(&uld_mutex);
        err = setup_sge_queues(adap);
        if (err)
-               goto out;
+               goto rel_lock;
        err = setup_rss(adap);
        if (err)
                goto freeq;
@@ -2197,7 +2198,6 @@ static int cxgb_up(struct adapter *adap)
                        goto irq_err;
        }
 
-       mutex_lock(&uld_mutex);
        enable_rx(adap);
        t4_sge_start(adap);
        t4_intr_enable(adap);
@@ -2210,13 +2210,15 @@ static int cxgb_up(struct adapter *adap)
 #endif
        /* Initialize hash mac addr list*/
        INIT_LIST_HEAD(&adap->mac_hlist);
- out:
        return err;
+
  irq_err:
        dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
  freeq:
        t4_free_sge_resources(adap);
-       goto out;
+ rel_lock:
+       mutex_unlock(&uld_mutex);
+       return err;
 }
 
 static void cxgb_down(struct adapter *adapter)
@@ -4525,7 +4527,7 @@ static void dummy_setup(struct net_device *dev)
        /* Initialize the device structure. */
        dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
        dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
 }
 
 static int config_mgmt_dev(struct pci_dev *pdev)
index 9a520e4..290ad05 100644 (file)
@@ -2647,7 +2647,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
        priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
 
        /* device used for DMA mapping */
-       arch_setup_dma_ops(dev, 0, 0, NULL, false);
+       set_dma_ops(dev, get_dma_ops(&pdev->dev));
        err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
        if (err) {
                dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
index dc0850b..8870a9a 100644 (file)
@@ -2,6 +2,7 @@ config FSL_FMAN
        tristate "FMan support"
        depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
        select GENERIC_ALLOCATOR
+       depends on HAS_DMA
        select PHYLIB
        default n
        help
index 0b31f85..6e67d22 100644 (file)
@@ -623,6 +623,8 @@ static struct platform_device *dpaa_eth_add_device(int fman_id,
                goto no_mem;
        }
 
+       set_dma_ops(&pdev->dev, get_dma_ops(priv->dev));
+
        ret = platform_device_add_data(pdev, &data, sizeof(data));
        if (ret)
                goto err;
index e13aa06..7a8addd 100644 (file)
@@ -29,10 +29,9 @@ enum _dsm_rst_type {
        HNS_ROCE_RESET_FUNC     = 0x7,
 };
 
-const u8 hns_dsaf_acpi_dsm_uuid[] = {
-       0x1A, 0xAA, 0x85, 0x1A, 0x93, 0xE2, 0x5E, 0x41,
-       0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A
-};
+static const guid_t hns_dsaf_acpi_dsm_guid =
+       GUID_INIT(0x1A85AA1A, 0xE293, 0x415E,
+                 0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A);
 
 static void dsaf_write_sub(struct dsaf_device *dsaf_dev, u32 reg, u32 val)
 {
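
The conversion above is byte-for-byte equivalent to the removed array: guid_t is the little-endian UUID type, so the GUID_INIT() fields serialize as follows (a worked check, not driver code):

	#include <linux/uuid.h>

	/* 0x1A85AA1A -> 1A AA 85 1A  (le32)
	 * 0xE293     -> 93 E2        (le16)
	 * 0x415E     -> 5E 41        (le16)
	 * trailing   -> 8E 28 8D 69 0A 0F 82 0A (verbatim)
	 * i.e. exactly the bytes of the old hns_dsaf_acpi_dsm_uuid[].
	 */
	static const guid_t demo_guid =
		GUID_INIT(0x1A85AA1A, 0xE293, 0x415E,
			  0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A);
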
@@ -151,7 +150,7 @@ static void hns_dsaf_acpi_srst_by_port(struct dsaf_device *dsaf_dev, u8 op_type,
        argv4.package.elements = obj_args;
 
        obj = acpi_evaluate_dsm(ACPI_HANDLE(dsaf_dev->dev),
-                               hns_dsaf_acpi_dsm_uuid, 0, op_type, &argv4);
+                               &hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4);
        if (!obj) {
                dev_warn(dsaf_dev->dev, "reset port_type%d port%d fail!",
                         port_type, port);
@@ -434,7 +433,7 @@ static phy_interface_t hns_mac_get_phy_if_acpi(struct hns_mac_cb *mac_cb)
        argv4.package.elements = &obj_args,
 
        obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
-                               hns_dsaf_acpi_dsm_uuid, 0,
+                               &hns_dsaf_acpi_dsm_guid, 0,
                                HNS_OP_GET_PORT_TYPE_FUNC, &argv4);
 
        if (!obj || obj->type != ACPI_TYPE_INTEGER)
@@ -474,7 +473,7 @@ int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
        argv4.package.elements = &obj_args,
 
        obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
-                               hns_dsaf_acpi_dsm_uuid, 0,
+                               &hns_dsaf_acpi_dsm_guid, 0,
                                HNS_OP_GET_SFP_STAT_FUNC, &argv4);
 
        if (!obj || obj->type != ACPI_TYPE_INTEGER)
@@ -565,7 +564,7 @@ hns_mac_config_sds_loopback_acpi(struct hns_mac_cb *mac_cb, bool en)
        argv4.package.elements = obj_args;
 
        obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dsaf_dev->dev),
-                               hns_dsaf_acpi_dsm_uuid, 0,
+                               &hns_dsaf_acpi_dsm_guid, 0,
                                HNS_OP_SERDES_LP_FUNC, &argv4);
        if (!obj) {
                dev_warn(mac_cb->dsaf_dev->dev, "set port%d serdes lp fail!",
index b8fab14..e95795b 100644 (file)
@@ -288,9 +288,15 @@ static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en)
 
                /* Force 1000M Link, Default is 0x0200 */
                phy_write(phy_dev, 7, 0x20C);
-               phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
 
-               /* Enable PHY loop-back */
+               /* Power up the fiber interface */
+               phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
+               val = phy_read(phy_dev, COPPER_CONTROL_REG);
+               val &= ~PHY_POWER_DOWN;
+               phy_write(phy_dev, COPPER_CONTROL_REG, val);
+
+               /* Enable PHY loopback */
+               phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
                val = phy_read(phy_dev, COPPER_CONTROL_REG);
                val |= PHY_LOOP_BACK;
                val &= ~PHY_POWER_DOWN;
@@ -299,6 +305,12 @@ static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en)
                phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
                phy_write(phy_dev, 1, 0x400);
                phy_write(phy_dev, 7, 0x200);
+
+               phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
+               val = phy_read(phy_dev, COPPER_CONTROL_REG);
+               val |= PHY_POWER_DOWN;
+               phy_write(phy_dev, COPPER_CONTROL_REG, val);
+
                phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
                phy_write(phy_dev, 9, 0xF00);
 
index 508923f..259e69a 100644 (file)
@@ -343,6 +343,7 @@ static int emac_reset(struct emac_instance *dev)
 {
        struct emac_regs __iomem *p = dev->emacp;
        int n = 20;
+       bool __maybe_unused try_internal_clock = false;
 
        DBG(dev, "reset" NL);
 
@@ -355,6 +356,7 @@ static int emac_reset(struct emac_instance *dev)
        }
 
 #ifdef CONFIG_PPC_DCR_NATIVE
+do_retry:
        /*
         * PPC460EX/GT Embedded Processor Advanced User's Manual
         * section 28.10.1 Mode Register 0 (EMACx_MR0) states:
@@ -362,10 +364,19 @@ static int emac_reset(struct emac_instance *dev)
         * of the EMAC. If none is present, select the internal clock
         * (SDR0_ETH_CFG[EMACx_PHY_CLK] = 1).
         * After a soft reset, select the external clock.
+        *
+        * The AR8035-A PHY on the Meraki MR24 does not provide a TX clock
+        * if the ethernet cable is not attached. This causes the reset to
+        * time out, and the PHY detection code in emac_init_phy() is then
+        * unable to communicate with and detect the AR8035-A PHY. As a
+        * result, the emac driver bails out early and the user has no
+        * ethernet.
+        * In order to stay compatible with existing configurations, the
+        * driver temporarily switches to the internal clock after the
+        * first reset fails.
         */
        if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) {
-               if (dev->phy_address == 0xffffffff &&
-                   dev->phy_map == 0xffffffff) {
+               if (try_internal_clock || (dev->phy_address == 0xffffffff &&
+                                          dev->phy_map == 0xffffffff)) {
                        /* No PHY: select internal loop clock before reset */
                        dcri_clrset(SDR0, SDR0_ETH_CFG,
                                    0, SDR0_ETH_CFG_ECS << dev->cell_index);
@@ -383,8 +394,15 @@ static int emac_reset(struct emac_instance *dev)
 
 #ifdef CONFIG_PPC_DCR_NATIVE
        if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) {
-               if (dev->phy_address == 0xffffffff &&
-                   dev->phy_map == 0xffffffff) {
+               if (!n && !try_internal_clock) {
+                       /* first attempt has timed out. */
+                       n = 20;
+                       try_internal_clock = true;
+                       goto do_retry;
+               }
+
+               if (try_internal_clock || (dev->phy_address == 0xffffffff &&
+                                          dev->phy_map == 0xffffffff)) {
                        /* No PHY: restore external clock source after reset */
                        dcri_clrset(SDR0, SDR0_ETH_CFG,
                                    SDR0_ETH_CFG_ECS << dev->cell_index, 0);
@@ -2460,20 +2478,24 @@ static int emac_mii_bus_reset(struct mii_bus *bus)
        return emac_reset(dev);
 }
 
+static int emac_mdio_phy_start_aneg(struct mii_phy *phy,
+                                   struct phy_device *phy_dev)
+{
+       phy_dev->autoneg = phy->autoneg;
+       phy_dev->speed = phy->speed;
+       phy_dev->duplex = phy->duplex;
+       phy_dev->advertising = phy->advertising;
+       return phy_start_aneg(phy_dev);
+}
+
 static int emac_mdio_setup_aneg(struct mii_phy *phy, u32 advertise)
 {
        struct net_device *ndev = phy->dev;
        struct emac_instance *dev = netdev_priv(ndev);
 
-       dev->phy.autoneg = AUTONEG_ENABLE;
-       dev->phy.speed = SPEED_1000;
-       dev->phy.duplex = DUPLEX_FULL;
-       dev->phy.advertising = advertise;
        phy->autoneg = AUTONEG_ENABLE;
-       phy->speed = dev->phy.speed;
-       phy->duplex = dev->phy.duplex;
        phy->advertising = advertise;
-       return phy_start_aneg(dev->phy_dev);
+       return emac_mdio_phy_start_aneg(phy, dev->phy_dev);
 }
 
 static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd)
@@ -2481,13 +2503,10 @@ static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd)
        struct net_device *ndev = phy->dev;
        struct emac_instance *dev = netdev_priv(ndev);
 
-       dev->phy.autoneg =  AUTONEG_DISABLE;
-       dev->phy.speed = speed;
-       dev->phy.duplex = fd;
        phy->autoneg = AUTONEG_DISABLE;
        phy->speed = speed;
        phy->duplex = fd;
-       return phy_start_aneg(dev->phy_dev);
+       return emac_mdio_phy_start_aneg(phy, dev->phy_dev);
 }
 
 static int emac_mdio_poll_link(struct mii_phy *phy)
@@ -2509,16 +2528,17 @@ static int emac_mdio_read_link(struct mii_phy *phy)
 {
        struct net_device *ndev = phy->dev;
        struct emac_instance *dev = netdev_priv(ndev);
+       struct phy_device *phy_dev = dev->phy_dev;
        int res;
 
-       res = phy_read_status(dev->phy_dev);
+       res = phy_read_status(phy_dev);
        if (res)
                return res;
 
-       dev->phy.speed = phy->speed;
-       dev->phy.duplex = phy->duplex;
-       dev->phy.pause = phy->pause;
-       dev->phy.asym_pause = phy->asym_pause;
+       phy->speed = phy_dev->speed;
+       phy->duplex = phy_dev->duplex;
+       phy->pause = phy_dev->pause;
+       phy->asym_pause = phy_dev->asym_pause;
        return 0;
 }
 
@@ -2528,13 +2548,6 @@ static int emac_mdio_init_phy(struct mii_phy *phy)
        struct emac_instance *dev = netdev_priv(ndev);
 
        phy_start(dev->phy_dev);
-       dev->phy.autoneg = phy->autoneg;
-       dev->phy.speed = phy->speed;
-       dev->phy.duplex = phy->duplex;
-       dev->phy.advertising = phy->advertising;
-       dev->phy.pause = phy->pause;
-       dev->phy.asym_pause = phy->asym_pause;
-
        return phy_init_hw(dev->phy_dev);
 }
 
index a93757c..c0fbeb3 100644 (file)
@@ -1468,6 +1468,11 @@ static void ibmvnic_netpoll_controller(struct net_device *dev)
 }
 #endif
 
+static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       return -EOPNOTSUPP;
+}
+
 static const struct net_device_ops ibmvnic_netdev_ops = {
        .ndo_open               = ibmvnic_open,
        .ndo_stop               = ibmvnic_close,
@@ -1479,6 +1484,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = ibmvnic_netpoll_controller,
 #endif
+       .ndo_change_mtu         = ibmvnic_change_mtu,
 };
 
 /* ethtool functions */
index cdde3cc..44d9610 100644 (file)
@@ -399,6 +399,7 @@ struct i40e_pf {
 #define I40E_FLAG_RX_CSUM_ENABLED              BIT_ULL(1)
 #define I40E_FLAG_MSI_ENABLED                  BIT_ULL(2)
 #define I40E_FLAG_MSIX_ENABLED                 BIT_ULL(3)
+#define I40E_FLAG_HW_ATR_EVICT_ENABLED         BIT_ULL(4)
 #define I40E_FLAG_RSS_ENABLED                  BIT_ULL(6)
 #define I40E_FLAG_VMDQ_ENABLED                 BIT_ULL(7)
 #define I40E_FLAG_IWARP_ENABLED                        BIT_ULL(10)
index 7a8eb48..894c8e5 100644 (file)
@@ -224,7 +224,7 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
        I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENABLED, 0),
        I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
        I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
-       I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_CAPABLE, 0),
+       I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
        I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
 };
 
@@ -4092,7 +4092,7 @@ flags_complete:
 
        /* Only allow ATR evict on hardware that is capable of handling it */
        if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
-               pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+               pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_ENABLED;
 
        if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) {
                u16 sw_flags = 0, valid_flags = 0;
index 150caf6..a7a4b28 100644 (file)
@@ -8821,11 +8821,12 @@ static int i40e_sw_init(struct i40e_pf *pf)
                    (pf->hw.aq.api_min_ver > 4))) {
                /* Supported in FW API version higher than 1.4 */
                pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
-               pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
-       } else {
-               pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
        }
 
+       /* Enable HW ATR eviction if possible */
+       if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+               pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED;
+
        pf->eeprom_version = 0xDEAD;
        pf->lan_veb = I40E_NO_VEB;
        pf->lan_vsi = I40E_NO_VSI;
index cd894f4..77115c2 100644 (file)
@@ -2341,7 +2341,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
        /* Due to lack of space, no more new filters can be programmed */
        if (th->syn && (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED))
                return;
-       if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
+       if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) {
                /* HW ATR eviction will take care of removing filters on FIN
                 * and RST packets.
                 */
@@ -2403,7 +2403,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
                        I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
                        I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
 
-       if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+       if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED)
                dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
 
        fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
index 95c23fb..0fb38ca 100644 (file)
@@ -3017,10 +3017,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
                                           VLAN_VID_MASK));
        }
 
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
        if (vlan_id || qos)
                ret = i40e_vsi_add_pvid(vsi, vlanprio);
        else
                i40e_vsi_remove_pvid(vsi);
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
 
        if (vlan_id) {
                dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
index 9b875d7..33c9016 100644 (file)
@@ -3719,7 +3719,7 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
                                    dma_addr_t *dma_addr,
                                    phys_addr_t *phys_addr)
 {
-       int cpu = smp_processor_id();
+       int cpu = get_cpu();
 
        *dma_addr = mvpp2_percpu_read(priv, cpu,
                                      MVPP2_BM_PHY_ALLOC_REG(bm_pool->id));
@@ -3740,6 +3740,8 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
                if (sizeof(phys_addr_t) == 8)
                        *phys_addr |= (u64)phys_addr_highbits << 32;
        }
+
+       put_cpu();
 }
 
 /* Free all buffers from the pool */
@@ -3920,18 +3922,12 @@ static inline u32 mvpp2_bm_cookie_pool_set(u32 cookie, int pool)
        return bm;
 }
 
-/* Get pool number from a BM cookie */
-static inline int mvpp2_bm_cookie_pool_get(unsigned long cookie)
-{
-       return (cookie >> MVPP2_BM_COOKIE_POOL_OFFS) & 0xFF;
-}
-
 /* Release buffer to BM */
 static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
                                     dma_addr_t buf_dma_addr,
                                     phys_addr_t buf_phys_addr)
 {
-       int cpu = smp_processor_id();
+       int cpu = get_cpu();
 
        if (port->priv->hw_version == MVPP22) {
                u32 val = 0;
@@ -3958,15 +3954,15 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
                           MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
        mvpp2_percpu_write(port->priv, cpu,
                           MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
+
+       put_cpu();
 }
 
 /* Refill BM pool */
-static void mvpp2_pool_refill(struct mvpp2_port *port, u32 bm,
+static void mvpp2_pool_refill(struct mvpp2_port *port, int pool,
                              dma_addr_t dma_addr,
                              phys_addr_t phys_addr)
 {
-       int pool = mvpp2_bm_cookie_pool_get(bm);
-
        mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
 }
 
@@ -4186,8 +4182,6 @@ static void mvpp22_port_mii_set(struct mvpp2_port *port)
 {
        u32 val;
 
-       return;
-
        /* Only GOP port 0 has an XLG MAC */
        if (port->gop_id == 0) {
                val = readl(port->base + MVPP22_XLG_CTRL3_REG);
@@ -4515,21 +4509,6 @@ static void mvpp2_rxq_offset_set(struct mvpp2_port *port,
        mvpp2_write(port->priv, MVPP2_RXQ_CONFIG_REG(prxq), val);
 }
 
-/* Obtain BM cookie information from descriptor */
-static u32 mvpp2_bm_cookie_build(struct mvpp2_port *port,
-                                struct mvpp2_rx_desc *rx_desc)
-{
-       int cpu = smp_processor_id();
-       int pool;
-
-       pool = (mvpp2_rxdesc_status_get(port, rx_desc) &
-               MVPP2_RXD_BM_POOL_ID_MASK) >>
-               MVPP2_RXD_BM_POOL_ID_OFFS;
-
-       return ((pool & 0xFF) << MVPP2_BM_COOKIE_POOL_OFFS) |
-              ((cpu & 0xFF) << MVPP2_BM_COOKIE_CPU_OFFS);
-}
-
 /* Tx descriptors helper methods */
 
 /* Get pointer to next Tx descriptor to be processed (send) by HW */
@@ -4757,7 +4736,7 @@ static void mvpp2_txp_max_tx_size_set(struct mvpp2_port *port)
 static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
                                   struct mvpp2_rx_queue *rxq)
 {
-       int cpu = smp_processor_id();
+       int cpu = get_cpu();
 
        if (rxq->pkts_coal > MVPP2_OCCUPIED_THRESH_MASK)
                rxq->pkts_coal = MVPP2_OCCUPIED_THRESH_MASK;
@@ -4765,6 +4744,8 @@ static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
        mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
        mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_THRESH_REG,
                           rxq->pkts_coal);
+
+       put_cpu();
 }
 
 static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz)
@@ -4945,7 +4926,7 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
        mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
 
        /* Set Rx descriptors queue starting address - indirect access */
-       cpu = smp_processor_id();
+       cpu = get_cpu();
        mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
        if (port->priv->hw_version == MVPP21)
                rxq_dma = rxq->descs_dma;
@@ -4954,6 +4935,7 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
        mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, rxq_dma);
        mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, rxq->size);
        mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_INDEX_REG, 0);
+       put_cpu();
 
        /* Set Offset */
        mvpp2_rxq_offset_set(port, rxq->id, NET_SKB_PAD);
@@ -4980,9 +4962,13 @@ static void mvpp2_rxq_drop_pkts(struct mvpp2_port *port,
 
        for (i = 0; i < rx_received; i++) {
                struct mvpp2_rx_desc *rx_desc = mvpp2_rxq_next_desc_get(rxq);
-               u32 bm = mvpp2_bm_cookie_build(port, rx_desc);
+               u32 status = mvpp2_rxdesc_status_get(port, rx_desc);
+               int pool;
+
+               pool = (status & MVPP2_RXD_BM_POOL_ID_MASK) >>
+                       MVPP2_RXD_BM_POOL_ID_OFFS;
 
-               mvpp2_pool_refill(port, bm,
+               mvpp2_pool_refill(port, pool,
                                  mvpp2_rxdesc_dma_addr_get(port, rx_desc),
                                  mvpp2_rxdesc_cookie_get(port, rx_desc));
        }
@@ -5012,10 +4998,11 @@ static void mvpp2_rxq_deinit(struct mvpp2_port *port,
         * free descriptor number
         */
        mvpp2_write(port->priv, MVPP2_RXQ_STATUS_REG(rxq->id), 0);
-       cpu = smp_processor_id();
+       cpu = get_cpu();
        mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_NUM_REG, rxq->id);
        mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_ADDR_REG, 0);
        mvpp2_percpu_write(port->priv, cpu, MVPP2_RXQ_DESC_SIZE_REG, 0);
+       put_cpu();
 }
 
 /* Create and initialize a Tx queue */
@@ -5038,7 +5025,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
        txq->last_desc = txq->size - 1;
 
        /* Set Tx descriptors queue starting address - indirect access */
-       cpu = smp_processor_id();
+       cpu = get_cpu();
        mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
        mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG,
                           txq->descs_dma);
@@ -5063,6 +5050,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
        mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG,
                           MVPP2_PREF_BUF_PTR(desc) | MVPP2_PREF_BUF_SIZE_16 |
                           MVPP2_PREF_BUF_THRESH(desc_per_txq / 2));
+       put_cpu();
 
        /* WRR / EJP configuration - indirect access */
        tx_port_num = mvpp2_egress_port(port);
@@ -5133,10 +5121,11 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
        mvpp2_write(port->priv, MVPP2_TXQ_SCHED_TOKEN_CNTR_REG(txq->id), 0);
 
        /* Set Tx descriptors queue starting address and size */
-       cpu = smp_processor_id();
+       cpu = get_cpu();
        mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
        mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_ADDR_REG, 0);
        mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_DESC_SIZE_REG, 0);
+       put_cpu();
 }
 
 /* Cleanup Tx ports */
@@ -5146,7 +5135,7 @@ static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
        int delay, pending, cpu;
        u32 val;
 
-       cpu = smp_processor_id();
+       cpu = get_cpu();
        mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
        val = mvpp2_percpu_read(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG);
        val |= MVPP2_TXQ_DRAIN_EN_MASK;
@@ -5173,6 +5162,7 @@ static void mvpp2_txq_clean(struct mvpp2_port *port, struct mvpp2_tx_queue *txq)
 
        val &= ~MVPP2_TXQ_DRAIN_EN_MASK;
        mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_PREF_BUF_REG, val);
+       put_cpu();
 
        for_each_present_cpu(cpu) {
                txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
@@ -5420,7 +5410,7 @@ static void mvpp2_rx_csum(struct mvpp2_port *port, u32 status,
 
 /* Reuse skb if possible, or allocate a new skb and add it to BM pool */
 static int mvpp2_rx_refill(struct mvpp2_port *port,
-                          struct mvpp2_bm_pool *bm_pool, u32 bm)
+                          struct mvpp2_bm_pool *bm_pool, int pool)
 {
        dma_addr_t dma_addr;
        phys_addr_t phys_addr;
@@ -5432,7 +5422,7 @@ static int mvpp2_rx_refill(struct mvpp2_port *port,
        if (!buf)
                return -ENOMEM;
 
-       mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
+       mvpp2_pool_refill(port, pool, dma_addr, phys_addr);
 
        return 0;
 }
@@ -5490,7 +5480,7 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
                unsigned int frag_size;
                dma_addr_t dma_addr;
                phys_addr_t phys_addr;
-               u32 bm, rx_status;
+               u32 rx_status;
                int pool, rx_bytes, err;
                void *data;
 
@@ -5502,8 +5492,8 @@ static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
                phys_addr = mvpp2_rxdesc_cookie_get(port, rx_desc);
                data = (void *)phys_to_virt(phys_addr);
 
-               bm = mvpp2_bm_cookie_build(port, rx_desc);
-               pool = mvpp2_bm_cookie_pool_get(bm);
+               pool = (rx_status & MVPP2_RXD_BM_POOL_ID_MASK) >>
+                       MVPP2_RXD_BM_POOL_ID_OFFS;
                bm_pool = &port->priv->bm_pools[pool];
 
                /* In case of an error, release the requested buffer pointer
@@ -5516,7 +5506,7 @@ err_drop_frame:
                        dev->stats.rx_errors++;
                        mvpp2_rx_error(port, rx_desc);
                        /* Return the buffer to the pool */
-                       mvpp2_pool_refill(port, bm, dma_addr, phys_addr);
+                       mvpp2_pool_refill(port, pool, dma_addr, phys_addr);
                        continue;
                }
 
@@ -5531,7 +5521,7 @@ err_drop_frame:
                        goto err_drop_frame;
                }
 
-               err = mvpp2_rx_refill(port, bm_pool, bm);
+               err = mvpp2_rx_refill(port, bm_pool, pool);
                if (err) {
                        netdev_err(port->dev, "failed to refill BM pools\n");
                        goto err_drop_frame;
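
The smp_processor_id() -> get_cpu() conversions throughout the mvpp2 hunks above are not cosmetic: each indirect access writes an index register and then reads or writes data registers, and the whole sequence must stay on one CPU. get_cpu() disables preemption until the matching put_cpu(). The bare pattern, as a hedged sketch with placeholder register offsets:

	#include <linux/smp.h>

	static u32 demo_indirect_read(struct mvpp2 *priv, u32 index_reg,
				      u32 data_reg, u32 idx)
	{
		int cpu = get_cpu();	/* disables preemption */
		u32 val;

		/* With bare smp_processor_id() the task could migrate
		 * between the index write and the data read and touch
		 * two different per-CPU register windows.
		 */
		mvpp2_percpu_write(priv, cpu, index_reg, idx);
		val = mvpp2_percpu_read(priv, cpu, data_reg);

		put_cpu();		/* re-enables preemption */
		return val;
	}
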
index 2fd044b..944fc17 100644 (file)
@@ -458,13 +458,15 @@ struct mlx5e_mpw_info {
 
 struct mlx5e_rx_am_stats {
        int ppms; /* packets per msec */
+       int bpms; /* bytes per msec */
        int epms; /* events per msec */
 };
 
 struct mlx5e_rx_am_sample {
-       ktime_t         time;
-       unsigned int    pkt_ctr;
-       u16             event_ctr;
+       ktime_t time;
+       u32     pkt_ctr;
+       u32     byte_ctr;
+       u16     event_ctr;
 };
 
 struct mlx5e_rx_am { /* Adaptive Moderation */
index 8209aff..16486df 100644 (file)
@@ -1242,11 +1242,11 @@ static int mlx5e_get_ts_info(struct net_device *dev,
                                 SOF_TIMESTAMPING_RX_HARDWARE |
                                 SOF_TIMESTAMPING_RAW_HARDWARE;
 
-       info->tx_types = (BIT(1) << HWTSTAMP_TX_OFF) |
-                        (BIT(1) << HWTSTAMP_TX_ON);
+       info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+                        BIT(HWTSTAMP_TX_ON);
 
-       info->rx_filters = (BIT(1) << HWTSTAMP_FILTER_NONE) |
-                          (BIT(1) << HWTSTAMP_FILTER_ALL);
+       info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+                          BIT(HWTSTAMP_FILTER_ALL);
 
        return 0;
 }
index 41cd22a..277f4de 100644 (file)
@@ -4241,7 +4241,8 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
        return netdev;
 
 err_cleanup_nic:
-       profile->cleanup(priv);
+       if (profile->cleanup)
+               profile->cleanup(priv);
        free_netdev(netdev);
 
        return NULL;
index 79462c0..46984a5 100644 (file)
@@ -791,6 +791,8 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
        params->tx_max_inline         = mlx5e_get_max_inline_cap(mdev);
        params->num_tc                = 1;
        params->lro_wqe_sz            = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+
+       mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
 }
 
 static void mlx5e_build_rep_netdev(struct net_device *netdev)
index 02dd3a9..acf32fe 100644 (file)
@@ -183,28 +183,27 @@ static void mlx5e_am_exit_parking(struct mlx5e_rx_am *am)
        mlx5e_am_step(am);
 }
 
+#define IS_SIGNIFICANT_DIFF(val, ref) \
+       (((100 * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */
+
 static int mlx5e_am_stats_compare(struct mlx5e_rx_am_stats *curr,
                                  struct mlx5e_rx_am_stats *prev)
 {
-       int diff;
-
-       if (!prev->ppms)
-               return curr->ppms ? MLX5E_AM_STATS_BETTER :
+       if (!prev->bpms)
+               return curr->bpms ? MLX5E_AM_STATS_BETTER :
                                    MLX5E_AM_STATS_SAME;
 
-       diff = curr->ppms - prev->ppms;
-       if (((100 * abs(diff)) / prev->ppms) > 10) /* more than 10% diff */
-               return (diff > 0) ? MLX5E_AM_STATS_BETTER :
-                                   MLX5E_AM_STATS_WORSE;
+       if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
+               return (curr->bpms > prev->bpms) ? MLX5E_AM_STATS_BETTER :
+                                                  MLX5E_AM_STATS_WORSE;
 
-       if (!prev->epms)
-               return curr->epms ? MLX5E_AM_STATS_WORSE :
-                                   MLX5E_AM_STATS_SAME;
+       if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
+               return (curr->ppms > prev->ppms) ? MLX5E_AM_STATS_BETTER :
+                                                  MLX5E_AM_STATS_WORSE;
 
-       diff = curr->epms - prev->epms;
-       if (((100 * abs(diff)) / prev->epms) > 10) /* more than 10% diff */
-               return (diff < 0) ? MLX5E_AM_STATS_BETTER :
-                                   MLX5E_AM_STATS_WORSE;
+       if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
+               return (curr->epms < prev->epms) ? MLX5E_AM_STATS_BETTER :
+                                                  MLX5E_AM_STATS_WORSE;
 
        return MLX5E_AM_STATS_SAME;
 }
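
Since IS_SIGNIFICANT_DIFF() uses integer division, the "more than 10%" cut-off is strict; a worked pair of cases:

	/* With prev->bpms = 1000 (integer division truncates):
	 *   curr->bpms = 1101: (100 * 101) / 1000 = 10 -> not > 10, SAME
	 *   curr->bpms = 1111: (100 * 111) / 1000 = 11 -> > 10, BETTER
	 * The macro divides by the reference value, so the !prev->bpms
	 * early return above is what avoids a division by zero.
	 */
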
@@ -266,10 +265,13 @@ static void mlx5e_am_sample(struct mlx5e_rq *rq,
 {
        s->time      = ktime_get();
        s->pkt_ctr   = rq->stats.packets;
+       s->byte_ctr  = rq->stats.bytes;
        s->event_ctr = rq->cq.event_ctr;
 }
 
 #define MLX5E_AM_NEVENTS 64
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
 
 static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start,
                                struct mlx5e_rx_am_sample *end,
@@ -277,13 +279,17 @@ static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start,
 {
        /* u32 holds up to 71 minutes, should be enough */
        u32 delta_us = ktime_us_delta(end->time, start->time);
-       unsigned int npkts = end->pkt_ctr - start->pkt_ctr;
+       u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
+       u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
+                            start->byte_ctr);
 
        if (!delta_us)
                return;
 
-       curr_stats->ppms =            (npkts * USEC_PER_MSEC) / delta_us;
-       curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us;
+       curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
+       curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
+       curr_stats->epms = DIV_ROUND_UP(MLX5E_AM_NEVENTS * USEC_PER_MSEC,
+                                       delta_us);
 }
 
 void mlx5e_rx_am_work(struct work_struct *work)
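
BIT_GAP() makes the deltas wraparound-safe now that the sample counters are fixed-width (u32 pkt_ctr/byte_ctr, u16 event_ctr). A worked example for the event counter:

	/* start = 0xFFF0, end = 0x0010 (counter wrapped between samples):
	 *   BIT_GAP(BITS_PER_TYPE(u16), 0x0010, 0xFFF0)
	 *     = ((0x0010 - 0xFFF0) + 0x10000) & 0xFFFF
	 *     = 0x0020                -> 32 events, despite the wrap
	 */
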
@@ -308,7 +314,8 @@ void mlx5e_rx_am(struct mlx5e_rq *rq)
 
        switch (am->state) {
        case MLX5E_AM_MEASURE_IN_PROGRESS:
-               nevents = rq->cq.event_ctr - am->start_sample.event_ctr;
+               nevents = BIT_GAP(BITS_PER_TYPE(u16), rq->cq.event_ctr,
+                                 am->start_sample.event_ctr);
                if (nevents < MLX5E_AM_NEVENTS)
                        break;
                mlx5e_am_sample(rq, &end_sample);
index 53e4992..f81c3aa 100644 (file)
@@ -417,20 +417,13 @@ struct mlx5e_stats {
 };
 
 static const struct counter_desc mlx5e_pme_status_desc[] = {
-       { "module_plug", 0 },
        { "module_unplug", 8 },
 };
 
 static const struct counter_desc mlx5e_pme_error_desc[] = {
-       { "module_pwr_budget_exd", 0 },  /* power budget exceed */
-       { "module_long_range", 8 },      /* long range for non MLNX cable */
-       { "module_bus_stuck", 16 },      /* bus stuck (I2C or data shorted) */
-       { "module_no_eeprom", 24 },      /* no eeprom/retry time out */
-       { "module_enforce_part", 32 },   /* enforce part number list */
-       { "module_unknown_id", 40 },     /* unknown identifier */
-       { "module_high_temp", 48 },      /* high temperature */
+       { "module_bus_stuck", 16 },       /* bus stuck (I2C or data shorted) */
+       { "module_high_temp", 48 },       /* high temperature */
        { "module_bad_shorted", 56 },    /* bad or shorted cable/module */
-       { "module_unknown_status", 64 },
 };
 
 #endif /* __MLX5_EN_STATS_H__ */
index ec63158..9df9fc0 100644 (file)
@@ -895,7 +895,6 @@ static struct mlx5_fields fields[] = {
        {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0,  2, offsetof(struct pedit_headers, eth.h_source[4])},
        {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE,  2, offsetof(struct pedit_headers, eth.h_proto)},
 
-       {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)},
        {MLX5_ACTION_IN_FIELD_OUT_IP_TTL,  1, offsetof(struct pedit_headers, ip4.ttl)},
        {MLX5_ACTION_IN_FIELD_OUT_SIPV4,   4, offsetof(struct pedit_headers, ip4.saddr)},
        {MLX5_ACTION_IN_FIELD_OUT_DIPV4,   4, offsetof(struct pedit_headers, ip4.daddr)},
index f991f66..a53e982 100644 (file)
@@ -906,21 +906,34 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
        return 0;
 }
 
-int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+static int mlx5_devlink_eswitch_check(struct devlink *devlink)
 {
-       struct mlx5_core_dev *dev;
-       u16 cur_mlx5_mode, mlx5_mode = 0;
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
 
-       dev = devlink_priv(devlink);
+       if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+               return -EOPNOTSUPP;
 
        if (!MLX5_CAP_GEN(dev, vport_group_manager))
                return -EOPNOTSUPP;
 
-       cur_mlx5_mode = dev->priv.eswitch->mode;
-
-       if (cur_mlx5_mode == SRIOV_NONE)
+       if (dev->priv.eswitch->mode == SRIOV_NONE)
                return -EOPNOTSUPP;
 
+       return 0;
+}
+
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       u16 cur_mlx5_mode, mlx5_mode = 0;
+       int err;
+
+       err = mlx5_devlink_eswitch_check(devlink);
+       if (err)
+               return err;
+
+       cur_mlx5_mode = dev->priv.eswitch->mode;
+
        if (esw_mode_from_devlink(mode, &mlx5_mode))
                return -EINVAL;
 
@@ -937,15 +950,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 {
-       struct mlx5_core_dev *dev;
-
-       dev = devlink_priv(devlink);
-
-       if (!MLX5_CAP_GEN(dev, vport_group_manager))
-               return -EOPNOTSUPP;
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       int err;
 
-       if (dev->priv.eswitch->mode == SRIOV_NONE)
-               return -EOPNOTSUPP;
+       err = mlx5_devlink_eswitch_check(devlink);
+       if (err)
+               return err;
 
        return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
 }
@@ -954,15 +964,12 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
 {
        struct mlx5_core_dev *dev = devlink_priv(devlink);
        struct mlx5_eswitch *esw = dev->priv.eswitch;
-       int num_vports = esw->enabled_vports;
        int err, vport;
        u8 mlx5_mode;
 
-       if (!MLX5_CAP_GEN(dev, vport_group_manager))
-               return -EOPNOTSUPP;
-
-       if (esw->mode == SRIOV_NONE)
-               return -EOPNOTSUPP;
+       err = mlx5_devlink_eswitch_check(devlink);
+       if (err)
+               return err;
 
        switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
        case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
@@ -985,7 +992,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
        if (err)
                goto out;
 
-       for (vport = 1; vport < num_vports; vport++) {
+       for (vport = 1; vport < esw->enabled_vports; vport++) {
                err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
                if (err) {
                        esw_warn(dev, "Failed to set min inline on vport %d\n",
@@ -1010,12 +1017,11 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
 {
        struct mlx5_core_dev *dev = devlink_priv(devlink);
        struct mlx5_eswitch *esw = dev->priv.eswitch;
+       int err;
 
-       if (!MLX5_CAP_GEN(dev, vport_group_manager))
-               return -EOPNOTSUPP;
-
-       if (esw->mode == SRIOV_NONE)
-               return -EOPNOTSUPP;
+       err = mlx5_devlink_eswitch_check(devlink);
+       if (err)
+               return err;
 
        return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
 }
@@ -1062,11 +1068,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        int err;
 
-       if (!MLX5_CAP_GEN(dev, vport_group_manager))
-               return -EOPNOTSUPP;
-
-       if (esw->mode == SRIOV_NONE)
-               return -EOPNOTSUPP;
+       err = mlx5_devlink_eswitch_check(devlink);
+       if (err)
+               return err;
 
        if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
            (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
@@ -1105,12 +1109,11 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
 {
        struct mlx5_core_dev *dev = devlink_priv(devlink);
        struct mlx5_eswitch *esw = dev->priv.eswitch;
+       int err;
 
-       if (!MLX5_CAP_GEN(dev, vport_group_manager))
-               return -EOPNOTSUPP;
-
-       if (esw->mode == SRIOV_NONE)
-               return -EOPNOTSUPP;
+       err = mlx5_devlink_eswitch_check(devlink);
+       if (err)
+               return err;
 
        *encap = esw->offloads.encap;
        return 0;
index 0e487e8..8f5125c 100644 (file)
@@ -862,7 +862,7 @@ struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace
        ft_attr.level   = level;
        ft_attr.prio    = prio;
 
-       return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, 0);
+       return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, vport);
 }
 
 struct mlx5_flow_table*
index 44f59b1..f27f84f 100644 (file)
@@ -275,10 +275,8 @@ static void poll_health(unsigned long data)
        struct mlx5_core_health *health = &dev->priv.health;
        u32 count;
 
-       if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-               mod_timer(&health->timer, get_next_poll_jiffies());
-               return;
-       }
+       if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+               goto out;
 
        count = ioread32be(health->health_counter);
        if (count == health->prev)
@@ -290,8 +288,6 @@ static void poll_health(unsigned long data)
        if (health->miss_counter == MAX_MISSES) {
                dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
                print_health_info(dev);
-       } else {
-               mod_timer(&health->timer, get_next_poll_jiffies());
        }
 
        if (in_fatal(dev) && !health->sick) {
@@ -305,6 +301,9 @@ static void poll_health(unsigned long data)
                                "new health works are not permitted at this stage\n");
                spin_unlock(&health->wq_lock);
        }
+
+out:
+       mod_timer(&health->timer, get_next_poll_jiffies());
 }
 
 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
index af945ed..13be264 100644 (file)
@@ -175,8 +175,9 @@ static struct mlx5_profile profile[] = {
        },
 };
 
-#define FW_INIT_TIMEOUT_MILI   2000
-#define FW_INIT_WAIT_MS                2
+#define FW_INIT_TIMEOUT_MILI           2000
+#define FW_INIT_WAIT_MS                        2
+#define FW_PRE_INIT_TIMEOUT_MILI       10000
 
 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
 {
@@ -537,8 +538,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
        /* disable cmdif checksum */
        MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
 
-       /* If the HCA supports 4K UARs use it */
-       if (MLX5_CAP_GEN_MAX(dev, uar_4k))
+       /* Enable 4K UAR only when HCA supports it and page size is bigger
+        * than 4K.
+        */
+       if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
                MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);
 
        MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
@@ -1011,6 +1014,15 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
         */
        dev->state = MLX5_DEVICE_STATE_UP;
 
+       /* Wait for the firmware to accept the initialization segment
+        * configuration.
+        */
+       err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+       if (err) {
+               dev_err(&dev->pdev->dev, "Firmware over %d ms in pre-initializing state, aborting\n",
+                       FW_PRE_INIT_TIMEOUT_MILI);
+               goto out;
+       }
+
        err = mlx5_cmd_init(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
index 9f89c41..0744452 100644 (file)
@@ -3334,6 +3334,9 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
        u16 vid = vlan_dev_vlan_id(vlan_dev);
 
+       if (netif_is_bridge_port(vlan_dev))
+               return 0;
+
        if (mlxsw_sp_port_dev_check(real_dev))
                return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
                                                     vid);
index 483241b..a672f6a 100644 (file)
@@ -2956,7 +2956,7 @@ static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn,
                                qed_wr(p_hwfn,
                                       p_ptt,
                                       s_storm_defs[storm_id].cm_ctx_wr_addr,
-                                      BIT(9) | lid);
+                                      (i << 9) | lid);
                                *(dump_buf + offset) = qed_rd(p_hwfn,
                                                              p_ptt,
                                                              rd_reg_addr);
index 2ae8524..a9ce82d 100644 (file)
@@ -1505,8 +1505,8 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port,
                *index = entry->index;
                resolved = false;
        } else if (removing) {
-               ofdpa_neigh_del(trans, found);
                *index = found->index;
+               ofdpa_neigh_del(trans, found);
        } else if (updating) {
                ofdpa_neigh_update(found, trans, NULL, false);
                resolved = !is_zero_ether_addr(found->eth_dst);
index 78efb28..78f9e43 100644 (file)
@@ -4172,7 +4172,7 @@ found:
         * recipients
         */
        if (is_mc_recip) {
-               MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+               MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
                unsigned int depth, i;
 
                memset(inbuf, 0, sizeof(inbuf));
@@ -4320,7 +4320,7 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
                        efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
                } else {
                        efx_mcdi_display_error(efx, MC_CMD_FILTER_OP,
-                                              MC_CMD_FILTER_OP_IN_LEN,
+                                              MC_CMD_FILTER_OP_EXT_IN_LEN,
                                               NULL, 0, rc);
                }
        }
@@ -4453,7 +4453,7 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
                                      struct efx_filter_spec *spec)
 {
        struct efx_ef10_filter_table *table = efx->filter_state;
-       MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+       MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
        struct efx_filter_spec *saved_spec;
        unsigned int hash, i, depth = 1;
        bool replacing = false;
@@ -4940,7 +4940,7 @@ not_restored:
 static void efx_ef10_filter_table_remove(struct efx_nic *efx)
 {
        struct efx_ef10_filter_table *table = efx->filter_state;
-       MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+       MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
        struct efx_filter_spec *spec;
        unsigned int filter_idx;
        int rc;
@@ -5105,6 +5105,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
 
        /* Insert/renew filters */
        for (i = 0; i < addr_count; i++) {
+               EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID);
                efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
                efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr);
                rc = efx_ef10_filter_insert(efx, &spec, true);
@@ -5122,11 +5123,11 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
                                }
                                return rc;
                        } else {
-                               /* mark as not inserted, and carry on */
-                               rc = EFX_EF10_FILTER_ID_INVALID;
+                               /* keep invalid ID, and carry on */
                        }
+               } else {
+                       ids[i] = efx_ef10_filter_get_unsafe_id(rc);
                }
-               ids[i] = efx_ef10_filter_get_unsafe_id(rc);
        }
 
        if (multicast && rollback) {
index b7e4345..019cef1 100644 (file)
@@ -661,8 +661,6 @@ restore_filters:
                up_write(&vf->efx->filter_sem);
                mutex_unlock(&vf->efx->mac_lock);
 
-               up_write(&vf->efx->filter_sem);
-
                rc2 = efx_net_open(vf->efx->net_dev);
                if (rc2)
                        goto reset_nic;
index aa64764..e0ef02f 100644 (file)
@@ -214,13 +214,13 @@ static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p)
 {
        /* Context type from W/B descriptor must be zero */
        if (le32_to_cpu(p->des3) & TDES3_CONTEXT_TYPE)
-               return -EINVAL;
+               return 0;
 
        /* Tx Timestamp Status is 1, so des0 and des1 will have valid values */
        if (le32_to_cpu(p->des3) & TDES3_TIMESTAMP_STATUS)
-               return 0;
+               return 1;
 
-       return 1;
+       return 0;
 }
 
 static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
@@ -282,7 +282,10 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
                }
        }
 exit:
-       return ret;
+       if (likely(ret == 0))
+               return 1;
+
+       return 0;
 }
 
 static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
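
These dwmac4 hunks invert the return convention of the timestamp-status helpers: they now return 1 when a valid timestamp is present and 0 otherwise, so callers can treat the result as a boolean (the stmmac_main.c hunks below drop the ! negation accordingly). A standalone sketch of the new convention, with illustrative bit positions rather than the real descriptor layout:

    struct dma_desc { unsigned int des3; };

    #define CONTEXT_TYPE     (1u << 30)   /* illustrative bits */
    #define TIMESTAMP_STATUS (1u << 17)

    /* new convention: 1 = a valid timestamp is available, 0 = none */
    static int get_tx_timestamp_status(const struct dma_desc *p)
    {
        if (p->des3 & CONTEXT_TYPE)       /* context descriptors carry none */
            return 0;
        return !!(p->des3 & TIMESTAMP_STATUS);
    }
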
index 12236da..6e4cbc6 100644 (file)
@@ -434,14 +434,14 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
                return;
 
        /* check tx tstamp status */
-       if (!priv->hw->desc->get_tx_timestamp_status(p)) {
+       if (priv->hw->desc->get_tx_timestamp_status(p)) {
                /* get the valid tstamp */
                ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
 
                memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
                shhwtstamp.hwtstamp = ns_to_ktime(ns);
 
-               netdev_info(priv->dev, "get valid TX hw timestamp %llu\n", ns);
+               netdev_dbg(priv->dev, "get valid TX hw timestamp %llu\n", ns);
                /* pass tstamp to stack */
                skb_tstamp_tx(skb, &shhwtstamp);
        }
@@ -468,19 +468,19 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
                return;
 
        /* Check if timestamp is available */
-       if (!priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
+       if (priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
                /* For GMAC4, the valid timestamp is from CTX next desc. */
                if (priv->plat->has_gmac4)
                        ns = priv->hw->desc->get_timestamp(np, priv->adv_ts);
                else
                        ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
 
-               netdev_info(priv->dev, "get valid RX hw timestamp %llu\n", ns);
+               netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
                shhwtstamp = skb_hwtstamps(skb);
                memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
                shhwtstamp->hwtstamp = ns_to_ktime(ns);
        } else  {
-               netdev_err(priv->dev, "cannot get RX hw timestamp\n");
+               netdev_dbg(priv->dev, "cannot get RX hw timestamp\n");
        }
 }
 
@@ -546,7 +546,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
                        /* PTP v1, UDP, any kind of event packet */
                        config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
                        /* take time stamp for all event messages */
-                       snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+                       if (priv->plat->has_gmac4)
+                               snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+                       else
+                               snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 
                        ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
                        ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
@@ -578,7 +581,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
                        config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
                        ptp_v2 = PTP_TCR_TSVER2ENA;
                        /* take time stamp for all event messages */
-                       snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+                       if (priv->plat->has_gmac4)
+                               snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+                       else
+                               snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 
                        ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
                        ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
@@ -612,7 +618,10 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
                        config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
                        ptp_v2 = PTP_TCR_TSVER2ENA;
                        /* take time stamp for all event messages */
-                       snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
+                       if (priv->plat->has_gmac4)
+                               snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1;
+                       else
+                               snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
 
                        ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA;
                        ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA;
@@ -2822,7 +2831,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
        tx_q->tx_skbuff_dma[first_entry].buf = des;
        tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
-       tx_q->tx_skbuff[first_entry] = skb;
 
        first->des0 = cpu_to_le32(des);
 
@@ -2856,6 +2864,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
        tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true;
 
+       /* Only the last descriptor gets to point to the skb. */
+       tx_q->tx_skbuff[tx_q->cur_tx] = skb;
+
+       /* We've used all the descriptors we need for this skb; now
+        * advance cur_tx so that it references a fresh descriptor.
+        * ndo_start_xmit will fill this descriptor the next time it's
+        * called and stmmac_tx_clean may clean up to this descriptor.
+        */
        tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
 
        if (unlikely(stmmac_tx_avail(priv, queue) <= (MAX_SKB_FRAGS + 1))) {
@@ -2989,8 +3005,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
        first = desc;
 
-       tx_q->tx_skbuff[first_entry] = skb;
-
        enh_desc = priv->plat->enh_desc;
        /* To program the descriptors according to the size of the frame */
        if (enh_desc)
@@ -3038,8 +3052,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                                                skb->len);
        }
 
-       entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+       /* Only the last descriptor gets to point to the skb. */
+       tx_q->tx_skbuff[entry] = skb;
 
+       /* We've used all the descriptors we need for this skb; now
+        * advance cur_tx so that it references a fresh descriptor.
+        * ndo_start_xmit will fill this descriptor the next time it's
+        * called and stmmac_tx_clean may clean up to this descriptor.
+        */
+       entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
        tx_q->cur_tx = entry;
 
        if (netif_msg_pktdata(priv)) {
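
Both stmmac xmit paths now record the skb only on the last descriptor of the frame, matching the comments above: the cleanup path frees tx_skbuff[entry] as it walks completed descriptors, so hanging the skb on the first descriptor let it be freed while later fragments were still in flight. A standalone model of the cleanup-side invariant (ring layout and names are illustrative):

    #define RING_SIZE 8

    struct ring {
        void *skb[RING_SIZE];   /* non-NULL only on a frame's last descriptor */
        unsigned int dirty;
    };

    static void free_skb(void *skb) { (void)skb; /* kfree_skb() in the driver */ }

    static void tx_clean(struct ring *r, unsigned int completed)
    {
        while (r->dirty != completed) {
            if (r->skb[r->dirty]) {          /* whole frame is done here */
                free_skb(r->skb[r->dirty]);
                r->skb[r->dirty] = NULL;
            }
            r->dirty = (r->dirty + 1) % RING_SIZE;
        }
    }
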
index 48fb72f..f4b31d6 100644 (file)
@@ -59,7 +59,8 @@
 /* Enable Snapshot for Messages Relevant to Master */
 #define        PTP_TCR_TSMSTRENA       BIT(15)
 /* Select PTP packets for Taking Snapshots */
-#define        PTP_TCR_SNAPTYPSEL_1    GENMASK(17, 16)
+#define        PTP_TCR_SNAPTYPSEL_1    BIT(16)
+#define        PTP_GMAC4_TCR_SNAPTYPSEL_1      GENMASK(17, 16)
 /* Enable MAC address for PTP Frame Filtering */
 #define        PTP_TCR_TSENMACADDR     BIT(18)
 
index 1562ab4..56ba411 100644 (file)
@@ -90,7 +90,7 @@ int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr)
        if (of_device_is_compatible(dev->of_node, "ti,dm816-emac"))
                return cpsw_am33xx_cm_get_macid(dev, 0x30, slave, mac_addr);
 
-       if (of_machine_is_compatible("ti,am4372"))
+       if (of_machine_is_compatible("ti,am43"))
                return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
 
        if (of_machine_is_compatible("ti,dra7"))
index 6ebb0f5..199459b 100644 (file)
@@ -1007,7 +1007,7 @@ static void geneve_setup(struct net_device *dev)
 
        dev->netdev_ops = &geneve_netdev_ops;
        dev->ethtool_ops = &geneve_ethtool_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
 
        SET_NETDEV_DEVTYPE(dev, &geneve_type);
 
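
This geneve hunk is the first of many identical conversions in this section: instead of pointing dev->destructor at free_netdev(), drivers set dev->needs_free_netdev so the core frees the device itself, and use dev->priv_destructor only for driver-private teardown (see the ifb, loopback, macsec, slip, team, tun, veth and other hunks below, where the explicit free_netdev() call is dropped from the old destructor). A schematic of the new contract, kernel context assumed:

    static void my_teardown(struct net_device *dev)
    {
            /* release driver-private state only; the core calls
             * free_netdev() itself because needs_free_netdev is set
             */
    }

    static void my_setup(struct net_device *dev)
    {
            dev->needs_free_netdev = true;  /* was: dev->destructor = free_netdev */
            dev->priv_destructor = my_teardown;
    }

Drivers with nothing private to free, like geneve here, set only needs_free_netdev.
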
index 7b652bb..ca110cd 100644 (file)
@@ -611,7 +611,7 @@ static const struct net_device_ops gtp_netdev_ops = {
 static void gtp_link_setup(struct net_device *dev)
 {
        dev->netdev_ops         = &gtp_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
 
        dev->hard_header_len = 0;
        dev->addr_len = 0;
index 922bf44..021a8ec 100644 (file)
@@ -311,7 +311,7 @@ static void sp_setup(struct net_device *dev)
 {
        /* Finish setting up the DEVICE info. */
        dev->netdev_ops         = &sp_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
        dev->mtu                = SIXP_MTU;
        dev->hard_header_len    = AX25_MAX_HEADER_LEN;
        dev->header_ops         = &ax25_header_ops;
index f62e7f3..78a6414 100644 (file)
@@ -476,7 +476,7 @@ static const struct net_device_ops bpq_netdev_ops = {
 static void bpq_setup(struct net_device *dev)
 {
        dev->netdev_ops      = &bpq_netdev_ops;
-       dev->destructor      = free_netdev;
+       dev->needs_free_netdev = true;
 
        memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN);
        memcpy(dev->dev_addr,  &ax25_defaddr, AX25_ADDR_LEN);
index 262b2ea..6066f1b 100644 (file)
@@ -171,6 +171,8 @@ struct rndis_device {
        spinlock_t request_lock;
        struct list_head req_list;
 
+       struct work_struct mcast_work;
+
        u8 hw_mac_adr[ETH_ALEN];
        u8 rss_key[NETVSC_HASH_KEYLEN];
        u16 ind_table[ITAB_NUM];
@@ -201,6 +203,7 @@ int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
 int rndis_filter_device_add(struct hv_device *dev,
                            struct netvsc_device_info *info);
+void rndis_filter_update(struct netvsc_device *nvdev);
 void rndis_filter_device_remove(struct hv_device *dev,
                                struct netvsc_device *nvdev);
 int rndis_filter_set_rss_param(struct rndis_device *rdev,
@@ -211,7 +214,6 @@ int rndis_filter_receive(struct net_device *ndev,
                         struct vmbus_channel *channel,
                         void *data, u32 buflen);
 
-int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter);
 int rndis_filter_set_device_mac(struct net_device *ndev, char *mac);
 
 void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
@@ -696,7 +698,6 @@ struct net_device_context {
        /* list protection */
        spinlock_t lock;
 
-       struct work_struct work;
        u32 msg_enable; /* debug level */
 
        u32 tx_checksum_mask;
index 4421a6d..643c539 100644 (file)
@@ -56,37 +56,12 @@ static int debug = -1;
 module_param(debug, int, S_IRUGO);
 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
-static void do_set_multicast(struct work_struct *w)
-{
-       struct net_device_context *ndevctx =
-               container_of(w, struct net_device_context, work);
-       struct hv_device *device_obj = ndevctx->device_ctx;
-       struct net_device *ndev = hv_get_drvdata(device_obj);
-       struct netvsc_device *nvdev = rcu_dereference(ndevctx->nvdev);
-       struct rndis_device *rdev;
-
-       if (!nvdev)
-               return;
-
-       rdev = nvdev->extension;
-       if (rdev == NULL)
-               return;
-
-       if (ndev->flags & IFF_PROMISC)
-               rndis_filter_set_packet_filter(rdev,
-                       NDIS_PACKET_TYPE_PROMISCUOUS);
-       else
-               rndis_filter_set_packet_filter(rdev,
-                       NDIS_PACKET_TYPE_BROADCAST |
-                       NDIS_PACKET_TYPE_ALL_MULTICAST |
-                       NDIS_PACKET_TYPE_DIRECTED);
-}
-
 static void netvsc_set_multicast_list(struct net_device *net)
 {
        struct net_device_context *net_device_ctx = netdev_priv(net);
+       struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
-       schedule_work(&net_device_ctx->work);
+       rndis_filter_update(nvdev);
 }
 
 static int netvsc_open(struct net_device *net)
@@ -123,8 +98,6 @@ static int netvsc_close(struct net_device *net)
 
        netif_tx_disable(net);
 
-       /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
-       cancel_work_sync(&net_device_ctx->work);
        ret = rndis_filter_close(nvdev);
        if (ret != 0) {
                netdev_err(net, "unable to close device (ret %d).\n", ret);
@@ -803,7 +776,7 @@ static int netvsc_set_channels(struct net_device *net,
            channels->rx_count || channels->tx_count || channels->other_count)
                return -EINVAL;
 
-       if (count > net->num_tx_queues || count > net->num_rx_queues)
+       if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
                return -EINVAL;
 
        if (!nvdev || nvdev->destroy)
@@ -1028,7 +1001,7 @@ static const struct {
 static int netvsc_get_sset_count(struct net_device *dev, int string_set)
 {
        struct net_device_context *ndc = netdev_priv(dev);
-       struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+       struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
 
        if (!nvdev)
                return -ENODEV;
@@ -1158,11 +1131,22 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-static void netvsc_poll_controller(struct net_device *net)
+static void netvsc_poll_controller(struct net_device *dev)
 {
-       /* As netvsc_start_xmit() works synchronous we don't have to
-        * trigger anything here.
-        */
+       struct net_device_context *ndc = netdev_priv(dev);
+       struct netvsc_device *ndev;
+       int i;
+
+       rcu_read_lock();
+       ndev = rcu_dereference(ndc->nvdev);
+       if (ndev) {
+               for (i = 0; i < ndev->num_chn; i++) {
+                       struct netvsc_channel *nvchan = &ndev->chan_table[i];
+
+                       napi_schedule(&nvchan->napi);
+               }
+       }
+       rcu_read_unlock();
 }
 #endif
 
@@ -1219,7 +1203,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
        rndis_dev = ndev->extension;
        if (indir) {
                for (i = 0; i < ITAB_NUM; i++)
-                       if (indir[i] >= dev->num_rx_queues)
+                       if (indir[i] >= VRSS_CHANNEL_MAX)
                                return -EINVAL;
 
                for (i = 0; i < ITAB_NUM; i++)
@@ -1552,7 +1536,6 @@ static int netvsc_probe(struct hv_device *dev,
        hv_set_drvdata(dev, net);
 
        INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
-       INIT_WORK(&net_device_ctx->work, do_set_multicast);
 
        spin_lock_init(&net_device_ctx->lock);
        INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
@@ -1622,7 +1605,6 @@ static int netvsc_remove(struct hv_device *dev)
        netif_device_detach(net);
 
        cancel_delayed_work_sync(&ndev_ctx->dwork);
-       cancel_work_sync(&ndev_ctx->work);
 
        /*
         * Call to the vsc driver to let it know that the device is being
index f9d5b0b..cb79cd0 100644 (file)
@@ -31,6 +31,7 @@
 
 #include "hyperv_net.h"
 
+static void rndis_set_multicast(struct work_struct *w);
 
 #define RNDIS_EXT_LEN PAGE_SIZE
 struct rndis_request {
@@ -76,6 +77,7 @@ static struct rndis_device *get_rndis_device(void)
        spin_lock_init(&device->request_lock);
 
        INIT_LIST_HEAD(&device->req_list);
+       INIT_WORK(&device->mcast_work, rndis_set_multicast);
 
        device->state = RNDIS_DEV_UNINITIALIZED;
 
@@ -815,7 +817,8 @@ static int rndis_filter_query_link_speed(struct rndis_device *dev)
        return ret;
 }
 
-int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
+static int rndis_filter_set_packet_filter(struct rndis_device *dev,
+                                         u32 new_filter)
 {
        struct rndis_request *request;
        struct rndis_set_request *set;
@@ -846,6 +849,28 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
        return ret;
 }
 
+static void rndis_set_multicast(struct work_struct *w)
+{
+       struct rndis_device *rdev
+               = container_of(w, struct rndis_device, mcast_work);
+
+       if (rdev->ndev->flags & IFF_PROMISC)
+               rndis_filter_set_packet_filter(rdev,
+                                              NDIS_PACKET_TYPE_PROMISCUOUS);
+       else
+               rndis_filter_set_packet_filter(rdev,
+                                              NDIS_PACKET_TYPE_BROADCAST |
+                                              NDIS_PACKET_TYPE_ALL_MULTICAST |
+                                              NDIS_PACKET_TYPE_DIRECTED);
+}
+
+void rndis_filter_update(struct netvsc_device *nvdev)
+{
+       struct rndis_device *rdev = nvdev->extension;
+
+       schedule_work(&rdev->mcast_work);
+}
+
 static int rndis_filter_init_device(struct rndis_device *dev)
 {
        struct rndis_request *request;
@@ -973,6 +998,9 @@ static int rndis_filter_close_device(struct rndis_device *dev)
        if (dev->state != RNDIS_DEV_DATAINITIALIZED)
                return 0;
 
+       /* Make sure rndis_set_multicast doesn't re-enable filter! */
+       cancel_work_sync(&dev->mcast_work);
+
        ret = rndis_filter_set_packet_filter(dev, 0);
        if (ret == -ENODEV)
                ret = 0;
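
The netvsc/rndis hunks above move the multicast work item from net_device_context into rndis_device, so the work's lifetime matches the RNDIS state it touches: rndis_filter_update() merely schedules the work, and the close path now cancels it before clearing the packet filter (the cancel_work_sync() that netvsc_close() lost reappears in rndis_filter_close_device()). The cancel-before-teardown ordering is the key invariant; schematically, kernel context assumed:

    static int close_device(struct rndis_device *dev)
    {
            /* after this, no queued worker can touch the filter again */
            cancel_work_sync(&dev->mcast_work);

            return rndis_filter_set_packet_filter(dev, 0);
    }
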
index 312fce7..144ea5a 100644 (file)
@@ -207,7 +207,6 @@ static void ifb_dev_free(struct net_device *dev)
                __skb_queue_purge(&txp->tq);
        }
        kfree(dp->tx_private);
-       free_netdev(dev);
 }
 
 static void ifb_setup(struct net_device *dev)
@@ -230,7 +229,8 @@ static void ifb_setup(struct net_device *dev)
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        netif_keep_dst(dev);
        eth_hw_addr_random(dev);
-       dev->destructor = ifb_dev_free;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = ifb_dev_free;
 }
 
 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
index 618ed88..7c7680c 100644 (file)
@@ -632,7 +632,7 @@ void ipvlan_link_setup(struct net_device *dev)
        dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
        dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
        dev->netdev_ops = &ipvlan_netdev_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        dev->header_ops = &ipvlan_header_ops;
        dev->ethtool_ops = &ipvlan_ethtool_ops;
 }
index 224f65c..3061249 100644 (file)
@@ -159,7 +159,6 @@ static void loopback_dev_free(struct net_device *dev)
 {
        dev_net(dev)->loopback_dev = NULL;
        free_percpu(dev->lstats);
-       free_netdev(dev);
 }
 
 static const struct net_device_ops loopback_ops = {
@@ -196,7 +195,8 @@ static void loopback_setup(struct net_device *dev)
        dev->ethtool_ops        = &loopback_ethtool_ops;
        dev->header_ops         = &eth_header_ops;
        dev->netdev_ops         = &loopback_ops;
-       dev->destructor         = loopback_dev_free;
+       dev->needs_free_netdev  = true;
+       dev->priv_destructor    = loopback_dev_free;
 }
 
 /* Setup and register the loopback device. */
index cdc347b..7941167 100644 (file)
@@ -2996,7 +2996,6 @@ static void macsec_free_netdev(struct net_device *dev)
        free_percpu(macsec->secy.tx_sc.stats);
 
        dev_put(real_dev);
-       free_netdev(dev);
 }
 
 static void macsec_setup(struct net_device *dev)
@@ -3006,7 +3005,8 @@ static void macsec_setup(struct net_device *dev)
        dev->max_mtu = ETH_MAX_MTU;
        dev->priv_flags |= IFF_NO_QUEUE;
        dev->netdev_ops = &macsec_netdev_ops;
-       dev->destructor = macsec_free_netdev;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = macsec_free_netdev;
        SET_NETDEV_DEVTYPE(dev, &macsec_type);
 
        eth_zero_addr(dev->broadcast);
index 346ad2f..72b8018 100644 (file)
 #define MACVLAN_HASH_SIZE      (1<<MACVLAN_HASH_BITS)
 #define MACVLAN_BC_QUEUE_LEN   1000
 
+#define MACVLAN_F_PASSTHRU     1
+#define MACVLAN_F_ADDRCHANGE   2
+
 struct macvlan_port {
        struct net_device       *dev;
        struct hlist_head       vlan_hash[MACVLAN_HASH_SIZE];
        struct list_head        vlans;
        struct sk_buff_head     bc_queue;
        struct work_struct      bc_work;
-       bool                    passthru;
+       u32                     flags;
        int                     count;
        struct hlist_head       vlan_source_hash[MACVLAN_HASH_SIZE];
        DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
+       unsigned char           perm_addr[ETH_ALEN];
 };
 
 struct macvlan_source_entry {
@@ -66,6 +70,31 @@ struct macvlan_skb_cb {
 
 static void macvlan_port_destroy(struct net_device *dev);
 
+static inline bool macvlan_passthru(const struct macvlan_port *port)
+{
+       return port->flags & MACVLAN_F_PASSTHRU;
+}
+
+static inline void macvlan_set_passthru(struct macvlan_port *port)
+{
+       port->flags |= MACVLAN_F_PASSTHRU;
+}
+
+static inline bool macvlan_addr_change(const struct macvlan_port *port)
+{
+       return port->flags & MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_set_addr_change(struct macvlan_port *port)
+{
+       port->flags |= MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_clear_addr_change(struct macvlan_port *port)
+{
+       port->flags &= ~MACVLAN_F_ADDRCHANGE;
+}
+
 /* Hash Ethernet address */
 static u32 macvlan_eth_hash(const unsigned char *addr)
 {
@@ -181,11 +210,12 @@ static void macvlan_hash_change_addr(struct macvlan_dev *vlan,
 static bool macvlan_addr_busy(const struct macvlan_port *port,
                              const unsigned char *addr)
 {
-       /* Test to see if the specified multicast address is
+       /* Test to see if the specified address is
         * currently in use by the underlying device or
         * another macvlan.
         */
-       if (ether_addr_equal_64bits(port->dev->dev_addr, addr))
+       if (!macvlan_passthru(port) && !macvlan_addr_change(port) &&
+           ether_addr_equal_64bits(port->dev->dev_addr, addr))
                return true;
 
        if (macvlan_hash_lookup(port, addr))
@@ -445,7 +475,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
        }
 
        macvlan_forward_source(skb, port, eth->h_source);
-       if (port->passthru)
+       if (macvlan_passthru(port))
                vlan = list_first_or_null_rcu(&port->vlans,
                                              struct macvlan_dev, list);
        else
@@ -574,7 +604,7 @@ static int macvlan_open(struct net_device *dev)
        struct net_device *lowerdev = vlan->lowerdev;
        int err;
 
-       if (vlan->port->passthru) {
+       if (macvlan_passthru(vlan->port)) {
                if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) {
                        err = dev_set_promiscuity(lowerdev, 1);
                        if (err < 0)
@@ -649,7 +679,7 @@ static int macvlan_stop(struct net_device *dev)
        dev_uc_unsync(lowerdev, dev);
        dev_mc_unsync(lowerdev, dev);
 
-       if (vlan->port->passthru) {
+       if (macvlan_passthru(vlan->port)) {
                if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC))
                        dev_set_promiscuity(lowerdev, -1);
                goto hash_del;
@@ -672,6 +702,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 {
        struct macvlan_dev *vlan = netdev_priv(dev);
        struct net_device *lowerdev = vlan->lowerdev;
+       struct macvlan_port *port = vlan->port;
        int err;
 
        if (!(dev->flags & IFF_UP)) {
@@ -682,7 +713,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
                if (macvlan_addr_busy(vlan->port, addr))
                        return -EBUSY;
 
-               if (!vlan->port->passthru) {
+               if (!macvlan_passthru(port)) {
                        err = dev_uc_add(lowerdev, addr);
                        if (err)
                                return err;
@@ -692,6 +723,15 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 
                macvlan_hash_change_addr(vlan, addr);
        }
+       if (macvlan_passthru(port) && !macvlan_addr_change(port)) {
+               /* Since addr_change isn't set, we are here due to lower
+                * device change.  Save the lower-dev address so we can
+                * restore it later.
+                */
+               ether_addr_copy(vlan->port->perm_addr,
+                               lowerdev->dev_addr);
+       }
+       macvlan_clear_addr_change(port);
        return 0;
 }
 
@@ -703,7 +743,12 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p)
        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;
 
+       /* If the addresses are the same, this is a no-op */
+       if (ether_addr_equal(dev->dev_addr, addr->sa_data))
+               return 0;
+
        if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
+               macvlan_set_addr_change(vlan->port);
                dev_set_mac_address(vlan->lowerdev, addr);
                return 0;
        }
@@ -928,7 +973,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
        /* Support unicast filter only on passthru devices.
         * Multicast filter should be allowed on all devices.
         */
-       if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+       if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
                return -EOPNOTSUPP;
 
        if (flags & NLM_F_REPLACE)
@@ -952,7 +997,7 @@ static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
        /* Support unicast filter only on passthru devices.
         * Multicast filter should be allowed on all devices.
         */
-       if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+       if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
                return -EOPNOTSUPP;
 
        if (is_unicast_ether_addr(addr))
@@ -1092,7 +1137,7 @@ void macvlan_common_setup(struct net_device *dev)
        netif_keep_dst(dev);
        dev->priv_flags        |= IFF_UNICAST_FLT;
        dev->netdev_ops         = &macvlan_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
        dev->header_ops         = &macvlan_hard_header_ops;
        dev->ethtool_ops        = &macvlan_ethtool_ops;
 }
@@ -1120,8 +1165,8 @@ static int macvlan_port_create(struct net_device *dev)
        if (port == NULL)
                return -ENOMEM;
 
-       port->passthru = false;
        port->dev = dev;
+       ether_addr_copy(port->perm_addr, dev->dev_addr);
        INIT_LIST_HEAD(&port->vlans);
        for (i = 0; i < MACVLAN_HASH_SIZE; i++)
                INIT_HLIST_HEAD(&port->vlan_hash[i]);
@@ -1161,6 +1206,18 @@ static void macvlan_port_destroy(struct net_device *dev)
                kfree_skb(skb);
        }
 
+       /* If the lower device address has been changed by passthru
+        * macvlan, put it back.
+        */
+       if (macvlan_passthru(port) &&
+           !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) {
+               struct sockaddr sa;
+
+               sa.sa_family = port->dev->type;
+               memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len);
+               dev_set_mac_address(port->dev, &sa);
+       }
+
        kfree(port);
 }
 
@@ -1326,7 +1383,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
        port = macvlan_port_get_rtnl(lowerdev);
 
        /* Only 1 macvlan device can be created in passthru mode */
-       if (port->passthru) {
+       if (macvlan_passthru(port)) {
                /* The macvlan port must not be created this time,
                 * but we still goto destroy_macvlan_port for readability.
                 */
@@ -1352,7 +1409,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
                        err = -EINVAL;
                        goto destroy_macvlan_port;
                }
-               port->passthru = true;
+               macvlan_set_passthru(port);
                eth_hw_addr_inherit(dev, lowerdev);
        }
 
@@ -1434,7 +1491,7 @@ static int macvlan_changelink(struct net_device *dev,
        if (data && data[IFLA_MACVLAN_FLAGS]) {
                __u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
                bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC;
-               if (vlan->port->passthru && promisc) {
+               if (macvlan_passthru(vlan->port) && promisc) {
                        int err;
 
                        if (flags & MACVLAN_FLAG_NOPROMISC)
@@ -1597,7 +1654,7 @@ static int macvlan_device_event(struct notifier_block *unused,
                }
                break;
        case NETDEV_CHANGEADDR:
-               if (!port->passthru)
+               if (!macvlan_passthru(port))
                        return NOTIFY_DONE;
 
                vlan = list_first_entry_or_null(&port->vlans,
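
The macvlan conversion replaces the single bool passthru with a flags word plus inline accessors, so a second bit, MACVLAN_F_ADDRCHANGE, can ride alongside it and every test and update goes through one place. The helpers implement the usual test/set/clear idiom, modelled standalone below with illustrative names:

    #define F_PASSTHRU   1u
    #define F_ADDRCHANGE 2u

    struct port { unsigned int flags; };

    static int  is_passthru(const struct port *p)  { return p->flags & F_PASSTHRU; }
    static void set_addr_change(struct port *p)    { p->flags |= F_ADDRCHANGE; }
    static void clear_addr_change(struct port *p)  { p->flags &= ~F_ADDRCHANGE; }

The new perm_addr field serves the same change: it remembers the lower device's original address so macvlan_port_destroy() can restore it after a passthru macvlan has overwritten it.
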
index 06ee639..0e27920 100644 (file)
@@ -358,7 +358,7 @@ static ssize_t enabled_store(struct config_item *item,
                if (err)
                        goto out_unlock;
 
-               pr_info("netconsole: network logging started\n");
+               pr_info("network logging started\n");
        } else {        /* false */
                /* We need to disable the netconsole before cleaning it up
                 * otherwise we might end up in write_msg() with
index b916038..c4b3362 100644 (file)
@@ -113,7 +113,7 @@ static void nlmon_setup(struct net_device *dev)
 
        dev->netdev_ops = &nlmon_ops;
        dev->ethtool_ops = &nlmon_ethtool_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
 
        dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
                        NETIF_F_HIGHDMA | NETIF_F_LLTX;
index c360dd6..3ab6c58 100644 (file)
@@ -127,6 +127,7 @@ config MDIO_THUNDER
        tristate "ThunderX SOCs MDIO buses"
        depends on 64BIT
        depends on PCI
+       depends on !(MDIO_DEVICE=y && PHYLIB=m)
        select MDIO_CAVIUM
        help
          This driver supports the MDIO interfaces found on Cavium
index ed0d10f..c306523 100644 (file)
@@ -908,7 +908,7 @@ static void decode_txts(struct dp83640_private *dp83640,
        if (overflow) {
                pr_debug("tx timestamp queue overflow, count %d\n", overflow);
                while (skb) {
-                       skb_complete_tx_timestamp(skb, NULL);
+                       kfree_skb(skb);
                        skb = skb_dequeue(&dp83640->tx_queue);
                }
                return;
index b9252b8..8b20388 100644 (file)
@@ -619,6 +619,8 @@ static int ksz9031_read_status(struct phy_device *phydev)
        if ((regval & 0xFF) == 0xFF) {
                phy_init_hw(phydev);
                phydev->link = 0;
+               if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
+                       phydev->drv->config_intr(phydev);
        }
 
        return 0;
index 7524caa..eebb0e1 100644 (file)
@@ -54,6 +54,8 @@ static const char *phy_speed_to_str(int speed)
                return "5Gbps";
        case SPEED_10000:
                return "10Gbps";
+       case SPEED_14000:
+               return "14Gbps";
        case SPEED_20000:
                return "20Gbps";
        case SPEED_25000:
index 1da31dc..74b9072 100644 (file)
@@ -629,7 +629,7 @@ static void sl_uninit(struct net_device *dev)
 static void sl_free_netdev(struct net_device *dev)
 {
        int i = dev->base_addr;
-       free_netdev(dev);
+
        slip_devs[i] = NULL;
 }
 
@@ -651,7 +651,8 @@ static const struct net_device_ops sl_netdev_ops = {
 static void sl_setup(struct net_device *dev)
 {
        dev->netdev_ops         = &sl_netdev_ops;
-       dev->destructor         = sl_free_netdev;
+       dev->needs_free_netdev  = true;
+       dev->priv_destructor    = sl_free_netdev;
 
        dev->hard_header_len    = 0;
        dev->addr_len           = 0;
@@ -1369,8 +1370,6 @@ static void __exit slip_exit(void)
                if (sl->tty) {
                        printk(KERN_ERR "%s: tty discipline still running\n",
                               dev->name);
-                       /* Intentionally leak the control block. */
-                       dev->destructor = NULL;
                }
 
                unregister_netdev(dev);
index 6c5d5ef..fba8c13 100644 (file)
@@ -1643,7 +1643,6 @@ static void team_destructor(struct net_device *dev)
        struct team *team = netdev_priv(dev);
 
        free_percpu(team->pcpu_stats);
-       free_netdev(dev);
 }
 
 static int team_open(struct net_device *dev)
@@ -2079,7 +2078,8 @@ static void team_setup(struct net_device *dev)
 
        dev->netdev_ops = &team_netdev_ops;
        dev->ethtool_ops = &team_ethtool_ops;
-       dev->destructor = team_destructor;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = team_destructor;
        dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
        dev->priv_flags |= IFF_NO_QUEUE;
        dev->priv_flags |= IFF_TEAM;
index bbd707b..9ee7d42 100644 (file)
@@ -1560,7 +1560,6 @@ static void tun_free_netdev(struct net_device *dev)
        free_percpu(tun->pcpu_stats);
        tun_flow_uninit(tun);
        security_tun_dev_free_security(tun->security);
-       free_netdev(dev);
 }
 
 static void tun_setup(struct net_device *dev)
@@ -1571,7 +1570,8 @@ static void tun_setup(struct net_device *dev)
        tun->group = INVALID_GID;
 
        dev->ethtool_ops = &tun_ethtool_ops;
-       dev->destructor = tun_free_netdev;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = tun_free_netdev;
        /* We prefer our own queue length */
        dev->tx_queue_len = TUN_READQ_SIZE;
 }
index 51cf600..4037ab2 100644 (file)
@@ -1722,6 +1722,18 @@ static const struct driver_info lenovo_info = {
        .tx_fixup = ax88179_tx_fixup,
 };
 
+static const struct driver_info belkin_info = {
+       .description = "Belkin USB Ethernet Adapter",
+       .bind   = ax88179_bind,
+       .unbind = ax88179_unbind,
+       .status = ax88179_status,
+       .link_reset = ax88179_link_reset,
+       .reset  = ax88179_reset,
+       .flags  = FLAG_ETHER | FLAG_FRAMING_AX,
+       .rx_fixup = ax88179_rx_fixup,
+       .tx_fixup = ax88179_tx_fixup,
+};
+
 static const struct usb_device_id products[] = {
 {
        /* ASIX AX88179 10/100/1000 */
@@ -1751,6 +1763,10 @@ static const struct usb_device_id products[] = {
        /* Lenovo OneLinkDock Gigabit LAN */
        USB_DEVICE(0x17ef, 0x304b),
        .driver_info = (unsigned long)&lenovo_info,
+}, {
+       /* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */
+       USB_DEVICE(0x050d, 0x0128),
+       .driver_info = (unsigned long)&belkin_info,
 },
        { },
 };
index eb52de8..c7a350b 100644 (file)
@@ -298,7 +298,7 @@ static void usbpn_setup(struct net_device *dev)
        dev->addr_len           = 1;
        dev->tx_queue_len       = 3;
 
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
 }
 
 /*
index 8f923a1..32a22f4 100644 (file)
@@ -123,7 +123,7 @@ static void qmimux_setup(struct net_device *dev)
        dev->addr_len        = 0;
        dev->flags           = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
        dev->netdev_ops      = &qmimux_netdev_ops;
-       dev->destructor      = free_netdev;
+       dev->needs_free_netdev = true;
 }
 
 static struct net_device *qmimux_find_dev(struct usbnet *dev, u8 mux_id)
@@ -1192,6 +1192,8 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x1199, 0x9056, 8)},    /* Sierra Wireless Modem */
        {QMI_FIXED_INTF(0x1199, 0x9057, 8)},
        {QMI_FIXED_INTF(0x1199, 0x9061, 8)},    /* Sierra Wireless Modem */
+       {QMI_FIXED_INTF(0x1199, 0x9063, 8)},    /* Sierra Wireless EM7305 */
+       {QMI_FIXED_INTF(0x1199, 0x9063, 10)},   /* Sierra Wireless EM7305 */
        {QMI_FIXED_INTF(0x1199, 0x9071, 8)},    /* Sierra Wireless MC74xx */
        {QMI_FIXED_INTF(0x1199, 0x9071, 10)},   /* Sierra Wireless MC74xx */
        {QMI_FIXED_INTF(0x1199, 0x9079, 8)},    /* Sierra Wireless EM74xx */
@@ -1206,6 +1208,8 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)},    /* Telit ME910 */
        {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},    /* Telit LE920 */
        {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */
+       {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)},    /* Telewell TW-3G HSPA+ */
+       {QMI_FIXED_INTF(0x1c9e, 0x9803, 4)},    /* Telewell TW-3G HSPA+ */
        {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)},    /* XS Stick W100-2 from 4G Systems */
        {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)},    /* Olivetti Olicard 100 */
        {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)},    /* Olivetti Olicard 120 */
index ddc62cb..1a419a4 100644 (file)
@@ -4368,6 +4368,8 @@ static u8 rtl_get_version(struct usb_interface *intf)
                break;
        }
 
+       dev_dbg(&intf->dev, "Detected version 0x%04x\n", version);
+
        return version;
 }
 
index 38f0f03..364fa9d 100644 (file)
@@ -222,7 +222,6 @@ static int veth_dev_init(struct net_device *dev)
 static void veth_dev_free(struct net_device *dev)
 {
        free_percpu(dev->vstats);
-       free_netdev(dev);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -317,7 +316,8 @@ static void veth_setup(struct net_device *dev)
                               NETIF_F_HW_VLAN_STAG_TX |
                               NETIF_F_HW_VLAN_CTAG_RX |
                               NETIF_F_HW_VLAN_STAG_RX);
-       dev->destructor = veth_dev_free;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = veth_dev_free;
        dev->max_mtu = ETH_MAX_MTU;
 
        dev->hw_features = VETH_FEATURES;
@@ -383,7 +383,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
                tbp = tb;
        }
 
-       if (tbp[IFLA_IFNAME]) {
+       if (ifmp && tbp[IFLA_IFNAME]) {
                nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
                name_assign_type = NET_NAME_USER;
        } else {
@@ -402,7 +402,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
                return PTR_ERR(peer);
        }
 
-       if (tbp[IFLA_ADDRESS] == NULL)
+       if (!ifmp || !tbp[IFLA_ADDRESS])
                eth_hw_addr_random(peer);
 
        if (ifmp && (dev->ifindex != 0))
index a871f45..143d8a9 100644 (file)
@@ -1797,6 +1797,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
        flush_work(&vi->config_work);
 
        netif_device_detach(vi->dev);
+       netif_tx_disable(vi->dev);
        cancel_delayed_work_sync(&vi->refill);
 
        if (netif_running(vi->dev)) {
index db88249..022c0b5 100644 (file)
 #include <net/addrconf.h>
 #include <net/l3mdev.h>
 #include <net/fib_rules.h>
+#include <net/netns/generic.h>
 
 #define DRV_NAME       "vrf"
 #define DRV_VERSION    "1.0"
 
 #define FIB_RULE_PREF  1000       /* default preference for FIB rules */
-static bool add_fib_rules = true;
+
+static unsigned int vrf_net_id;
 
 struct net_vrf {
        struct rtable __rcu     *rth;
@@ -1348,7 +1350,7 @@ static void vrf_setup(struct net_device *dev)
        dev->netdev_ops = &vrf_netdev_ops;
        dev->l3mdev_ops = &vrf_l3mdev_ops;
        dev->ethtool_ops = &vrf_ethtool_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
 
        /* Fill in device structure with ethernet-generic values. */
        eth_hw_addr_random(dev);
@@ -1394,6 +1396,8 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
                       struct nlattr *tb[], struct nlattr *data[])
 {
        struct net_vrf *vrf = netdev_priv(dev);
+       bool *add_fib_rules;
+       struct net *net;
        int err;
 
        if (!data || !data[IFLA_VRF_TABLE])
@@ -1409,13 +1413,15 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
        if (err)
                goto out;
 
-       if (add_fib_rules) {
+       net = dev_net(dev);
+       add_fib_rules = net_generic(net, vrf_net_id);
+       if (*add_fib_rules) {
                err = vrf_add_fib_rules(dev);
                if (err) {
                        unregister_netdevice(dev);
                        goto out;
                }
-               add_fib_rules = false;
+               *add_fib_rules = false;
        }
 
 out:
@@ -1498,16 +1504,38 @@ static struct notifier_block vrf_notifier_block __read_mostly = {
        .notifier_call = vrf_device_event,
 };
 
+/* Initialize per network namespace state */
+static int __net_init vrf_netns_init(struct net *net)
+{
+       bool *add_fib_rules = net_generic(net, vrf_net_id);
+
+       *add_fib_rules = true;
+
+       return 0;
+}
+
+static struct pernet_operations vrf_net_ops __net_initdata = {
+       .init = vrf_netns_init,
+       .id   = &vrf_net_id,
+       .size = sizeof(bool),
+};
+
 static int __init vrf_init_module(void)
 {
        int rc;
 
        register_netdevice_notifier(&vrf_notifier_block);
 
-       rc = rtnl_link_register(&vrf_link_ops);
+       rc = register_pernet_subsys(&vrf_net_ops);
        if (rc < 0)
                goto error;
 
+       rc = rtnl_link_register(&vrf_link_ops);
+       if (rc < 0) {
+               unregister_pernet_subsys(&vrf_net_ops);
+               goto error;
+       }
+
        return 0;
 
 error:
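
The vrf change turns the file-scope add_fib_rules latch into per-network-namespace state: a pernet_operations with an .id and .size lets net_generic() hand back a per-netns slot, so each namespace installs its FIB rules exactly once instead of only the first namespace ever doing so. The registration pattern, with a made-up payload and kernel context assumed:

    static unsigned int my_net_id;

    struct my_pernet { bool first_use; };

    static int __net_init my_netns_init(struct net *net)
    {
            struct my_pernet *p = net_generic(net, my_net_id);

            p->first_use = true;    /* fresh latch for every new namespace */
            return 0;
    }

    static struct pernet_operations my_net_ops = {
            .init = my_netns_init,
            .id   = &my_net_id,                 /* core assigns the slot */
            .size = sizeof(struct my_pernet),   /* core allocates per netns */
    };

This gets registered with register_pernet_subsys(), as the vrf_init_module() hunk above does, and unregistered on the error path.
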
index 7f0136f..c28bdce 100644 (file)
@@ -135,7 +135,7 @@ static void vsockmon_setup(struct net_device *dev)
 
        dev->netdev_ops = &vsockmon_ops;
        dev->ethtool_ops = &vsockmon_ethtool_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
 
        dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
                        NETIF_F_HIGHDMA | NETIF_F_LLTX;
index a6b5052..5fa798a 100644 (file)
@@ -2611,7 +2611,7 @@ static void vxlan_setup(struct net_device *dev)
        eth_hw_addr_random(dev);
        ether_setup(dev);
 
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        SET_NETDEV_DEVTYPE(dev, &vxlan_type);
 
        dev->features   |= NETIF_F_LLTX;
index 65ee2a6..a0d76f7 100644 (file)
@@ -475,7 +475,7 @@ static void dlci_setup(struct net_device *dev)
        dev->flags              = 0;
        dev->header_ops         = &dlci_header_ops;
        dev->netdev_ops         = &dlci_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
 
        dlp->receive            = dlci_receive;
 
index eb91528..78596e4 100644 (file)
@@ -1106,7 +1106,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
                return -EIO;
        }
 
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        *get_dev_p(pvc, type) = dev;
        if (!used) {
                state(hdlc)->dce_changed = 1;
index 9df9ed6..63f7490 100644 (file)
@@ -306,7 +306,7 @@ static const struct net_device_ops lapbeth_netdev_ops = {
 static void lapbeth_setup(struct net_device *dev)
 {
        dev->netdev_ops      = &lapbeth_netdev_ops;
-       dev->destructor      = free_netdev;
+       dev->needs_free_netdev = true;
        dev->type            = ARPHRD_X25;
        dev->hard_header_len = 3;
        dev->mtu             = 1000;
index 91ee542..b90c77e 100644 (file)
@@ -1287,7 +1287,7 @@ void init_netdev(struct net_device *dev)
        struct ath6kl *ar = ath6kl_priv(dev);
 
        dev->netdev_ops = &ath6kl_netdev_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        dev->watchdog_timeo = ATH6KL_TX_TIMEOUT;
 
        dev->needed_headroom = ETH_HLEN;
index cd1d673..617199c 100644 (file)
@@ -5225,7 +5225,6 @@ void brcmf_cfg80211_free_netdev(struct net_device *ndev)
 
        if (vif)
                brcmf_free_vif(vif);
-       free_netdev(ndev);
 }
 
 static bool brcmf_is_linkup(const struct brcmf_event_msg *e)
index a3d8236..511d190 100644 (file)
@@ -624,7 +624,8 @@ struct brcmf_if *brcmf_add_if(struct brcmf_pub *drvr, s32 bsscfgidx, s32 ifidx,
                if (!ndev)
                        return ERR_PTR(-ENOMEM);
 
-               ndev->destructor = brcmf_cfg80211_free_netdev;
+               ndev->needs_free_netdev = true;
+               ndev->priv_destructor = brcmf_cfg80211_free_netdev;
                ifp = netdev_priv(ndev);
                ifp->ndev = ndev;
                /* store mapping ifidx to bsscfgidx */
index c7c1e99..d231042 100644 (file)
@@ -442,7 +442,7 @@ struct brcmf_fw {
        const char *nvram_name;
        u16 domain_nr;
        u16 bus_nr;
-       void (*done)(struct device *dev, const struct firmware *fw,
+       void (*done)(struct device *dev, int err, const struct firmware *fw,
                     void *nvram_image, u32 nvram_len);
 };
 
@@ -477,52 +477,51 @@ static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
        if (!nvram && !(fwctx->flags & BRCMF_FW_REQ_NV_OPTIONAL))
                goto fail;
 
-       fwctx->done(fwctx->dev, fwctx->code, nvram, nvram_length);
+       fwctx->done(fwctx->dev, 0, fwctx->code, nvram, nvram_length);
        kfree(fwctx);
        return;
 
 fail:
        brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
        release_firmware(fwctx->code);
-       device_release_driver(fwctx->dev);
+       fwctx->done(fwctx->dev, -ENOENT, NULL, NULL, 0);
        kfree(fwctx);
 }
 
 static void brcmf_fw_request_code_done(const struct firmware *fw, void *ctx)
 {
        struct brcmf_fw *fwctx = ctx;
-       int ret;
+       int ret = 0;
 
        brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(fwctx->dev));
-       if (!fw)
+       if (!fw) {
+               ret = -ENOENT;
                goto fail;
-
-       /* only requested code so done here */
-       if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM)) {
-               fwctx->done(fwctx->dev, fw, NULL, 0);
-               kfree(fwctx);
-               return;
        }
+       /* only requested code so done here */
+       if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM))
+               goto done;
+
        fwctx->code = fw;
        ret = request_firmware_nowait(THIS_MODULE, true, fwctx->nvram_name,
                                      fwctx->dev, GFP_KERNEL, fwctx,
                                      brcmf_fw_request_nvram_done);
 
-       if (!ret)
-               return;
-
-       brcmf_fw_request_nvram_done(NULL, fwctx);
+       /* pass NULL to nvram callback for bcm47xx fallback */
+       if (ret)
+               brcmf_fw_request_nvram_done(NULL, fwctx);
        return;
 
 fail:
        brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
-       device_release_driver(fwctx->dev);
+done:
+       fwctx->done(fwctx->dev, ret, fw, NULL, 0);
        kfree(fwctx);
 }
 
 int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
                                const char *code, const char *nvram,
-                               void (*fw_cb)(struct device *dev,
+                               void (*fw_cb)(struct device *dev, int err,
                                              const struct firmware *fw,
                                              void *nvram_image, u32 nvram_len),
                                u16 domain_nr, u16 bus_nr)
@@ -555,7 +554,7 @@ int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
 
 int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
                           const char *code, const char *nvram,
-                          void (*fw_cb)(struct device *dev,
+                          void (*fw_cb)(struct device *dev, int err,
                                         const struct firmware *fw,
                                         void *nvram_image, u32 nvram_len))
 {
index d3c9f0d..8fa4b7e 100644 (file)
@@ -73,13 +73,13 @@ void brcmf_fw_nvram_free(void *nvram);
  */
 int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
                                const char *code, const char *nvram,
-                               void (*fw_cb)(struct device *dev,
+                               void (*fw_cb)(struct device *dev, int err,
                                              const struct firmware *fw,
                                              void *nvram_image, u32 nvram_len),
                                u16 domain_nr, u16 bus_nr);
 int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
                           const char *code, const char *nvram,
-                          void (*fw_cb)(struct device *dev,
+                          void (*fw_cb)(struct device *dev, int err,
                                         const struct firmware *fw,
                                         void *nvram_image, u32 nvram_len));
 
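
The brcmfmac hunks all thread one change through: the firmware-request completion callbacks gain an err argument, so a failed request is reported to the bus driver's done handler instead of being swallowed by a blanket device_release_driver() inside the firmware helper. Each converted callback (PCIe, SDIO, USB above and below) then opens with the same guard; schematically, kernel context assumed:

    static void fw_done(struct device *dev, int err,
                        const struct firmware *fw, void *nvram, u32 nvram_len)
    {
            if (err) {
                    /* request failed; fw and nvram must not be touched,
                     * take the bus-specific error path instead
                     */
                    return;
            }
            /* ... download fw and nvram to the dongle ... */
    }
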
index 72373e5..f59642b 100644 (file)
@@ -2145,7 +2145,7 @@ void brcmf_fws_add_interface(struct brcmf_if *ifp)
        struct brcmf_fws_info *fws = drvr_to_fws(ifp->drvr);
        struct brcmf_fws_mac_descriptor *entry;
 
-       if (!ifp->ndev || fws->fcmode == BRCMF_FWS_FCMODE_NONE)
+       if (!ifp->ndev || !brcmf_fws_queue_skbs(fws))
                return;
 
        entry = &fws->desc.iface[ifp->ifidx];
index f36b96d..f878706 100644 (file)
@@ -1650,16 +1650,23 @@ static const struct brcmf_buscore_ops brcmf_pcie_buscore_ops = {
        .write32 = brcmf_pcie_buscore_write32,
 };
 
-static void brcmf_pcie_setup(struct device *dev, const struct firmware *fw,
+static void brcmf_pcie_setup(struct device *dev, int ret,
+                            const struct firmware *fw,
                             void *nvram, u32 nvram_len)
 {
-       struct brcmf_bus *bus = dev_get_drvdata(dev);
-       struct brcmf_pciedev *pcie_bus_dev = bus->bus_priv.pcie;
-       struct brcmf_pciedev_info *devinfo = pcie_bus_dev->devinfo;
+       struct brcmf_bus *bus;
+       struct brcmf_pciedev *pcie_bus_dev;
+       struct brcmf_pciedev_info *devinfo;
        struct brcmf_commonring **flowrings;
-       int ret;
        u32 i;
 
+       /* check firmware loading result */
+       if (ret)
+               goto fail;
+
+       bus = dev_get_drvdata(dev);
+       pcie_bus_dev = bus->bus_priv.pcie;
+       devinfo = pcie_bus_dev->devinfo;
        brcmf_pcie_attach(devinfo);
 
        /* Some of the firmwares have the size of the memory of the device
index e034500..5653d6d 100644 (file)
@@ -3982,21 +3982,26 @@ static const struct brcmf_bus_ops brcmf_sdio_bus_ops = {
        .get_memdump = brcmf_sdio_bus_get_memdump,
 };
 
-static void brcmf_sdio_firmware_callback(struct device *dev,
+static void brcmf_sdio_firmware_callback(struct device *dev, int err,
                                         const struct firmware *code,
                                         void *nvram, u32 nvram_len)
 {
-       struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-       struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
-       struct brcmf_sdio *bus = sdiodev->bus;
-       int err = 0;
+       struct brcmf_bus *bus_if;
+       struct brcmf_sdio_dev *sdiodev;
+       struct brcmf_sdio *bus;
        u8 saveclk;
 
-       brcmf_dbg(TRACE, "Enter: dev=%s\n", dev_name(dev));
+       brcmf_dbg(TRACE, "Enter: dev=%s, err=%d\n", dev_name(dev), err);
+       bus_if = dev_get_drvdata(dev);
+       sdiodev = bus_if->bus_priv.sdio;
+       if (err)
+               goto fail;
 
        if (!bus_if->drvr)
                return;
 
+       bus = sdiodev->bus;
+
        /* try to download image and nvram to the dongle */
        bus->alp_only = true;
        err = brcmf_sdio_download_firmware(bus, code, nvram, nvram_len);
@@ -4083,6 +4088,7 @@ release:
 fail:
        brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err);
        device_release_driver(dev);
+       device_release_driver(&sdiodev->func[2]->dev);
 }
 
 struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
index e4d545f..0eea48e 100644 (file)
@@ -1159,17 +1159,18 @@ fail:
        return ret;
 }
 
-static void brcmf_usb_probe_phase2(struct device *dev,
+static void brcmf_usb_probe_phase2(struct device *dev, int ret,
                                   const struct firmware *fw,
                                   void *nvram, u32 nvlen)
 {
        struct brcmf_bus *bus = dev_get_drvdata(dev);
-       struct brcmf_usbdev_info *devinfo;
-       int ret;
+       struct brcmf_usbdev_info *devinfo = bus->bus_priv.usb->devinfo;
+
+       if (ret)
+               goto error;
 
        brcmf_dbg(USB, "Start fw downloading\n");
 
-       devinfo = bus->bus_priv.usb->devinfo;
        ret = check_file(fw->data);
        if (ret < 0) {
                brcmf_err("invalid firmware\n");
index 544fc09..1372b20 100644 (file)
@@ -73,7 +73,7 @@ struct net_device * hostap_add_interface(struct local_info *local,
        dev->mem_end = mdev->mem_end;
 
        hostap_setup_dev(dev, local, type);
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
 
        sprintf(dev->name, "%s%s", prefix, name);
        if (!rtnl_locked)
index 002b25c..c854a55 100644 (file)
@@ -2861,7 +2861,7 @@ static const struct net_device_ops hwsim_netdev_ops = {
 static void hwsim_mon_setup(struct net_device *dev)
 {
        dev->netdev_ops = &hwsim_netdev_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        ether_setup(dev);
        dev->priv_flags |= IFF_NO_QUEUE;
        dev->type = ARPHRD_IEEE80211_RADIOTAP;
index dd87b9f..39b6b5e 100644 (file)
@@ -1280,7 +1280,7 @@ void mwifiex_init_priv_params(struct mwifiex_private *priv,
                              struct net_device *dev)
 {
        dev->netdev_ops = &mwifiex_netdev_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        /* Initialize private structure */
        priv->current_key_index = 0;
        priv->media_connected = false;
index 530586b..5b1d2e8 100644 (file)
@@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
        unsigned long   remaining_credit;
        struct timer_list credit_timeout;
        u64 credit_window_start;
+       bool rate_limited;
 
        /* Statistics */
        struct xenvif_stats stats;
index 8397f6c..e322a86 100644 (file)
@@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
 
        if (work_done < budget) {
                napi_complete_done(napi, work_done);
-               xenvif_napi_schedule_or_enable_events(queue);
+               /* If the queue is rate-limited, it shall be
+                * rescheduled in the timer callback.
+                */
+               if (likely(!queue->rate_limited))
+                       xenvif_napi_schedule_or_enable_events(queue);
        }
 
        return work_done;
index 602d408..5042ff8 100644 (file)
@@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue *queue)
                max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
 
        queue->remaining_credit = min(max_credit, max_burst);
+       queue->rate_limited = false;
 }
 
 void xenvif_tx_credit_callback(unsigned long data)
@@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
                msecs_to_jiffies(queue->credit_usec / 1000);
 
        /* Timer could already be pending in rare cases. */
-       if (timer_pending(&queue->credit_timeout))
+       if (timer_pending(&queue->credit_timeout)) {
+               queue->rate_limited = true;
                return true;
+       }
 
        /* Passed the point where we can replenish credit? */
        if (time_after_eq64(now, next_credit)) {
@@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
                mod_timer(&queue->credit_timeout,
                          next_credit);
                queue->credit_window_start = next_credit;
+               queue->rate_limited = true;
 
                return true;
        }
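
The xen-netback hunks add a rate_limited flag that is set whenever the credit timer is pending (or is newly armed), and the poll routine consults it before re-enabling events: a rate-limited queue must wait for tx_add_credit() in the timer callback, which clears the flag, rather than being rescheduled immediately to spin without credit. A schematic of the handshake, with illustrative names and kernel context assumed:

    static int my_poll(struct my_queue *q, int work_done, int budget)
    {
            if (work_done < budget) {
                    napi_complete_done(&q->napi, work_done);
                    if (!q->rate_limited)   /* timer callback reschedules us */
                            reenable_events(q);
            }
            return work_done;
    }
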
index c002384..7b3b6fd 100644 (file)
@@ -2878,7 +2878,7 @@ static const struct intel_ntb_reg skx_reg = {
        .link_is_up             = xeon_link_is_up,
        .db_ioread              = skx_db_ioread,
        .db_iowrite             = skx_db_iowrite,
-       .db_size                = sizeof(u64),
+       .db_size                = sizeof(u32),
        .ntb_ctl                = SKX_NTBCNTL_OFFSET,
        .mw_bar                 = {2, 4},
 };
index 02ca45f..10e5bf4 100644 (file)
@@ -177,14 +177,12 @@ struct ntb_transport_qp {
        u64 rx_err_ver;
        u64 rx_memcpy;
        u64 rx_async;
-       u64 dma_rx_prep_err;
        u64 tx_bytes;
        u64 tx_pkts;
        u64 tx_ring_full;
        u64 tx_err_no_buf;
        u64 tx_memcpy;
        u64 tx_async;
-       u64 dma_tx_prep_err;
 };
 
 struct ntb_transport_mw {
@@ -254,8 +252,6 @@ enum {
 #define QP_TO_MW(nt, qp)       ((qp) % nt->mw_count)
 #define NTB_QP_DEF_NUM_ENTRIES 100
 #define NTB_LINK_DOWN_TIMEOUT  10
-#define DMA_RETRIES            20
-#define DMA_OUT_RESOURCE_TO    msecs_to_jiffies(50)
 
 static void ntb_transport_rxc_db(unsigned long data);
 static const struct ntb_ctx_ops ntb_transport_ops;
@@ -516,12 +512,6 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "free tx - \t%u\n",
                               ntb_transport_tx_free_entry(qp));
-       out_offset += snprintf(buf + out_offset, out_count - out_offset,
-                              "DMA tx prep err - \t%llu\n",
-                              qp->dma_tx_prep_err);
-       out_offset += snprintf(buf + out_offset, out_count - out_offset,
-                              "DMA rx prep err - \t%llu\n",
-                              qp->dma_rx_prep_err);
 
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "\n");
@@ -623,7 +613,7 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
        if (!mw->virt_addr)
                return -ENOMEM;
 
-       if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count)
+       if (mw_num < qp_count % mw_count)
                num_qps_mw = qp_count / mw_count + 1;
        else
                num_qps_mw = qp_count / mw_count;
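
The old predicate under-assigned queue pairs whenever qp_count was not a
multiple of mw_count; the new one gives each of the first
(qp_count % mw_count) memory windows one extra QP. A worked example with
qp_count = 6 and mw_count = 4 (the same logic appears at both call sites):

	/* qp_count / mw_count = 1, qp_count % mw_count = 2 */
	for (mw_num = 0; mw_num < mw_count; mw_num++)
		num_qps_mw = (mw_num < qp_count % mw_count)
			? qp_count / mw_count + 1	/* windows 0,1: 2 QPs */
			: qp_count / mw_count;		/* windows 2,3: 1 QP  */
	/* total 2+2+1+1 = 6; the old test "mw_num + 1 < qp_count / mw_count"
	 * was never true here, so every window was sized for a single QP */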
@@ -768,8 +758,6 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
        qp->tx_err_no_buf = 0;
        qp->tx_memcpy = 0;
        qp->tx_async = 0;
-       qp->dma_tx_prep_err = 0;
-       qp->dma_rx_prep_err = 0;
 }
 
 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
@@ -1000,7 +988,7 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
        qp->event_handler = NULL;
        ntb_qp_link_down_reset(qp);
 
-       if (qp_count % mw_count && mw_num + 1 < qp_count / mw_count)
+       if (mw_num < qp_count % mw_count)
                num_qps_mw = qp_count / mw_count + 1;
        else
                num_qps_mw = qp_count / mw_count;
@@ -1128,8 +1116,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
        qp_count = ilog2(qp_bitmap);
        if (max_num_clients && max_num_clients < qp_count)
                qp_count = max_num_clients;
-       else if (mw_count < qp_count)
-               qp_count = mw_count;
+       else if (nt->mw_count < qp_count)
+               qp_count = nt->mw_count;
 
        qp_bitmap &= BIT_ULL(qp_count) - 1;
 
@@ -1317,7 +1305,6 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset)
        struct dmaengine_unmap_data *unmap;
        dma_cookie_t cookie;
        void *buf = entry->buf;
-       int retries = 0;
 
        len = entry->len;
        device = chan->device;
@@ -1346,22 +1333,11 @@ static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset)
 
        unmap->from_cnt = 1;
 
-       for (retries = 0; retries < DMA_RETRIES; retries++) {
-               txd = device->device_prep_dma_memcpy(chan,
-                                                    unmap->addr[1],
-                                                    unmap->addr[0], len,
-                                                    DMA_PREP_INTERRUPT);
-               if (txd)
-                       break;
-
-               set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(DMA_OUT_RESOURCE_TO);
-       }
-
-       if (!txd) {
-               qp->dma_rx_prep_err++;
+       txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+                                            unmap->addr[0], len,
+                                            DMA_PREP_INTERRUPT);
+       if (!txd)
                goto err_get_unmap;
-       }
 
        txd->callback_result = ntb_rx_copy_callback;
        txd->callback_param = entry;
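
Dropping the retry loop matters because this path runs from the transport's
tasklet, where sleeping in schedule_timeout() is not allowed; a failed
device_prep_dma_memcpy() now jumps straight to err_get_unmap and the caller
falls back to a synchronous CPU copy. Roughly, on the rx side:

	/* condensed caller logic (ntb_async_rx) */
	if (chan && ntb_async_rx_submit(entry, offset) == 0)
		return;			/* DMA engine will complete it */
	ntb_memcpy_rx(entry, offset);	/* fallback: plain CPU memcpy */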
@@ -1606,7 +1582,6 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
        struct dmaengine_unmap_data *unmap;
        dma_addr_t dest;
        dma_cookie_t cookie;
-       int retries = 0;
 
        device = chan->device;
        dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
@@ -1628,21 +1603,10 @@ static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
 
        unmap->to_cnt = 1;
 
-       for (retries = 0; retries < DMA_RETRIES; retries++) {
-               txd = device->device_prep_dma_memcpy(chan, dest,
-                                                    unmap->addr[0], len,
-                                                    DMA_PREP_INTERRUPT);
-               if (txd)
-                       break;
-
-               set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(DMA_OUT_RESOURCE_TO);
-       }
-
-       if (!txd) {
-               qp->dma_tx_prep_err++;
+       txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
+                                            DMA_PREP_INTERRUPT);
+       if (!txd)
                goto err_get_unmap;
-       }
 
        txd->callback_result = ntb_tx_copy_callback;
        txd->callback_param = entry;
index 434e1d4..5cab283 100644 (file)
@@ -90,11 +90,11 @@ MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
 
 static unsigned int seg_order = 19; /* 512K */
 module_param(seg_order, uint, 0644);
-MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
+MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");
 
 static unsigned int run_order = 32; /* 4G */
 module_param(run_order, uint, 0644);
-MODULE_PARM_DESC(run_order, "size order [n^2] of total data to transfer");
+MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");
 
 static bool use_dma; /* default to 0 */
 module_param(use_dma, bool, 0644);
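
The help-text fix is documentation-only: both parameters were always treated
as exponents, i.e.

	u64 seg_size = 1ULL << seg_order;	/* 19 -> 512 KiB */
	u64 run_size = 1ULL << run_order;	/* 32 -> 4 GiB   */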
index 822198a..f12d23c 100644 (file)
@@ -186,7 +186,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
         * another kernel subsystem, and we just pass it through.
         */
        if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-               bio->bi_error = -EIO;
+               bio->bi_status = BLK_STS_IOERR;
                goto out;
        }
 
@@ -205,7 +205,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
                                        "io error in %s sector %lld, len %d,\n",
                                        (rw == READ) ? "READ" : "WRITE",
                                        (unsigned long long) iter.bi_sector, len);
-                       bio->bi_error = err;
+                       bio->bi_status = errno_to_blk_status(err);
                        break;
                }
        }
@@ -273,7 +273,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 
        blk_queue_make_request(q, nd_blk_make_request);
        blk_queue_max_hw_sectors(q, UINT_MAX);
-       blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
        blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
        q->queuedata = nsblk;
index 983718b..b6ba061 100644 (file)
@@ -1210,7 +1210,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
         * another kernel subsystem, and we just pass it through.
         */
        if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-               bio->bi_error = -EIO;
+               bio->bi_status = BLK_STS_IOERR;
                goto out;
        }
 
@@ -1232,7 +1232,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
                                        (op_is_write(bio_op(bio))) ? "WRITE" :
                                        "READ",
                                        (unsigned long long) iter.bi_sector, len);
-                       bio->bi_error = err;
+                       bio->bi_status = errno_to_blk_status(err);
                        break;
                }
        }
@@ -1297,7 +1297,6 @@ static int btt_blk_init(struct btt *btt)
        blk_queue_make_request(btt->btt_queue, btt_make_request);
        blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
        blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
-       blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
        btt->btt_queue->queuedata = btt;
 
index ae00dc0..4c989bb 100644 (file)
@@ -222,13 +222,6 @@ struct device *nd_btt_create(struct nd_region *nd_region)
        return dev;
 }
 
-static bool uuid_is_null(u8 *uuid)
-{
-       static const u8 null_uuid[16];
-
-       return (memcmp(uuid, null_uuid, 16) == 0);
-}
-
 /**
  * nd_btt_arena_is_valid - check if the metadata layout is valid
  * @nd_btt:    device with BTT geometry and backing device info
@@ -249,7 +242,7 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
        if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0)
                return false;
 
-       if (!uuid_is_null(super->parent_uuid))
+       if (!guid_is_null((guid_t *)&super->parent_uuid))
                if (memcmp(super->parent_uuid, parent_uuid, 16) != 0)
                        return false;
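
The open-coded memcmp() against a zero buffer is replaced by the generic
helper from the 4.13 UUID rework; the cast is needed because btt_sb stores
the parent UUID as raw bytes. The helper is roughly:

	/* <linux/uuid.h>, approximately */
	static inline bool guid_is_null(const guid_t *guid)
	{
		return guid_equal(guid, &guid_null);
	}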
 
index c544d46..6b577af 100644 (file)
@@ -49,19 +49,19 @@ static struct nd_region *to_region(struct pmem_device *pmem)
        return to_nd_region(to_dev(pmem)->parent);
 }
 
-static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
-               unsigned int len)
+static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
+               phys_addr_t offset, unsigned int len)
 {
        struct device *dev = to_dev(pmem);
        sector_t sector;
        long cleared;
-       int rc = 0;
+       blk_status_t rc = BLK_STS_OK;
 
        sector = (offset - pmem->data_offset) / 512;
 
        cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
        if (cleared < len)
-               rc = -EIO;
+               rc = BLK_STS_IOERR;
        if (cleared > 0 && cleared / 512) {
                cleared /= 512;
                dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
@@ -84,7 +84,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
        kunmap_atomic(mem);
 }
 
-static int read_pmem(struct page *page, unsigned int off,
+static blk_status_t read_pmem(struct page *page, unsigned int off,
                void *pmem_addr, unsigned int len)
 {
        int rc;
@@ -93,15 +93,15 @@ static int read_pmem(struct page *page, unsigned int off,
        rc = memcpy_mcsafe(mem + off, pmem_addr, len);
        kunmap_atomic(mem);
        if (rc)
-               return -EIO;
-       return 0;
+               return BLK_STS_IOERR;
+       return BLK_STS_OK;
 }
 
-static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
                        unsigned int len, unsigned int off, bool is_write,
                        sector_t sector)
 {
-       int rc = 0;
+       blk_status_t rc = BLK_STS_OK;
        bool bad_pmem = false;
        phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
        void *pmem_addr = pmem->virt_addr + pmem_off;
@@ -111,7 +111,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 
        if (!is_write) {
                if (unlikely(bad_pmem))
-                       rc = -EIO;
+                       rc = BLK_STS_IOERR;
                else {
                        rc = read_pmem(page, off, pmem_addr, len);
                        flush_dcache_page(page);
@@ -149,7 +149,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
-       int rc = 0;
+       blk_status_t rc = BLK_STS_OK;
        bool do_acct;
        unsigned long start;
        struct bio_vec bvec;
@@ -166,7 +166,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
                                bvec.bv_offset, op_is_write(bio_op(bio)),
                                iter.bi_sector);
                if (rc) {
-                       bio->bi_error = rc;
+                       bio->bi_status = rc;
                        break;
                }
        }
@@ -184,7 +184,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
                       struct page *page, bool is_write)
 {
        struct pmem_device *pmem = bdev->bd_queue->queuedata;
-       int rc;
+       blk_status_t rc;
 
        rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
 
@@ -197,7 +197,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
        if (rc == 0)
                page_endio(page, is_write, 0);
 
-       return rc;
+       return blk_status_to_errno(rc);
 }
 
 /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
@@ -343,7 +343,6 @@ static int pmem_attach_disk(struct device *dev,
        blk_queue_make_request(q, pmem_make_request);
        blk_queue_physical_block_size(q, PAGE_SIZE);
        blk_queue_max_hw_sectors(q, UINT_MAX);
-       blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
        queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
        q->queuedata = pmem;
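
With this series pmem carries blk_status_t end to end and only converts at
the boundaries, as pmem_rw_page() and the bio completion path show. The two
helpers are inverses for the supported codes:

	blk_status_t sts = errno_to_blk_status(-EIO);	/* BLK_STS_IOERR */
	int err = blk_status_to_errno(sts);		/* back to -EIO  */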
index 90745a6..46d6cb1 100644 (file)
@@ -13,18 +13,6 @@ config BLK_DEV_NVME
          To compile this driver as a module, choose M here: the
          module will be called nvme.
 
-config BLK_DEV_NVME_SCSI
-       bool "SCSI emulation for NVMe device nodes"
-       depends on NVME_CORE
-       ---help---
-         This adds support for the SG_IO ioctl on the NVMe character
-         and block devices nodes, as well as a translation for a small
-         number of selected SCSI commands to NVMe commands to the NVMe
-         driver.  If you don't know what this means you probably want
-         to say N here, unless you run a distro that abuses the SCSI
-         emulation to provide stable device names for mount by id, like
-         some OpenSuSE and SLES versions.
-
 config NVME_FABRICS
        tristate
 
index f1a7d94..cc0aacb 100644 (file)
@@ -5,7 +5,6 @@ obj-$(CONFIG_NVME_RDMA)                 += nvme-rdma.o
 obj-$(CONFIG_NVME_FC)                  += nvme-fc.o
 
 nvme-core-y                            := core.o
-nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI)  += scsi.o
 nvme-core-$(CONFIG_NVM)                        += lightnvm.o
 
 nvme-y                                 += pci.o
index 903d581..d70df1d 100644 (file)
@@ -27,7 +27,6 @@
 #include <linux/nvme_ioctl.h>
 #include <linux/t10-pi.h>
 #include <linux/pm_qos.h>
-#include <scsi/sg.h>
 #include <asm/unaligned.h>
 
 #include "nvme.h"
@@ -45,7 +44,7 @@ module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
 EXPORT_SYMBOL_GPL(nvme_io_timeout);
 
-unsigned char shutdown_timeout = 5;
+static unsigned char shutdown_timeout = 5;
 module_param(shutdown_timeout, byte, 0644);
 MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
 
@@ -65,34 +64,53 @@ static bool force_apst;
 module_param(force_apst, bool, 0644);
 MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
 
+static bool streams;
+module_param(streams, bool, 0644);
+MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+
+struct workqueue_struct *nvme_wq;
+EXPORT_SYMBOL_GPL(nvme_wq);
+
 static LIST_HEAD(nvme_ctrl_list);
 static DEFINE_SPINLOCK(dev_list_lock);
 
 static struct class *nvme_class;
 
-static int nvme_error_status(struct request *req)
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+       if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+               return -EBUSY;
+       if (!queue_work(nvme_wq, &ctrl->reset_work))
+               return -EBUSY;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
+
+static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+{
+       int ret;
+
+       ret = nvme_reset_ctrl(ctrl);
+       if (!ret)
+               flush_work(&ctrl->reset_work);
+       return ret;
+}
+
+static blk_status_t nvme_error_status(struct request *req)
 {
        switch (nvme_req(req)->status & 0x7ff) {
        case NVME_SC_SUCCESS:
-               return 0;
+               return BLK_STS_OK;
        case NVME_SC_CAP_EXCEEDED:
-               return -ENOSPC;
-       default:
-               return -EIO;
-
-       /*
-        * XXX: these errors are a nasty side-band protocol to
-        * drivers/md/dm-mpath.c:noretry_error() that aren't documented
-        * anywhere..
-        */
-       case NVME_SC_CMD_SEQ_ERROR:
-               return -EILSEQ;
+               return BLK_STS_NOSPC;
        case NVME_SC_ONCS_NOT_SUPPORTED:
-               return -EOPNOTSUPP;
+               return BLK_STS_NOTSUPP;
        case NVME_SC_WRITE_FAULT:
        case NVME_SC_READ_ERROR:
        case NVME_SC_UNWRITTEN_BLOCK:
-               return -ENODATA;
+               return BLK_STS_MEDIUM;
+       default:
+               return BLK_STS_IOERR;
        }
 }
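
nvme_reset_ctrl() centralizes what every transport used to open-code: the
state transition guarantees only one reset is in flight, and a failed
queue_work() means the work is already pending, so both cases report -EBUSY.
Callers that must wait use the _sync variant; hypothetically:

	if (nvme_reset_ctrl(ctrl))		/* -EBUSY: reset in flight */
		return;
	flush_work(&ctrl->reset_work);		/* what the _sync variant adds */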
 
@@ -165,7 +183,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                switch (old_state) {
                case NVME_CTRL_NEW:
                case NVME_CTRL_LIVE:
-               case NVME_CTRL_RECONNECTING:
                        changed = true;
                        /* FALLTHRU */
                default:
@@ -283,6 +300,105 @@ struct request *nvme_alloc_request(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(nvme_alloc_request);
 
+static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
+{
+       struct nvme_command c;
+
+       memset(&c, 0, sizeof(c));
+
+       c.directive.opcode = nvme_admin_directive_send;
+       c.directive.nsid = cpu_to_le32(0xffffffff);
+       c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
+       c.directive.dtype = NVME_DIR_IDENTIFY;
+       c.directive.tdtype = NVME_DIR_STREAMS;
+       c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
+
+       return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
+}
+
+static int nvme_disable_streams(struct nvme_ctrl *ctrl)
+{
+       return nvme_toggle_streams(ctrl, false);
+}
+
+static int nvme_enable_streams(struct nvme_ctrl *ctrl)
+{
+       return nvme_toggle_streams(ctrl, true);
+}
+
+static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
+                                 struct streams_directive_params *s, u32 nsid)
+{
+       struct nvme_command c;
+
+       memset(&c, 0, sizeof(c));
+       memset(s, 0, sizeof(*s));
+
+       c.directive.opcode = nvme_admin_directive_recv;
+       c.directive.nsid = cpu_to_le32(nsid);
+       c.directive.numd = sizeof(*s);
+       c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
+       c.directive.dtype = NVME_DIR_STREAMS;
+
+       return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
+}
+
+static int nvme_configure_directives(struct nvme_ctrl *ctrl)
+{
+       struct streams_directive_params s;
+       int ret;
+
+       if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
+               return 0;
+       if (!streams)
+               return 0;
+
+       ret = nvme_enable_streams(ctrl);
+       if (ret)
+               return ret;
+
+       ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
+       if (ret)
+               return ret;
+
+       ctrl->nssa = le16_to_cpu(s.nssa);
+       if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+               dev_info(ctrl->device, "too few streams (%u) available\n",
+                                       ctrl->nssa);
+               nvme_disable_streams(ctrl);
+               return 0;
+       }
+
+       ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+       dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
+       return 0;
+}
+
+/*
+ * Check if 'req' has a write hint associated with it. If it does, assign
+ * a valid namespace stream to the write.
+ */
+static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
+                                    struct request *req, u16 *control,
+                                    u32 *dsmgmt)
+{
+       enum rw_hint streamid = req->write_hint;
+
+       if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
+               streamid = 0;
+       else {
+               streamid--;
+               if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
+                       return;
+
+               *control |= NVME_RW_DTYPE_STREAMS;
+               *dsmgmt |= streamid << 16;
+       }
+
+       if (streamid < ARRAY_SIZE(req->q->write_hints))
+               req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
+}
+
 static inline void nvme_setup_flush(struct nvme_ns *ns,
                struct nvme_command *cmnd)
 {
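
nvme_assign_write_stream() maps the block layer's write-lifetime hint onto a
1-based NVMe stream ID. Assuming the rw_hint values from this series
(NOT_SET = 0, NONE = 1, SHORT = 2, ... EXTREME = 5), a short-lived write
comes out as stream 1:

	req->write_hint = WRITE_LIFE_SHORT;	/* 2, then streamid-- -> 1 */
	control |= NVME_RW_DTYPE_STREAMS;	/* mark the rw as directed */
	dsmgmt  |= 1 << 16;			/* stream ID in bits 31:16 */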
@@ -291,7 +407,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
        cmnd->common.nsid = cpu_to_le32(ns->ns_id);
 }
 
-static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
                struct nvme_command *cmnd)
 {
        unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
@@ -300,7 +416,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
        range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
        if (!range)
-               return BLK_MQ_RQ_QUEUE_BUSY;
+               return BLK_STS_RESOURCE;
 
        __rq_for_each_bio(bio, req) {
                u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -314,7 +430,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 
        if (WARN_ON_ONCE(n != segments)) {
                kfree(range);
-               return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_IOERR;
        }
 
        memset(cmnd, 0, sizeof(*cmnd));
@@ -328,15 +444,26 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
        req->special_vec.bv_len = sizeof(*range) * segments;
        req->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 }
 
-static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
-               struct nvme_command *cmnd)
+static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
+               struct request *req, struct nvme_command *cmnd)
 {
+       struct nvme_ctrl *ctrl = ns->ctrl;
        u16 control = 0;
        u32 dsmgmt = 0;
 
+       /*
+        * If formatted with metadata, require that the block layer provide a
+        * buffer unless this namespace is formatted such that the metadata can be
+        * stripped/generated by the controller with PRACT=1.
+        */
+       if (ns && ns->ms &&
+           (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
+           !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
+               return BLK_STS_NOTSUPP;
+
        if (req->cmd_flags & REQ_FUA)
                control |= NVME_RW_FUA;
        if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -351,6 +478,9 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
        cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
        cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
 
+       if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
+               nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
+
        if (ns->ms) {
                switch (ns->pi_type) {
                case NVME_NS_DPS_PI_TYPE3:
@@ -370,12 +500,13 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
 
        cmnd->rw.control = cpu_to_le16(control);
        cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+       return BLK_STS_OK;
 }
 
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
                struct nvme_command *cmd)
 {
-       int ret = BLK_MQ_RQ_QUEUE_OK;
+       blk_status_t ret = BLK_STS_OK;
 
        if (!(req->rq_flags & RQF_DONTPREP)) {
                nvme_req(req)->retries = 0;
@@ -398,11 +529,11 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
                break;
        case REQ_OP_READ:
        case REQ_OP_WRITE:
-               nvme_setup_rw(ns, req, cmd);
+               ret = nvme_setup_rw(ns, req, cmd);
                break;
        default:
                WARN_ON_ONCE(1);
-               return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_IOERR;
        }
 
        cmd->common.command_id = req->tag;
@@ -555,15 +686,16 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                        result, timeout);
 }
 
-static void nvme_keep_alive_end_io(struct request *rq, int error)
+static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 {
        struct nvme_ctrl *ctrl = rq->end_io_data;
 
        blk_mq_free_request(rq);
 
-       if (error) {
+       if (status) {
                dev_err(ctrl->device,
-                       "failed nvme_keep_alive_end_io error=%d\n", error);
+                       "failed nvme_keep_alive_end_io error=%d\n",
+                               status);
                return;
        }
 
@@ -599,7 +731,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
        if (nvme_keep_alive(ctrl)) {
                /* allocation failure, reset the controller */
                dev_err(ctrl->device, "keep-alive failed\n");
-               ctrl->ops->reset_ctrl(ctrl);
+               nvme_reset_ctrl(ctrl);
                return;
        }
 }
@@ -623,7 +755,7 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
 
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 {
        struct nvme_command c = { };
        int error;
@@ -643,6 +775,77 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
        return error;
 }
 
+static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
+{
+       struct nvme_command c = { };
+       int status;
+       void *data;
+       int pos;
+       int len;
+
+       c.identify.opcode = nvme_admin_identify;
+       c.identify.nsid = cpu_to_le32(nsid);
+       c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
+
+       data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
+                                     NVME_IDENTIFY_DATA_SIZE);
+       if (status)
+               goto free_data;
+
+       for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
+               struct nvme_ns_id_desc *cur = data + pos;
+
+               if (cur->nidl == 0)
+                       break;
+
+               switch (cur->nidt) {
+               case NVME_NIDT_EUI64:
+                       if (cur->nidl != NVME_NIDT_EUI64_LEN) {
+                               dev_warn(ns->ctrl->device,
+                                        "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
+                                        cur->nidl);
+                               goto free_data;
+                       }
+                       len = NVME_NIDT_EUI64_LEN;
+                       memcpy(ns->eui, data + pos + sizeof(*cur), len);
+                       break;
+               case NVME_NIDT_NGUID:
+                       if (cur->nidl != NVME_NIDT_NGUID_LEN) {
+                               dev_warn(ns->ctrl->device,
+                                        "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
+                                        cur->nidl);
+                               goto free_data;
+                       }
+                       len = NVME_NIDT_NGUID_LEN;
+                       memcpy(ns->nguid, data + pos + sizeof(*cur), len);
+                       break;
+               case NVME_NIDT_UUID:
+                       if (cur->nidl != NVME_NIDT_UUID_LEN) {
+                               dev_warn(ns->ctrl->device,
+                                        "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
+                                        cur->nidl);
+                               goto free_data;
+                       }
+                       len = NVME_NIDT_UUID_LEN;
+                       uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
+                       break;
+               default:
+               /* Skip unknown types */
+                       len = cur->nidl;
+                       break;
+               }
+
+               len += sizeof(*cur);
+       }
+free_data:
+       kfree(data);
+       return status;
+}
+
 static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
 {
        struct nvme_command c = { };
@@ -653,7 +856,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
        return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
 }
 
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
                struct nvme_id_ns **id)
 {
        struct nvme_command c = { };
@@ -675,26 +878,7 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
        return error;
 }
 
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
-                     void *buffer, size_t buflen, u32 *result)
-{
-       struct nvme_command c;
-       union nvme_result res;
-       int ret;
-
-       memset(&c, 0, sizeof(c));
-       c.features.opcode = nvme_admin_get_features;
-       c.features.nsid = cpu_to_le32(nsid);
-       c.features.fid = cpu_to_le32(fid);
-
-       ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
-                       NVME_QID_ANY, 0, 0);
-       if (ret >= 0 && result)
-               *result = le32_to_cpu(res.u32);
-       return ret;
-}
-
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
                      void *buffer, size_t buflen, u32 *result)
 {
        struct nvme_command c;
@@ -713,28 +897,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
        return ret;
 }
 
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
-{
-       struct nvme_command c = { };
-       int error;
-
-       c.common.opcode = nvme_admin_get_log_page,
-       c.common.nsid = cpu_to_le32(0xFFFFFFFF),
-       c.common.cdw10[0] = cpu_to_le32(
-                       (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
-                        NVME_LOG_SMART),
-
-       *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
-       if (!*log)
-               return -ENOMEM;
-
-       error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
-                       sizeof(struct nvme_smart_log));
-       if (error)
-               kfree(*log);
-       return error;
-}
-
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 {
        u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -752,7 +914,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
         * access to the admin queue, as that might be only way to fix them up.
         */
        if (status > 0) {
-               dev_err(ctrl->dev, "Could not set queue count (%d)\n", status);
+               dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
                *count = 0;
        } else {
                nr_io_queues = min(result & 0xffff, result >> 16) + 1;
@@ -870,12 +1032,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
                return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
        case NVME_IOCTL_SUBMIT_IO:
                return nvme_submit_io(ns, (void __user *)arg);
-#ifdef CONFIG_BLK_DEV_NVME_SCSI
-       case SG_GET_VERSION_NUM:
-               return nvme_sg_get_version_num((void __user *)arg);
-       case SG_IO:
-               return nvme_sg_io(ns, (void __user *)arg);
-#endif
        default:
 #ifdef CONFIG_NVM
                if (ns->ndev)
@@ -892,10 +1048,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned int cmd, unsigned long arg)
 {
-       switch (cmd) {
-       case SG_IO:
-               return -ENOIOCTLCMD;
-       }
        return nvme_ioctl(bdev, mode, cmd, arg);
 }
 #else
@@ -983,6 +1135,12 @@ static void nvme_init_integrity(struct nvme_ns *ns)
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+static void nvme_set_chunk_size(struct nvme_ns *ns)
+{
+       u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+       blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
+}
+
 static void nvme_config_discard(struct nvme_ns *ns)
 {
        struct nvme_ctrl *ctrl = ns->ctrl;
@@ -991,8 +1149,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
 
-       ns->queue->limits.discard_alignment = logical_block_size;
-       ns->queue->limits.discard_granularity = logical_block_size;
+       if (ctrl->nr_streams && ns->sws && ns->sgs) {
+               unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+
+               ns->queue->limits.discard_alignment = sz;
+               ns->queue->limits.discard_granularity = sz;
+       } else {
+               ns->queue->limits.discard_alignment = logical_block_size;
+               ns->queue->limits.discard_granularity = logical_block_size;
+       }
        blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
        blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
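
Both new helpers turn per-namespace geometry into queue limits. With a
hypothetical 4 KiB LBA format (lba_shift = 12), noiob = 256, and stream
parameters sws = 4, sgs = 16:

	u32 chunk = 256 << (12 - 9);		/* 2048 sectors = 1 MiB */
	blk_queue_chunk_sectors(q, rounddown_pow_of_two(chunk));

	unsigned int sz = 4096 * 4 * 16;	/* 256 KiB discard granularity */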
@@ -1016,7 +1181,15 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
        if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
                memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
        if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
-               memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid));
+               memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
+       if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
+                /* Don't treat error as fatal; we potentially
+                 * already have an NGUID or EUI-64
+                 */
+               if (nvme_identify_ns_descs(ns, ns->ns_id))
+                       dev_warn(ns->ctrl->device,
+                                "%s: Identify Descriptors failed\n", __func__);
+       }
 
        return 0;
 }
@@ -1024,6 +1197,7 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
 static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 {
        struct nvme_ns *ns = disk->private_data;
+       struct nvme_ctrl *ctrl = ns->ctrl;
        u16 bs;
 
        /*
@@ -1034,12 +1208,15 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
        if (ns->lba_shift == 0)
                ns->lba_shift = 9;
        bs = 1 << ns->lba_shift;
+       ns->noiob = le16_to_cpu(id->noiob);
 
        blk_mq_freeze_queue(disk->queue);
 
-       if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+       if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
                nvme_prep_integrity(disk, id, bs);
        blk_queue_logical_block_size(ns->queue, bs);
+       if (ns->noiob)
+               nvme_set_chunk_size(ns);
        if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
                nvme_init_integrity(ns);
        if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1047,7 +1224,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
        else
                set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
 
-       if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+       if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
                nvme_config_discard(ns);
        blk_mq_unfreeze_queue(disk->queue);
 }
@@ -1283,7 +1460,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
 
 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
 {
-       unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+       unsigned long timeout = jiffies + (shutdown_timeout * HZ);
        u32 csts;
        int ret;
 
@@ -1372,7 +1549,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
        if (!table)
                return;
 
-       if (ctrl->ps_max_latency_us == 0) {
+       if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
                /* Turn off APST. */
                apste = 0;
                dev_dbg(ctrl->device, "APST disabled\n");
@@ -1528,6 +1705,31 @@ static bool quirk_matches(const struct nvme_id_ctrl *id,
                string_matches(id->fr, q->fr, sizeof(id->fr));
 }
 
+static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+       size_t nqnlen;
+       int off;
+
+       nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
+       if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
+               strcpy(ctrl->subnqn, id->subnqn);
+               return;
+       }
+
+       if (ctrl->vs >= NVME_VS(1, 2, 1))
+               dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+
+       /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
+       off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
+                       "nqn.2014.08.org.nvmexpress:%4x%4x",
+                       le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
+       memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
+       off += sizeof(id->sn);
+       memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
+       off += sizeof(id->mn);
+       memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
+}
+
 /*
  * Initialize the cached copies of the Identify data and various controller
  * register in our nvme_ctrl structure.  This should be called as soon as
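
When the controller reports no SUBNQN, the fallback concatenates a fixed
prefix, the PCI vendor and subsystem-vendor IDs as 4-digit hex, and the raw
serial and model strings. With hypothetical identify data vid = ssvid =
0x8086, sn = "S123", mn = "M456" (both space-padded to field width):

	/* "nqn.2014.08.org.nvmexpress:80868086S123 ... M456 ..." */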
@@ -1539,7 +1741,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        u64 cap;
        int ret, page_shift;
        u32 max_hw_sectors;
-       u8 prev_apsta;
+       bool prev_apst_enabled;
 
        ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
        if (ret) {
@@ -1563,6 +1765,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
                return -EIO;
        }
 
+       nvme_init_subnqn(ctrl, id);
+
        if (!ctrl->identified) {
                /*
                 * Check for quirks.  Quirk can depend on firmware version,
@@ -1582,7 +1786,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        }
 
        if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
-               dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+               dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
                ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
        }
 
@@ -1607,16 +1811,17 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        ctrl->kas = le16_to_cpu(id->kas);
 
        ctrl->npss = id->npss;
-       prev_apsta = ctrl->apsta;
+       ctrl->apsta = id->apsta;
+       prev_apst_enabled = ctrl->apst_enabled;
        if (ctrl->quirks & NVME_QUIRK_NO_APST) {
                if (force_apst && id->apsta) {
-                       dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
-                       ctrl->apsta = 1;
+                       dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+                       ctrl->apst_enabled = true;
                } else {
-                       ctrl->apsta = 0;
+                       ctrl->apst_enabled = false;
                }
        } else {
-               ctrl->apsta = id->apsta;
+               ctrl->apst_enabled = id->apsta;
        }
        memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
 
@@ -1634,22 +1839,25 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
                        ret = -EINVAL;
 
                if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
-                       dev_err(ctrl->dev,
+                       dev_err(ctrl->device,
                                "keep-alive support is mandatory for fabrics\n");
                        ret = -EINVAL;
                }
        } else {
                ctrl->cntlid = le16_to_cpu(id->cntlid);
+               ctrl->hmpre = le32_to_cpu(id->hmpre);
+               ctrl->hmmin = le32_to_cpu(id->hmmin);
        }
 
        kfree(id);
 
-       if (ctrl->apsta && !prev_apsta)
+       if (ctrl->apst_enabled && !prev_apst_enabled)
                dev_pm_qos_expose_latency_tolerance(ctrl->device);
-       else if (!ctrl->apsta && prev_apsta)
+       else if (!ctrl->apst_enabled && prev_apst_enabled)
                dev_pm_qos_hide_latency_tolerance(ctrl->device);
 
        nvme_configure_apst(ctrl);
+       nvme_configure_directives(ctrl);
 
        ctrl->identified = true;
 
@@ -1735,7 +1943,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
                return nvme_dev_user_cmd(ctrl, argp);
        case NVME_IOCTL_RESET:
                dev_warn(ctrl->device, "resetting controller\n");
-               return ctrl->ops->reset_ctrl(ctrl);
+               return nvme_reset_ctrl_sync(ctrl);
        case NVME_IOCTL_SUBSYS_RESET:
                return nvme_reset_subsystem(ctrl);
        case NVME_IOCTL_RESCAN:
@@ -1761,7 +1969,7 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
        int ret;
 
-       ret = ctrl->ops->reset_ctrl(ctrl);
+       ret = nvme_reset_ctrl_sync(ctrl);
        if (ret < 0)
                return ret;
        return count;
@@ -1787,8 +1995,8 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
        int serial_len = sizeof(ctrl->serial);
        int model_len = sizeof(ctrl->model);
 
-       if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
-               return sprintf(buf, "eui.%16phN\n", ns->uuid);
+       if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+               return sprintf(buf, "eui.%16phN\n", ns->nguid);
 
        if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
                return sprintf(buf, "eui.%8phN\n", ns->eui);
@@ -1803,11 +2011,28 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
 
+static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+       struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+       return sprintf(buf, "%pU\n", ns->nguid);
+}
+static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
+
 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
                                                                char *buf)
 {
        struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
-       return sprintf(buf, "%pU\n", ns->uuid);
+
+       /* For backward compatibility, expose the NGUID to userspace if
+        * we have no UUID set
+        */
+       if (uuid_is_null(&ns->uuid)) {
+               printk_ratelimited(KERN_WARNING
+                                  "No UUID available providing old NGUID\n");
+               return sprintf(buf, "%pU\n", ns->nguid);
+       }
+       return sprintf(buf, "%pU\n", &ns->uuid);
 }
 static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
 
@@ -1830,6 +2055,7 @@ static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
 static struct attribute *nvme_ns_attrs[] = {
        &dev_attr_wwid.attr,
        &dev_attr_uuid.attr,
+       &dev_attr_nguid.attr,
        &dev_attr_eui.attr,
        &dev_attr_nsid.attr,
        NULL,
@@ -1842,7 +2068,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
        struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
 
        if (a == &dev_attr_uuid.attr) {
-               if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
+               if (uuid_is_null(&ns->uuid) ||
+                   !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+                       return 0;
+       }
+       if (a == &dev_attr_nguid.attr) {
+               if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
                        return 0;
        }
        if (a == &dev_attr_eui.attr) {
@@ -1931,8 +2162,7 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
 {
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       ctrl->ops->get_subsysnqn(ctrl));
+       return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
 }
 static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
 
@@ -1961,24 +2191,16 @@ static struct attribute *nvme_dev_attrs[] = {
        NULL
 };
 
-#define CHECK_ATTR(ctrl, a, name)              \
-       if ((a) == &dev_attr_##name.attr &&     \
-           !(ctrl)->ops->get_##name)           \
-               return 0
-
 static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
                struct attribute *a, int n)
 {
        struct device *dev = container_of(kobj, struct device, kobj);
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 
-       if (a == &dev_attr_delete_controller.attr) {
-               if (!ctrl->ops->delete_ctrl)
-                       return 0;
-       }
-
-       CHECK_ATTR(ctrl, a, subsysnqn);
-       CHECK_ATTR(ctrl, a, address);
+       if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
+               return 0;
+       if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
+               return 0;
 
        return a->mode;
 }
@@ -2019,6 +2241,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        return ret;
 }
 
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+       struct streams_directive_params s;
+       int ret;
+
+       if (!ctrl->nr_streams)
+               return 0;
+
+       ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
+       if (ret)
+               return ret;
+
+       ns->sws = le32_to_cpu(s.sws);
+       ns->sgs = le16_to_cpu(s.sgs);
+
+       if (ns->sws) {
+               unsigned int bs = 1 << ns->lba_shift;
+
+               blk_queue_io_min(ns->queue, bs * ns->sws);
+               if (ns->sgs)
+                       blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
+       }
+
+       return 0;
+}
+
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
        struct nvme_ns *ns;
@@ -2048,6 +2296,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        nvme_set_queue_limits(ctrl, ns->queue);
+       nvme_setup_streams_ns(ctrl, ns);
 
        sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
 
@@ -2056,7 +2305,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        if (nvme_nvm_ns_supported(ns, id) &&
                                nvme_nvm_register(ns, disk_name, node)) {
-               dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__);
+               dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
                goto out_free_id;
        }
 
@@ -2231,7 +2480,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
         * removal.
         */
        if (ctrl->state == NVME_CTRL_LIVE)
-               schedule_work(&ctrl->scan_work);
+               queue_work(nvme_wq, &ctrl->scan_work);
 }
 EXPORT_SYMBOL_GPL(nvme_queue_scan);
 
@@ -2286,7 +2535,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
                /*FALLTHRU*/
        case NVME_SC_ABORT_REQ:
                ++ctrl->event_limit;
-               schedule_work(&ctrl->async_event_work);
+               queue_work(nvme_wq, &ctrl->async_event_work);
                break;
        default:
                break;
@@ -2309,7 +2558,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 void nvme_queue_async_events(struct nvme_ctrl *ctrl)
 {
        ctrl->event_limit = NVME_NR_AERS;
-       schedule_work(&ctrl->async_event_work);
+       queue_work(nvme_wq, &ctrl->async_event_work);
 }
 EXPORT_SYMBOL_GPL(nvme_queue_async_events);
 
@@ -2442,6 +2691,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 
        mutex_lock(&ctrl->namespaces_mutex);
 
+       /* Forcibly unquiesce queues to avoid blocking dispatch */
+       blk_mq_unquiesce_queue(ctrl->admin_q);
+
        /* Forcibly start all queues to avoid having stuck requests */
        blk_mq_start_hw_queues(ctrl->admin_q);
 
@@ -2455,6 +2707,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
                revalidate_disk(ns->disk);
                blk_set_queue_dying(ns->queue);
 
+               /* Forcibly unquiesce queues to avoid blocking dispatch */
+               blk_mq_unquiesce_queue(ns->queue);
+
                /*
                 * Forcibly start all queues to avoid having stuck requests.
                 * Note that we must ensure the queues are not stopped
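
The unquiesce calls close a hang window: a queue quiesced before the
controller died would otherwise hold onto its requests forever, since nothing
would ever dispatch them. The condensed teardown order per namespace is:

	blk_set_queue_dying(ns->queue);		/* new I/O fails fast          */
	blk_mq_unquiesce_queue(ns->queue);	/* let stuck requests dispatch */
	blk_mq_start_hw_queues(ns->queue);	/* ...and complete with error  */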
@@ -2533,7 +2788,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 
        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
-               blk_mq_start_stopped_hw_queues(ns->queue, true);
+               blk_mq_unquiesce_queue(ns->queue);
                blk_mq_kick_requeue_list(ns->queue);
        }
        mutex_unlock(&ctrl->namespaces_mutex);
@@ -2544,10 +2799,15 @@ int __init nvme_core_init(void)
 {
        int result;
 
+       nvme_wq = alloc_workqueue("nvme-wq",
+                       WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+       if (!nvme_wq)
+               return -ENOMEM;
+
        result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
                                                        &nvme_dev_fops);
        if (result < 0)
-               return result;
+               goto destroy_wq;
        else if (result > 0)
                nvme_char_major = result;
 
@@ -2559,8 +2819,10 @@ int __init nvme_core_init(void)
 
        return 0;
 
- unregister_chrdev:
+unregister_chrdev:
        __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+destroy_wq:
+       destroy_workqueue(nvme_wq);
        return result;
 }
 
@@ -2568,6 +2830,7 @@ void nvme_core_exit(void)
 {
        class_destroy(nvme_class);
        __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+       destroy_workqueue(nvme_wq);
 }
 
 MODULE_LICENSE("GPL");
index 990e6fb..2e582a2 100644 (file)
@@ -58,7 +58,6 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
 
        kref_init(&host->ref);
        memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
-       uuid_be_gen(&host->id);
 
        list_add_tail(&host->list, &nvmf_hosts);
 out_unlock:
@@ -75,7 +74,6 @@ static struct nvmf_host *nvmf_host_default(void)
                return NULL;
 
        kref_init(&host->ref);
-       uuid_be_gen(&host->id);
        snprintf(host->nqn, NVMF_NQN_SIZE,
                "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
 
@@ -128,16 +126,6 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
 EXPORT_SYMBOL_GPL(nvmf_get_address);
 
 /**
- * nvmf_get_subsysnqn() - Get subsystem NQN
- * @ctrl:      Host NVMe controller instance which we got the NQN
- */
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl)
-{
-       return ctrl->opts->subsysnqn;
-}
-EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn);
-
-/**
  * nvmf_reg_read32() -  NVMe Fabrics "Property Get" API function.
  * @ctrl:      Host NVMe controller instance maintaining the admin
  *             queue used to submit the property read command to
@@ -337,6 +325,24 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
                        }
                }
                break;
+
+       case NVME_SC_CONNECT_INVALID_HOST:
+               dev_err(ctrl->device,
+                       "Connect for subsystem %s is not allowed, hostnqn: %s\n",
+                       data->subsysnqn, data->hostnqn);
+               break;
+
+       case NVME_SC_CONNECT_CTRL_BUSY:
+               dev_err(ctrl->device,
+                       "Connect command failed: controller is busy or not available\n");
+               break;
+
+       case NVME_SC_CONNECT_FORMAT:
+               dev_err(ctrl->device,
+                       "Connect incompatible format: %d",
+                       cmd->connect.recfmt);
+               break;
+
        default:
                dev_err(ctrl->device,
                        "Connect command failed, error wo/DNR bit: %d\n",
@@ -376,13 +382,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
        cmd.connect.opcode = nvme_fabrics_command;
        cmd.connect.fctype = nvme_fabrics_type_connect;
        cmd.connect.qid = 0;
-
-       /*
-        * fabrics spec sets a minimum of depth 32 for admin queue,
-        * so set the queue with this depth always until
-        * justification otherwise.
-        */
-       cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+       cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
 
        /*
         * Set keep-alive timeout in seconds granularity (ms * 1000)
@@ -395,7 +395,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
        if (!data)
                return -ENOMEM;
 
-       memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
+       uuid_copy(&data->hostid, &ctrl->opts->host->id);
        data->cntlid = cpu_to_le16(0xffff);
        strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
        strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
@@ -454,7 +454,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
        if (!data)
                return -ENOMEM;
 
-       memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
+       uuid_copy(&data->hostid, &ctrl->opts->host->id);
        data->cntlid = cpu_to_le16(ctrl->cntlid);
        strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
        strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
@@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
 {
        if (ctrl->opts->max_reconnects != -1 &&
-           ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects)
+           ctrl->nr_reconnects < ctrl->opts->max_reconnects)
                return true;
 
        return false;
@@ -547,6 +547,7 @@ static const match_table_t opt_tokens = {
        { NVMF_OPT_KATO,                "keep_alive_tmo=%d"     },
        { NVMF_OPT_HOSTNQN,             "hostnqn=%s"            },
        { NVMF_OPT_HOST_TRADDR,         "host_traddr=%s"        },
+       { NVMF_OPT_HOST_ID,             "hostid=%s"             },
        { NVMF_OPT_ERR,                 NULL                    }
 };
 
@@ -558,6 +559,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
        int token, ret = 0;
        size_t nqnlen  = 0;
        int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
+       uuid_t hostid;
 
        /* Set defaults */
        opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -568,6 +570,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
        if (!options)
                return -ENOMEM;
 
+       uuid_gen(&hostid);
+
        while ((p = strsep(&o, ",\n")) != NULL) {
                if (!*p)
                        continue;
@@ -724,6 +728,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                        }
                        opts->host_traddr = p;
                        break;
+               case NVMF_OPT_HOST_ID:
+                       p = match_strdup(args);
+                       if (!p) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+                       if (uuid_parse(p, &hostid)) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       break;
                default:
                        pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
                                p);
@@ -743,6 +758,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                opts->host = nvmf_default_host;
        }
 
+       uuid_copy(&opts->host->id, &hostid);
+
 out:
        if (!opts->discovery_nqn && !opts->kato)
                opts->kato = NVME_DEFAULT_KATO;
@@ -803,7 +820,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
 
 #define NVMF_REQUIRED_OPTS     (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
 #define NVMF_ALLOWED_OPTS      (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
-                                NVMF_OPT_KATO | NVMF_OPT_HOSTNQN)
+                                NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
+                                NVMF_OPT_HOST_ID)
 
 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
@@ -854,6 +872,15 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
                goto out_unlock;
        }
 
+       if (strcmp(ctrl->subnqn, opts->subsysnqn)) {
+               dev_warn(ctrl->device,
+                       "controller returned incorrect NQN: \"%s\".\n",
+                       ctrl->subnqn);
+               mutex_unlock(&nvmf_transports_mutex);
+               ctrl->ops->delete_ctrl(ctrl);
+               return ERR_PTR(-EINVAL);
+       }
+
        mutex_unlock(&nvmf_transports_mutex);
        return ctrl;
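
[Annotation] The fabrics.c hunks above default the host ID with uuid_gen() and only override it when a well-formed "hostid=" string parses cleanly. A minimal userspace sketch of the same parse-or-generate flow, using libuuid rather than the in-kernel uuid_t helpers (function and variable names here are illustrative):

#include <stdio.h>
#include <uuid/uuid.h>          /* link with -luuid */

/* default to a random id, honour a valid user-supplied override */
static int pick_hostid(const char *opt, uuid_t out)
{
        uuid_generate(out);                     /* like uuid_gen() */
        if (opt && uuid_parse(opt, out))
                return -1;                      /* like the -EINVAL path */
        return 0;
}

int main(void)
{
        uuid_t id;
        char buf[37];                           /* 36 chars + NUL */

        if (pick_hostid("deadbeef-dead-dead-dead-deaddeadbeef", id))
                return 1;
        uuid_unparse(id, buf);
        printf("hostid: %s\n", buf);
        return 0;
}
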
 
index f5a9c1f..bf33663 100644 (file)
@@ -36,7 +36,7 @@ struct nvmf_host {
        struct kref             ref;
        struct list_head        list;
        char                    nqn[NVMF_NQN_SIZE];
-       uuid_be                 id;
+       uuid_t                  id;
 };
 
 /**
@@ -56,6 +56,7 @@ enum {
        NVMF_OPT_RECONNECT_DELAY = 1 << 9,
        NVMF_OPT_HOST_TRADDR    = 1 << 10,
        NVMF_OPT_CTRL_LOSS_TMO  = 1 << 11,
+       NVMF_OPT_HOST_ID        = 1 << 12,
 };
 
 /**
@@ -80,7 +81,6 @@ enum {
  * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
  * @kato:      Keep-alive timeout.
  * @host:      Virtual NVMe host, contains the NQN and Host ID.
- * @nr_reconnects: number of reconnect attempted since the last ctrl failure
  * @max_reconnects: maximum number of allowed reconnect attempts before removing
  *              the controller, (-1) means reconnect forever, zero means remove
  *              immediately;
@@ -98,7 +98,6 @@ struct nvmf_ctrl_options {
        bool                    discovery_nqn;
        unsigned int            kato;
        struct nvmf_host        *host;
-       int                     nr_reconnects;
        int                     max_reconnects;
 };
 
@@ -140,7 +139,6 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
 int nvmf_register_transport(struct nvmf_transport_ops *ops);
 void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
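
[Annotation] The new NVMF_OPT_HOST_ID bit follows the existing pattern: every option is one bit in a mask, and each transport advertises which bits are required and which are merely allowed. A small illustrative sketch of that check (names are stand-ins, not the driver's):

enum {
        OPT_TRANSPORT   = 1 << 0,
        OPT_NQN         = 1 << 1,
        OPT_HOST_ID     = 1 << 12,      /* the bit added above */
};

/* reject a connect string whose options don't fit the transport */
static int check_opts(unsigned int seen, unsigned int required,
                      unsigned int allowed)
{
        if ((seen & required) != required)
                return -1;              /* a mandatory option is missing */
        if (seen & ~(required | allowed))
                return -1;              /* an option this transport lacks */
        return 0;
}
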
 
index 92964ce..ed87214 100644 (file)
@@ -36,7 +36,7 @@
  */
 #define NVME_FC_NR_AEN_COMMANDS        1
 #define NVME_FC_AQ_BLKMQ_DEPTH \
-       (NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
+       (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
 #define AEN_CMDID_BASE         (NVME_FC_AQ_BLKMQ_DEPTH + 1)
 
 enum nvme_fc_queue_flags {
@@ -161,12 +161,12 @@ struct nvme_fc_ctrl {
        struct blk_mq_tag_set   tag_set;
 
        struct work_struct      delete_work;
-       struct work_struct      reset_work;
        struct delayed_work     connect_work;
 
        struct kref             ref;
        u32                     flags;
        u32                     iocnt;
+       wait_queue_head_t       ioabort_wait;
 
        struct nvme_fc_fcp_op   aen_ops[NVME_FC_NR_AEN_COMMANDS];
 
@@ -214,7 +214,6 @@ static LIST_HEAD(nvme_fc_lport_list);
 static DEFINE_IDA(nvme_fc_local_port_cnt);
 static DEFINE_IDA(nvme_fc_ctrl_cnt);
 
-static struct workqueue_struct *nvme_fc_wq;
 
 
 
@@ -878,8 +877,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
        assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
        /* Linux supports only Dynamic controllers */
        assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
-       memcpy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id,
-               min_t(size_t, FCNVME_ASSOC_HOSTID_LEN, sizeof(uuid_be)));
+       uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
        strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
                min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
        strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
@@ -1242,8 +1240,10 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
 
        spin_lock_irqsave(&ctrl->lock, flags);
        if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
-               if (ctrl->flags & FCCTRL_TERMIO)
-                       ctrl->iocnt--;
+               if (ctrl->flags & FCCTRL_TERMIO) {
+                       if (!--ctrl->iocnt)
+                               wake_up(&ctrl->ioabort_wait);
+               }
        }
        if (op->flags & FCOP_FLAGS_RELEASED)
                complete_rq = true;
@@ -1450,18 +1450,8 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
 {
        struct nvme_fc_ctrl *ctrl = set->driver_data;
        struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-       struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1];
-
-       return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
-}
-
-static int
-nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq,
-               unsigned int hctx_idx, unsigned int numa_node)
-{
-       struct nvme_fc_ctrl *ctrl = set->driver_data;
-       struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-       struct nvme_fc_queue *queue = &ctrl->queues[0];
+       int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
+       struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
 
        return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
 }
@@ -1759,16 +1749,16 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
 static void
 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 {
+       /* only proceed if in LIVE state - e.g. on first error */
+       if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+               return;
+
        dev_warn(ctrl->ctrl.device,
                "NVME-FC{%d}: transport association error detected: %s\n",
                ctrl->cnum, errmsg);
        dev_warn(ctrl->ctrl.device,
                "NVME-FC{%d}: resetting controller\n", ctrl->cnum);
 
-       /* stop the queues on error, cleanup is in reset thread */
-       if (ctrl->queue_count > 1)
-               nvme_stop_queues(&ctrl->ctrl);
-
        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
                dev_err(ctrl->ctrl.device,
                        "NVME-FC{%d}: error_recovery: Couldn't change state "
@@ -1776,10 +1766,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
                return;
        }
 
-       if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
-               dev_err(ctrl->ctrl.device,
-                       "NVME-FC{%d}: error_recovery: Failed to schedule "
-                       "reset work\n", ctrl->cnum);
+       nvme_reset_ctrl(&ctrl->ctrl);
 }
 
 static enum blk_eh_timer_return
@@ -1888,7 +1875,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
  * level FC exchange resource that is also outstanding. This must be
  * considered in all cleanup operations.
  */
-static int
+static blk_status_t
 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
        struct nvme_fc_fcp_op *op, u32 data_len,
        enum nvmefc_fcp_datadir io_dir)
@@ -1903,10 +1890,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
         * the target device is present
         */
        if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
-               return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_IOERR;
 
        if (!nvme_fc_ctrl_get(ctrl))
-               return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_IOERR;
 
        /* format the FC-NVME CMD IU and fcp_req */
        cmdiu->connection_id = cpu_to_be64(queue->connection_id);
@@ -1954,8 +1941,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
                if (ret < 0) {
                        nvme_cleanup_cmd(op->rq);
                        nvme_fc_ctrl_put(ctrl);
-                       return (ret == -ENOMEM || ret == -EAGAIN) ?
-                               BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+                       if (ret == -ENOMEM || ret == -EAGAIN)
+                               return BLK_STS_RESOURCE;
+                       return BLK_STS_IOERR;
                }
        }
 
@@ -1972,28 +1960,26 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
                                        queue->lldd_handle, &op->fcp_req);
 
        if (ret) {
-               if (op->rq) {                   /* normal request */
+               if (op->rq)                     /* normal request */
                        nvme_fc_unmap_data(ctrl, op->rq, op);
-                       nvme_cleanup_cmd(op->rq);
-               }
                /* else - aen. no cleanup needed */
 
                nvme_fc_ctrl_put(ctrl);
 
                if (ret != -EBUSY)
-                       return BLK_MQ_RQ_QUEUE_ERROR;
+                       return BLK_STS_IOERR;
 
                if (op->rq) {
                        blk_mq_stop_hw_queues(op->rq->q);
                        blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
                }
-               return BLK_MQ_RQ_QUEUE_BUSY;
+               return BLK_STS_RESOURCE;
        }
 
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 }
 
-static int
+static blk_status_t
 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
                        const struct blk_mq_queue_data *bd)
 {
@@ -2006,7 +1992,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
        struct nvme_command *sqe = &cmdiu->sqe;
        enum nvmefc_fcp_datadir io_dir;
        u32 data_len;
-       int ret;
+       blk_status_t ret;
 
        ret = nvme_setup_cmd(ns, rq, sqe);
        if (ret)
@@ -2061,7 +2047,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
        struct nvme_fc_fcp_op *aen_op;
        unsigned long flags;
        bool terminating = false;
-       int ret;
+       blk_status_t ret;
 
        if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
                return;
@@ -2093,7 +2079,6 @@ __nvme_fc_final_op_cleanup(struct request *rq)
        op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
                        FCOP_FLAGS_COMPLETE);
 
-       nvme_cleanup_cmd(rq);
        nvme_fc_unmap_data(ctrl, rq, op);
        nvme_complete_rq(rq);
        nvme_fc_ctrl_put(ctrl);
@@ -2311,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
        int ret;
        bool changed;
 
-       ++ctrl->ctrl.opts->nr_reconnects;
+       ++ctrl->ctrl.nr_reconnects;
 
        /*
         * Create the admin queue
@@ -2408,7 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
        WARN_ON_ONCE(!changed);
 
-       ctrl->ctrl.opts->nr_reconnects = 0;
+       ctrl->ctrl.nr_reconnects = 0;
 
        if (ctrl->queue_count > 1) {
                nvme_start_queues(&ctrl->ctrl);
@@ -2494,11 +2479,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 
        /* wait for all io that had to be aborted */
        spin_lock_irqsave(&ctrl->lock, flags);
-       while (ctrl->iocnt) {
-               spin_unlock_irqrestore(&ctrl->lock, flags);
-               msleep(1000);
-               spin_lock_irqsave(&ctrl->lock, flags);
-       }
+       wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
        ctrl->flags &= ~FCCTRL_TERMIO;
        spin_unlock_irqrestore(&ctrl->lock, flags);
 
@@ -2528,7 +2509,7 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
        struct nvme_fc_ctrl *ctrl =
                container_of(work, struct nvme_fc_ctrl, delete_work);
 
-       cancel_work_sync(&ctrl->reset_work);
+       cancel_work_sync(&ctrl->ctrl.reset_work);
        cancel_delayed_work_sync(&ctrl->connect_work);
 
        /*
@@ -2555,7 +2536,7 @@ __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
                return true;
 
-       if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
+       if (!queue_work(nvme_wq, &ctrl->delete_work))
                return true;
 
        return false;
@@ -2582,7 +2563,7 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
        ret = __nvme_fc_del_ctrl(ctrl);
 
        if (!ret)
-               flush_workqueue(nvme_fc_wq);
+               flush_workqueue(nvme_wq);
 
        nvme_put_ctrl(&ctrl->ctrl);
 
@@ -2607,13 +2588,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
                dev_info(ctrl->ctrl.device,
                        "NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
                        ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
-               queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
+               queue_delayed_work(nvme_wq, &ctrl->connect_work,
                                ctrl->ctrl.opts->reconnect_delay * HZ);
        } else {
                dev_warn(ctrl->ctrl.device,
                                "NVME-FC{%d}: Max reconnect attempts (%d) "
                                "reached. Removing controller\n",
-                               ctrl->cnum, ctrl->ctrl.opts->nr_reconnects);
+                               ctrl->cnum, ctrl->ctrl.nr_reconnects);
                WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
        }
 }
@@ -2622,7 +2603,7 @@ static void
 nvme_fc_reset_ctrl_work(struct work_struct *work)
 {
        struct nvme_fc_ctrl *ctrl =
-                       container_of(work, struct nvme_fc_ctrl, reset_work);
+               container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
        int ret;
 
        /* will block while waiting for io to terminate */
@@ -2636,29 +2617,6 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
                        "NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
 }
 
-/*
- * called by the nvme core layer, for sysfs interface that requests
- * a reset of the nvme controller
- */
-static int
-nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
-{
-       struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-
-       dev_info(ctrl->ctrl.device,
-               "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
-
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-               return -EBUSY;
-
-       if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
-               return -EBUSY;
-
-       flush_work(&ctrl->reset_work);
-
-       return 0;
-}
-
 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
        .name                   = "fc",
        .module                 = THIS_MODULE,
@@ -2666,11 +2624,9 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
        .reg_read32             = nvmf_reg_read32,
        .reg_read64             = nvmf_reg_read64,
        .reg_write32            = nvmf_reg_write32,
-       .reset_ctrl             = nvme_fc_reset_nvme_ctrl,
        .free_ctrl              = nvme_fc_nvme_ctrl_freed,
        .submit_async_event     = nvme_fc_submit_async_event,
        .delete_ctrl            = nvme_fc_del_nvme_ctrl,
-       .get_subsysnqn          = nvmf_get_subsysnqn,
        .get_address            = nvmf_get_address,
 };
 
@@ -2696,7 +2652,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
 static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
        .queue_rq       = nvme_fc_queue_rq,
        .complete       = nvme_fc_complete_rq,
-       .init_request   = nvme_fc_init_admin_request,
+       .init_request   = nvme_fc_init_request,
        .exit_request   = nvme_fc_exit_request,
        .reinit_request = nvme_fc_reinit_request,
        .init_hctx      = nvme_fc_init_admin_hctx,
@@ -2741,7 +2697,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
        kref_init(&ctrl->ref);
 
        INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
-       INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
+       INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
        INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
        spin_lock_init(&ctrl->lock);
 
@@ -2808,6 +2764,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
                nvme_uninit_ctrl(&ctrl->ctrl);
                nvme_put_ctrl(&ctrl->ctrl);
 
+               /* Remove core ctrl ref. */
+               nvme_put_ctrl(&ctrl->ctrl);
+
                /* as we're past the point where we transition to the ref
                 * counting teardown path, if we return a bad pointer here,
                 * the calling routine, thinking it's prior to the
@@ -2966,20 +2925,7 @@ static struct nvmf_transport_ops nvme_fc_transport = {
 
 static int __init nvme_fc_init_module(void)
 {
-       int ret;
-
-       nvme_fc_wq = create_workqueue("nvme_fc_wq");
-       if (!nvme_fc_wq)
-               return -ENOMEM;
-
-       ret = nvmf_register_transport(&nvme_fc_transport);
-       if (ret)
-               goto err;
-
-       return 0;
-err:
-       destroy_workqueue(nvme_fc_wq);
-       return ret;
+       return nvmf_register_transport(&nvme_fc_transport);
 }
 
 static void __exit nvme_fc_exit_module(void)
@@ -2990,8 +2936,6 @@ static void __exit nvme_fc_exit_module(void)
 
        nvmf_unregister_transport(&nvme_fc_transport);
 
-       destroy_workqueue(nvme_fc_wq);
-
        ida_destroy(&nvme_fc_local_port_cnt);
        ida_destroy(&nvme_fc_ctrl_cnt);
 }
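
[Annotation] The fc.c hunks above replace a drop-the-lock/msleep(1000) polling loop with wait_event_lock_irq() on the new ioabort_wait queue, woken by the completion path when iocnt reaches zero. A userspace analogue of that wait/wake shape, sketched with pthreads (illustrative only, not kernel code):

#include <pthread.h>

struct io_tracker {
        pthread_mutex_t lock;
        pthread_cond_t  all_done;
        int             iocnt;
};

/* completion path: drop one reference, wake the waiter on zero */
void io_done(struct io_tracker *t)
{
        pthread_mutex_lock(&t->lock);
        if (--t->iocnt == 0)
                pthread_cond_signal(&t->all_done);
        pthread_mutex_unlock(&t->lock);
}

/* teardown path: block until every outstanding io has completed */
void wait_for_ios(struct io_tracker *t)
{
        pthread_mutex_lock(&t->lock);
        while (t->iocnt > 0)
                pthread_cond_wait(&t->all_done, &t->lock);
        pthread_mutex_unlock(&t->lock);
}
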
index f5df78e..be85413 100644 (file)
@@ -242,7 +242,7 @@ static inline void _nvme_nvm_check_size(void)
        BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
-       BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+       BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
 }
 
@@ -480,7 +480,7 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
                                        rqd->bio->bi_iter.bi_sector));
 }
 
-static void nvme_nvm_end_io(struct request *rq, int error)
+static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
 {
        struct nvm_rq *rqd = rq->end_io_data;
 
@@ -509,7 +509,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
        rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
        if (IS_ERR(rq)) {
                kfree(cmd);
-               return -ENOMEM;
+               return PTR_ERR(rq);
        }
        rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
 
@@ -571,13 +571,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
        .max_phys_sect          = 64,
 };
 
-static void nvme_nvm_end_user_vio(struct request *rq, int error)
-{
-       struct completion *waiting = rq->end_io_data;
-
-       complete(waiting);
-}
-
 static int nvme_nvm_submit_user_cmd(struct request_queue *q,
                                struct nvme_ns *ns,
                                struct nvme_nvm_command *vcmd,
@@ -608,7 +601,6 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
        rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
 
        rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
-       rq->end_io_data = &wait;
 
        if (ppa_buf && ppa_len) {
                ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
@@ -662,9 +654,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
        }
 
 submit:
-       blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio);
-
-       wait_for_completion_io(&wait);
+       blk_execute_rq(q, NULL, rq, 0);
 
        if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
                ret = -EINTR;
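
[Annotation] Returning PTR_ERR(rq) instead of a hardcoded -ENOMEM works because nvme_alloc_request() reports failure through the kernel's error-pointer convention: the errno is encoded in the pointer value itself. A self-contained sketch of that idiom (the real macros live in <linux/err.h>; alloc_request() here is hypothetical):

#include <errno.h>

#define MAX_ERRNO 4095          /* top page of the address space */

static inline void *ERR_PTR(long error)        { return (void *)error; }
static inline long PTR_ERR(const void *ptr)    { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* hypothetical allocator that fails with a specific errno */
static void *alloc_request(void)
{
        return ERR_PTR(-EAGAIN);
}

/* caller pattern matching the hunk above */
static int submit(void)
{
        void *rq = alloc_request();

        if (IS_ERR(rq))
                return PTR_ERR(rq);     /* propagate the real cause */
        /* ... issue rq ... */
        return 0;
}
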
index 9d6a070..d70ff0f 100644 (file)
@@ -27,12 +27,11 @@ extern unsigned char nvme_io_timeout;
 extern unsigned char admin_timeout;
 #define ADMIN_TIMEOUT  (admin_timeout * HZ)
 
-extern unsigned char shutdown_timeout;
-#define SHUTDOWN_TIMEOUT       (shutdown_timeout * HZ)
-
 #define NVME_DEFAULT_KATO      5
 #define NVME_KATO_GRACE                10
 
+extern struct workqueue_struct *nvme_wq;
+
 enum {
        NVME_NS_LBA             = 0,
        NVME_NS_LIGHTNVM        = 1,
@@ -131,6 +130,7 @@ struct nvme_ctrl {
        struct device *device;  /* char device */
        struct list_head node;
        struct ida ns_ida;
+       struct work_struct reset_work;
 
        struct opal_dev *opal_dev;
 
@@ -138,6 +138,7 @@ struct nvme_ctrl {
        char serial[20];
        char model[40];
        char firmware_rev[8];
+       char subnqn[NVMF_NQN_SIZE];
        u16 cntlid;
 
        u32 ctrl_config;
@@ -147,6 +148,8 @@ struct nvme_ctrl {
        u16 oncs;
        u16 vid;
        u16 oacs;
+       u16 nssa;
+       u16 nr_streams;
        atomic_t abort_limit;
        u8 event_limit;
        u8 vwc;
@@ -165,6 +168,10 @@ struct nvme_ctrl {
 
        /* Power saving configuration */
        u64 ps_max_latency_us;
+       bool apst_enabled;
+
+       u32 hmpre;
+       u32 hmmin;
 
        /* Fabrics only */
        u16 sqsize;
@@ -172,12 +179,10 @@ struct nvme_ctrl {
        u32 iorcsz;
        u16 icdoff;
        u16 maxcmd;
+       int nr_reconnects;
        struct nvmf_ctrl_options *opts;
 };
 
-/*
- * An NVM Express namespace is equivalent to a SCSI LUN
- */
 struct nvme_ns {
        struct list_head list;
 
@@ -189,14 +194,18 @@ struct nvme_ns {
        int instance;
 
        u8 eui[8];
-       u8 uuid[16];
+       u8 nguid[16];
+       uuid_t uuid;
 
        unsigned ns_id;
        int lba_shift;
        u16 ms;
+       u16 sgs;
+       u32 sws;
        bool ext;
        u8 pi_type;
        unsigned long flags;
+       u16 noiob;
 
 #define NVME_NS_REMOVING 0
 #define NVME_NS_DEAD     1
@@ -214,11 +223,9 @@ struct nvme_ctrl_ops {
        int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
        int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
        int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
-       int (*reset_ctrl)(struct nvme_ctrl *ctrl);
        void (*free_ctrl)(struct nvme_ctrl *ctrl);
        void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
        int (*delete_ctrl)(struct nvme_ctrl *ctrl);
-       const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl);
        int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
 };
 
@@ -296,7 +303,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
                struct nvme_command *cmd, unsigned int flags, int qid);
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
                struct nvme_command *cmd);
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                void *buf, unsigned bufflen);
@@ -310,23 +317,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                void __user *ubuffer, unsigned bufflen,
                void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
                u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
-               struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
-                     void *buffer, size_t buflen, u32 *result);
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
-                     void *buffer, size_t buflen, u32 *result);
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
 void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
-
-struct sg_io_hdr;
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
-int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
-int nvme_sg_get_version_num(int __user *ip);
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
 
 #ifdef CONFIG_NVM
 int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
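
[Annotation] With reset_work hoisted into struct nvme_ctrl above, each transport recovers its private controller from the generic work item via container_of() on the embedded member, as the fc.c and pci.c hunks do. A self-contained sketch of that pattern (structures here are stand-ins, not the driver's):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };

struct ctrl_base { struct work_struct reset_work; };

struct fc_ctrl {
        int cnum;                       /* transport-private state */
        struct ctrl_base ctrl;          /* embedded generic part */
};

static void reset_work_fn(struct work_struct *work)
{
        /* same shape as container_of(work, ..., ctrl.reset_work) */
        struct fc_ctrl *fc =
                container_of(work, struct fc_ctrl, ctrl.reset_work);

        printf("resetting controller %d\n", fc->cnum);
}

int main(void)
{
        struct fc_ctrl c = { .cnum = 3 };

        reset_work_fn(&c.ctrl.reset_work);
        return 0;
}
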
index 951042a..33c3b9d 100644 (file)
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-mq-pci.h>
-#include <linux/cpu.h>
-#include <linux/delay.h>
 #include <linux/dmi.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/hdreg.h>
-#include <linux/idr.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/poison.h>
-#include <linux/ptrace.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
 #include <linux/t10-pi.h>
 #include <linux/timer.h>
 #include <linux/types.h>
@@ -49,7 +36,6 @@
 #include "nvme.h"
 
 #define NVME_Q_DEPTH           1024
-#define NVME_AQ_DEPTH          256
 #define SQ_SIZE(depth)         (depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)         (depth * sizeof(struct nvme_completion))
 
@@ -66,12 +52,14 @@ static bool use_cmb_sqes = true;
 module_param(use_cmb_sqes, bool, 0644);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
-static struct workqueue_struct *nvme_workq;
+static unsigned int max_host_mem_size_mb = 128;
+module_param(max_host_mem_size_mb, uint, 0444);
+MODULE_PARM_DESC(max_host_mem_size_mb,
+       "Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
 
 struct nvme_dev;
 struct nvme_queue;
 
-static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
@@ -92,9 +80,8 @@ struct nvme_dev {
        int q_depth;
        u32 db_stride;
        void __iomem *bar;
-       struct work_struct reset_work;
+       unsigned long bar_mapped_size;
        struct work_struct remove_work;
-       struct timer_list watchdog_timer;
        struct mutex shutdown_lock;
        bool subsystem;
        void __iomem *cmb;
@@ -104,10 +91,18 @@ struct nvme_dev {
        u32 cmbloc;
        struct nvme_ctrl ctrl;
        struct completion ioq_wait;
+
+       /* shadow doorbell buffer support: */
        u32 *dbbuf_dbs;
        dma_addr_t dbbuf_dbs_dma_addr;
        u32 *dbbuf_eis;
        dma_addr_t dbbuf_eis_dma_addr;
+
+       /* host memory buffer support: */
+       u64 host_mem_size;
+       u32 nr_host_mem_descs;
+       struct nvme_host_mem_buf_desc *host_mem_descs;
+       void **host_mem_desc_bufs;
 };
 
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -185,8 +180,8 @@ static inline void _nvme_check_size(void)
        BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
-       BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
-       BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+       BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+       BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
        BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
        BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
        BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
@@ -350,19 +345,6 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i
        nvmeq->tags = NULL;
 }
 
-static int nvme_admin_init_request(struct blk_mq_tag_set *set,
-               struct request *req, unsigned int hctx_idx,
-               unsigned int numa_node)
-{
-       struct nvme_dev *dev = set->driver_data;
-       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-       struct nvme_queue *nvmeq = dev->queues[0];
-
-       BUG_ON(!nvmeq);
-       iod->nvmeq = nvmeq;
-       return 0;
-}
-
 static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
                          unsigned int hctx_idx)
 {
@@ -382,7 +364,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
 {
        struct nvme_dev *dev = set->driver_data;
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-       struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
+       int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
+       struct nvme_queue *nvmeq = dev->queues[queue_idx];
 
        BUG_ON(!nvmeq);
        iod->nvmeq = nvmeq;
@@ -427,7 +410,7 @@ static __le64 **iod_list(struct request *req)
        return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
-static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
+static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
        int nseg = blk_rq_nr_phys_segments(rq);
@@ -436,7 +419,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
        if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
                iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
                if (!iod->sg)
-                       return BLK_MQ_RQ_QUEUE_BUSY;
+                       return BLK_STS_RESOURCE;
        } else {
                iod->sg = iod->inline_sg;
        }
@@ -446,7 +429,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
        iod->nents = 0;
        iod->length = size;
 
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 }
 
 static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
@@ -616,21 +599,21 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
        return true;
 }
 
-static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
                struct nvme_command *cmnd)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct request_queue *q = req->q;
        enum dma_data_direction dma_dir = rq_data_dir(req) ?
                        DMA_TO_DEVICE : DMA_FROM_DEVICE;
-       int ret = BLK_MQ_RQ_QUEUE_ERROR;
+       blk_status_t ret = BLK_STS_IOERR;
 
        sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
        iod->nents = blk_rq_map_sg(q, req, iod->sg);
        if (!iod->nents)
                goto out;
 
-       ret = BLK_MQ_RQ_QUEUE_BUSY;
+       ret = BLK_STS_RESOURCE;
        if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
                                DMA_ATTR_NO_WARN))
                goto out;
@@ -638,7 +621,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
        if (!nvme_setup_prps(dev, req))
                goto out_unmap;
 
-       ret = BLK_MQ_RQ_QUEUE_ERROR;
+       ret = BLK_STS_IOERR;
        if (blk_integrity_rq(req)) {
                if (blk_rq_count_integrity_sg(q, req->bio) != 1)
                        goto out_unmap;
@@ -658,7 +641,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
        cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
        if (blk_integrity_rq(req))
                cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 
 out_unmap:
        dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
@@ -688,7 +671,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 /*
  * NOTE: ns is NULL when called on the admin queue.
  */
-static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
                         const struct blk_mq_queue_data *bd)
 {
        struct nvme_ns *ns = hctx->queue->queuedata;
@@ -696,47 +679,34 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
        struct nvme_dev *dev = nvmeq->dev;
        struct request *req = bd->rq;
        struct nvme_command cmnd;
-       int ret = BLK_MQ_RQ_QUEUE_OK;
-
-       /*
-        * If formated with metadata, require the block layer provide a buffer
-        * unless this namespace is formated such that the metadata can be
-        * stripped/generated by the controller with PRACT=1.
-        */
-       if (ns && ns->ms && !blk_integrity_rq(req)) {
-               if (!(ns->pi_type && ns->ms == 8) &&
-                   !blk_rq_is_passthrough(req)) {
-                       blk_mq_end_request(req, -EFAULT);
-                       return BLK_MQ_RQ_QUEUE_OK;
-               }
-       }
+       blk_status_t ret;
 
        ret = nvme_setup_cmd(ns, req, &cmnd);
-       if (ret != BLK_MQ_RQ_QUEUE_OK)
+       if (ret)
                return ret;
 
        ret = nvme_init_iod(req, dev);
-       if (ret != BLK_MQ_RQ_QUEUE_OK)
+       if (ret)
                goto out_free_cmd;
 
-       if (blk_rq_nr_phys_segments(req))
+       if (blk_rq_nr_phys_segments(req)) {
                ret = nvme_map_data(dev, req, &cmnd);
-
-       if (ret != BLK_MQ_RQ_QUEUE_OK)
-               goto out_cleanup_iod;
+               if (ret)
+                       goto out_cleanup_iod;
+       }
 
        blk_mq_start_request(req);
 
        spin_lock_irq(&nvmeq->q_lock);
        if (unlikely(nvmeq->cq_vector < 0)) {
-               ret = BLK_MQ_RQ_QUEUE_ERROR;
+               ret = BLK_STS_IOERR;
                spin_unlock_irq(&nvmeq->q_lock);
                goto out_cleanup_iod;
        }
        __nvme_submit_cmd(nvmeq, &cmnd);
        nvme_process_cq(nvmeq);
        spin_unlock_irq(&nvmeq->q_lock);
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 out_cleanup_iod:
        nvme_free_iod(dev, req);
 out_free_cmd:
@@ -759,65 +729,75 @@ static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
        return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
 }
 
-static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
+static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
 {
-       u16 head, phase;
-
-       head = nvmeq->cq_head;
-       phase = nvmeq->cq_phase;
-
-       while (nvme_cqe_valid(nvmeq, head, phase)) {
-               struct nvme_completion cqe = nvmeq->cqes[head];
-               struct request *req;
-
-               if (++head == nvmeq->q_depth) {
-                       head = 0;
-                       phase = !phase;
-               }
-
-               if (tag && *tag == cqe.command_id)
-                       *tag = -1;
+       u16 head = nvmeq->cq_head;
 
-               if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
-                       dev_warn(nvmeq->dev->ctrl.device,
-                               "invalid id %d completed on queue %d\n",
-                               cqe.command_id, le16_to_cpu(cqe.sq_id));
-                       continue;
-               }
+       if (likely(nvmeq->cq_vector >= 0)) {
+               if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
+                                                     nvmeq->dbbuf_cq_ei))
+                       writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+       }
+}
 
-               /*
-                * AEN requests are special as they don't time out and can
-                * survive any kind of queue freeze and often don't respond to
-                * aborts.  We don't even bother to allocate a struct request
-                * for them but rather special case them here.
-                */
-               if (unlikely(nvmeq->qid == 0 &&
-                               cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
-                       nvme_complete_async_event(&nvmeq->dev->ctrl,
-                                       cqe.status, &cqe.result);
-                       continue;
-               }
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
+               struct nvme_completion *cqe)
+{
+       struct request *req;
 
-               req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
-               nvme_end_request(req, cqe.status, cqe.result);
+       if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
+               dev_warn(nvmeq->dev->ctrl.device,
+                       "invalid id %d completed on queue %d\n",
+                       cqe->command_id, le16_to_cpu(cqe->sq_id));
+               return;
        }
 
-       if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
+       /*
+        * AEN requests are special as they don't time out and can
+        * survive any kind of queue freeze and often don't respond to
+        * aborts.  We don't even bother to allocate a struct request
+        * for them but rather special case them here.
+        */
+       if (unlikely(nvmeq->qid == 0 &&
+                       cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+               nvme_complete_async_event(&nvmeq->dev->ctrl,
+                               cqe->status, &cqe->result);
                return;
+       }
 
-       if (likely(nvmeq->cq_vector >= 0))
-               if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
-                                                     nvmeq->dbbuf_cq_ei))
-                       writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
-       nvmeq->cq_head = head;
-       nvmeq->cq_phase = phase;
+       req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
+       nvme_end_request(req, cqe->status, cqe->result);
+}
 
-       nvmeq->cqe_seen = 1;
+static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
+               struct nvme_completion *cqe)
+{
+       if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
+               *cqe = nvmeq->cqes[nvmeq->cq_head];
+
+               if (++nvmeq->cq_head == nvmeq->q_depth) {
+                       nvmeq->cq_head = 0;
+                       nvmeq->cq_phase = !nvmeq->cq_phase;
+               }
+               return true;
+       }
+       return false;
 }
 
 static void nvme_process_cq(struct nvme_queue *nvmeq)
 {
-       __nvme_process_cq(nvmeq, NULL);
+       struct nvme_completion cqe;
+       int consumed = 0;
+
+       while (nvme_read_cqe(nvmeq, &cqe)) {
+               nvme_handle_cqe(nvmeq, &cqe);
+               consumed++;
+       }
+
+       if (consumed) {
+               nvme_ring_cq_doorbell(nvmeq);
+               nvmeq->cqe_seen = 1;
+       }
 }
 
 static irqreturn_t nvme_irq(int irq, void *data)
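
[Annotation] The rewrite above splits completion handling into a read step (nvme_read_cqe), a dispatch step (nvme_handle_cqe), and a single doorbell write once a batch is consumed. The read step keys off the phase tag: an entry is fresh when its low status bit matches the consumer's expected phase, which flips at every wraparound. A simplified sketch of that ring walk (stand-in types, not the driver's):

#include <stdbool.h>
#include <stdint.h>

struct cqe { uint16_t status; uint16_t command_id; };

struct cq {
        struct cqe *entries;
        uint16_t depth;
        uint16_t head;
        uint8_t phase;          /* starts at 1, flips on each wrap */
};

static bool cq_read(struct cq *q, struct cqe *out)
{
        if ((q->entries[q->head].status & 1) != q->phase)
                return false;   /* not yet written by the device */

        *out = q->entries[q->head];
        if (++q->head == q->depth) {
                q->head = 0;
                q->phase = !q->phase;
        }
        return true;
}

/* consume everything available, then ring the doorbell once */
static int cq_process(struct cq *q)
{
        struct cqe cqe;
        int consumed = 0;

        while (cq_read(q, &cqe))
                consumed++;     /* handle_cqe(&cqe) would go here */
        return consumed;        /* caller writes head to the doorbell */
}
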
@@ -842,16 +822,28 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 
 static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 {
-       if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
-               spin_lock_irq(&nvmeq->q_lock);
-               __nvme_process_cq(nvmeq, &tag);
-               spin_unlock_irq(&nvmeq->q_lock);
+       struct nvme_completion cqe;
+       int found = 0, consumed = 0;
 
-               if (tag == -1)
-                       return 1;
-       }
+       if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+               return 0;
 
-       return 0;
+       spin_lock_irq(&nvmeq->q_lock);
+       while (nvme_read_cqe(nvmeq, &cqe)) {
+               nvme_handle_cqe(nvmeq, &cqe);
+               consumed++;
+
+               if (tag == cqe.command_id) {
+                       found = 1;
+                       break;
+               }
+       }
+
+       if (consumed)
+               nvme_ring_cq_doorbell(nvmeq);
+       spin_unlock_irq(&nvmeq->q_lock);
+
+       return found;
 }
 
 static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -939,7 +931,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
        return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
 }
 
-static void abort_endio(struct request *req, int error)
+static void abort_endio(struct request *req, blk_status_t error)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct nvme_queue *nvmeq = iod->nvmeq;
@@ -950,6 +942,51 @@ static void abort_endio(struct request *req, int error)
        blk_mq_free_request(req);
 }
 
+static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+{
+
+       /* If true, indicates loss of adapter communication, possibly by a
+        * NVMe Subsystem reset.
+        */
+       bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+
+       /* If there is a reset ongoing, we shouldn't reset again. */
+       if (dev->ctrl.state == NVME_CTRL_RESETTING)
+               return false;
+
+       /* We shouldn't reset unless the controller is in a fatal error state
+        * _or_ if we have lost communication with it.
+        */
+       if (!(csts & NVME_CSTS_CFS) && !nssro)
+               return false;
+
+       /* If a PCI error recovery process is happening, we cannot reset or
+        * the recovery mechanism will surely fail.
+        */
+       if (pci_channel_offline(to_pci_dev(dev->dev)))
+               return false;
+
+       return true;
+}
+
+static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+{
+       /* Read a config register to help see what died. */
+       u16 pci_status;
+       int result;
+
+       result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
+                                     &pci_status);
+       if (result == PCIBIOS_SUCCESSFUL)
+               dev_warn(dev->ctrl.device,
+                        "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
+                        csts, pci_status);
+       else
+               dev_warn(dev->ctrl.device,
+                        "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
+                        csts, result);
+}
+
 static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -957,6 +994,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
        struct nvme_dev *dev = nvmeq->dev;
        struct request *abort_req;
        struct nvme_command cmd;
+       u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+       /*
+        * Reset immediately if the controller is failed
+        */
+       if (nvme_should_reset(dev, csts)) {
+               nvme_warn_reset(dev, csts);
+               nvme_dev_disable(dev, false);
+               nvme_reset_ctrl(&dev->ctrl);
+               return BLK_EH_HANDLED;
+       }
 
        /*
         * Did we miss an interrupt?
@@ -993,7 +1041,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
                         "I/O %d QID %d timeout, reset controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
-               nvme_reset(dev);
+               nvme_reset_ctrl(&dev->ctrl);
 
                /*
                 * Mark the request as handled, since the inline shutdown
@@ -1247,7 +1295,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
        .complete       = nvme_pci_complete_rq,
        .init_hctx      = nvme_admin_init_hctx,
        .exit_hctx      = nvme_admin_exit_hctx,
-       .init_request   = nvme_admin_init_request,
+       .init_request   = nvme_init_request,
        .timeout        = nvme_timeout,
 };
 
@@ -1311,6 +1359,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
        return 0;
 }
 
+static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+       return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
+}
+
+static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
+{
+       struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+       if (size <= dev->bar_mapped_size)
+               return 0;
+       if (size > pci_resource_len(pdev, 0))
+               return -ENOMEM;
+       if (dev->bar)
+               iounmap(dev->bar);
+       dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+       if (!dev->bar) {
+               dev->bar_mapped_size = 0;
+               return -ENOMEM;
+       }
+       dev->bar_mapped_size = size;
+       dev->dbs = dev->bar + NVME_REG_DBS;
+
+       return 0;
+}
+
 static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
        int result;
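
[Annotation] db_bar_size(), added above, sizes the doorbell BAR mapping: the doorbell registers start at NVME_REG_DBS (0x1000) and each queue pair, including the admin queue, owns a submission and a completion doorbell. A worked example of the calculation (values illustrative):

#include <stdio.h>

#define NVME_REG_DBS 0x1000     /* doorbell registers start at 4 KiB */

static unsigned long db_bar_size(unsigned nr_io_queues,
                                 unsigned db_stride)
{
        /* +1 for the admin queue, 8 = two 4-byte doorbells per pair */
        return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * db_stride);
}

int main(void)
{
        /* 64 I/O queues, stride 1 -> 4096 + 65 * 8 = 4616 bytes */
        printf("%lu\n", db_bar_size(64, 1));
        return 0;
}
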
@@ -1318,6 +1392,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
        struct nvme_queue *nvmeq;
 
+       result = nvme_remap_bar(dev, db_bar_size(dev, 0));
+       if (result < 0)
+               return result;
+
        dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
                                                NVME_CAP_NSSRC(cap) : 0;
 
@@ -1358,66 +1436,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        return result;
 }
 
-static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
-{
-
-       /* If true, indicates loss of adapter communication, possibly by a
-        * NVMe Subsystem reset.
-        */
-       bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
-
-       /* If there is a reset ongoing, we shouldn't reset again. */
-       if (dev->ctrl.state == NVME_CTRL_RESETTING)
-               return false;
-
-       /* We shouldn't reset unless the controller is on fatal error state
-        * _or_ if we lost the communication with it.
-        */
-       if (!(csts & NVME_CSTS_CFS) && !nssro)
-               return false;
-
-       /* If PCI error recovery process is happening, we cannot reset or
-        * the recovery mechanism will surely fail.
-        */
-       if (pci_channel_offline(to_pci_dev(dev->dev)))
-               return false;
-
-       return true;
-}
-
-static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
-{
-       /* Read a config register to help see what died. */
-       u16 pci_status;
-       int result;
-
-       result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
-                                     &pci_status);
-       if (result == PCIBIOS_SUCCESSFUL)
-               dev_warn(dev->ctrl.device,
-                        "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
-                        csts, pci_status);
-       else
-               dev_warn(dev->ctrl.device,
-                        "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
-                        csts, result);
-}
-
-static void nvme_watchdog_timer(unsigned long data)
-{
-       struct nvme_dev *dev = (struct nvme_dev *)data;
-       u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
-       /* Skip controllers under certain specific conditions. */
-       if (nvme_should_reset(dev, csts)) {
-               if (!nvme_reset(dev))
-                       nvme_warn_reset(dev, csts);
-               return;
-       }
-
-       mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-}
-
 static int nvme_create_io_queues(struct nvme_dev *dev)
 {
        unsigned i, max;
@@ -1514,16 +1532,168 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
        }
 }
 
-static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
+{
+       size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
+       struct nvme_command c;
+       u64 dma_addr;
+       int ret;
+
+       dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
+                       DMA_TO_DEVICE);
+       if (dma_mapping_error(dev->dev, dma_addr))
+               return -ENOMEM;
+
+       memset(&c, 0, sizeof(c));
+       c.features.opcode       = nvme_admin_set_features;
+       c.features.fid          = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
+       c.features.dword11      = cpu_to_le32(bits);
+       c.features.dword12      = cpu_to_le32(dev->host_mem_size >>
+                                             ilog2(dev->ctrl.page_size));
+       c.features.dword13      = cpu_to_le32(lower_32_bits(dma_addr));
+       c.features.dword14      = cpu_to_le32(upper_32_bits(dma_addr));
+       c.features.dword15      = cpu_to_le32(dev->nr_host_mem_descs);
+
+       ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
+       if (ret) {
+               dev_warn(dev->ctrl.device,
+                        "failed to set host mem (err %d, flags %#x).\n",
+                        ret, bits);
+       }
+       dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
+       return ret;
+}
+
+static void nvme_free_host_mem(struct nvme_dev *dev)
+{
+       int i;
+
+       for (i = 0; i < dev->nr_host_mem_descs; i++) {
+               struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
+               size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
+
+               dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
+                               le64_to_cpu(desc->addr));
+       }
+
+       kfree(dev->host_mem_desc_bufs);
+       dev->host_mem_desc_bufs = NULL;
+       kfree(dev->host_mem_descs);
+       dev->host_mem_descs = NULL;
+}
+
+static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
 {
-       return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
+       struct nvme_host_mem_buf_desc *descs;
+       u32 chunk_size, max_entries, i = 0;
+       void **bufs;
+       u64 size, tmp;
+
+       /* start big and work our way down */
+       chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
+retry:
+       tmp = (preferred + chunk_size - 1);
+       do_div(tmp, chunk_size);
+       max_entries = tmp;
+       descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
+       if (!descs)
+               goto out;
+
+       bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
+       if (!bufs)
+               goto out_free_descs;
+
+       for (size = 0; size < preferred; size += chunk_size) {
+               u32 len = min_t(u64, chunk_size, preferred - size);
+               dma_addr_t dma_addr;
+
+               bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
+                               DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
+               if (!bufs[i])
+                       break;
+
+               descs[i].addr = cpu_to_le64(dma_addr);
+               descs[i].size = cpu_to_le32(len / dev->ctrl.page_size);
+               i++;
+       }
+
+       if (!size || (min && size < min)) {
+               dev_warn(dev->ctrl.device,
+                       "failed to allocate host memory buffer.\n");
+               goto out_free_bufs;
+       }
+
+       dev_info(dev->ctrl.device,
+               "allocated %lld MiB host memory buffer.\n",
+               size >> ilog2(SZ_1M));
+       dev->nr_host_mem_descs = i;
+       dev->host_mem_size = size;
+       dev->host_mem_descs = descs;
+       dev->host_mem_desc_bufs = bufs;
+       return 0;
+
+out_free_bufs:
+       while (--i >= 0) {
+               size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
+
+               dma_free_coherent(dev->dev, size, bufs[i],
+                               le64_to_cpu(descs[i].addr));
+       }
+
+       kfree(bufs);
+out_free_descs:
+       kfree(descs);
+out:
+       /* try a smaller chunk size if we failed early */
+       if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
+               chunk_size /= 2;
+               goto retry;
+       }
+       dev->host_mem_descs = NULL;
+       return -ENOMEM;
+}
+
+static void nvme_setup_host_mem(struct nvme_dev *dev)
+{
+       u64 max = (u64)max_host_mem_size_mb * SZ_1M;
+       u64 preferred = (u64)dev->ctrl.hmpre * 4096;
+       u64 min = (u64)dev->ctrl.hmmin * 4096;
+       u32 enable_bits = NVME_HOST_MEM_ENABLE;
+
+       preferred = min(preferred, max);
+       if (min > max) {
+               dev_warn(dev->ctrl.device,
+                       "min host memory (%lld MiB) above limit (%d MiB).\n",
+                       min >> ilog2(SZ_1M), max_host_mem_size_mb);
+               nvme_free_host_mem(dev);
+               return;
+       }
+
+       /*
+        * If we already have a buffer allocated, check if we can reuse it.
+        */
+       if (dev->host_mem_descs) {
+               if (dev->host_mem_size >= min)
+                       enable_bits |= NVME_HOST_MEM_RETURN;
+               else
+                       nvme_free_host_mem(dev);
+       }
+
+       if (!dev->host_mem_descs) {
+               if (nvme_alloc_host_mem(dev, min, preferred))
+                       return;
+       }
+
+       if (nvme_set_host_mem(dev, enable_bits))
+               nvme_free_host_mem(dev);
 }
 
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
        struct nvme_queue *adminq = dev->queues[0];
        struct pci_dev *pdev = to_pci_dev(dev->dev);
-       int result, nr_io_queues, size;
+       int result, nr_io_queues;
+       unsigned long size;
 
        nr_io_queues = num_online_cpus();
        result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
@@ -1542,20 +1712,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
                        nvme_release_cmb(dev);
        }
 
-       size = db_bar_size(dev, nr_io_queues);
-       if (size > 8192) {
-               iounmap(dev->bar);
-               do {
-                       dev->bar = ioremap(pci_resource_start(pdev, 0), size);
-                       if (dev->bar)
-                               break;
-                       if (!--nr_io_queues)
-                               return -ENOMEM;
-                       size = db_bar_size(dev, nr_io_queues);
-               } while (1);
-               dev->dbs = dev->bar + 4096;
-               adminq->q_db = dev->dbs;
-       }
+       do {
+               size = db_bar_size(dev, nr_io_queues);
+               result = nvme_remap_bar(dev, size);
+               if (!result)
+                       break;
+               if (!--nr_io_queues)
+                       return -ENOMEM;
+       } while (1);
+       adminq->q_db = dev->dbs;
 
        /* Deregister the admin queue's interrupt */
        pci_free_irq(pdev, 0, adminq);
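
[Annotation] nvme_alloc_host_mem() above starts with the largest chunk it can DMA-map and halves the chunk size whenever the total falls short of the controller's minimum, so a large buffer can still be assembled from fragmented memory. A hedged userspace sketch of that retry strategy, with malloc() standing in for dma_alloc_attrs():

#include <stdlib.h>

/*
 * Try to reserve at least @min (ideally @preferred) bytes in at most
 * @max_bufs pieces.  Returns the total reserved, or 0 on failure; on
 * success the pieces are left in @bufs.  Illustrative only.
 */
static size_t alloc_chunked(size_t min, size_t preferred, size_t chunk,
                            void **bufs, int max_bufs)
{
        if (chunk > preferred)
                chunk = preferred;

        while (chunk >= 4096) {                 /* PAGE_SIZE stand-in */
                size_t total = 0;
                int i = 0;

                while (total < preferred && i < max_bufs) {
                        bufs[i] = malloc(chunk);
                        if (!bufs[i])
                                break;
                        total += chunk;
                        i++;
                }
                if (total && total >= min)
                        return total;           /* good enough, keep it */

                while (i-- > 0)                 /* give back, retry smaller */
                        free(bufs[i]);
                chunk /= 2;
        }
        return 0;
}
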
@@ -1586,7 +1751,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
        return nvme_create_io_queues(dev);
 }
 
-static void nvme_del_queue_end(struct request *req, int error)
+static void nvme_del_queue_end(struct request *req, blk_status_t error)
 {
        struct nvme_queue *nvmeq = req->end_io_data;
 
@@ -1594,7 +1759,7 @@ static void nvme_del_queue_end(struct request *req, int error)
        complete(&nvmeq->dev->ioq_wait);
 }
 
-static void nvme_del_cq_end(struct request *req, int error)
+static void nvme_del_cq_end(struct request *req, blk_status_t error)
 {
        struct nvme_queue *nvmeq = req->end_io_data;
 
@@ -1799,13 +1964,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
        bool dead = true;
        struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-       del_timer_sync(&dev->watchdog_timer);
-
        mutex_lock(&dev->shutdown_lock);
        if (pci_is_enabled(pdev)) {
                u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
-               if (dev->ctrl.state == NVME_CTRL_LIVE)
+               if (dev->ctrl.state == NVME_CTRL_LIVE ||
+                   dev->ctrl.state == NVME_CTRL_RESETTING)
                        nvme_start_freeze(&dev->ctrl);
                dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
                        pdev->error_state  != pci_channel_io_normal);
@@ -1815,8 +1979,20 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
         * Give the controller a chance to complete all entered requests if
         * doing a safe shutdown.
         */
-       if (!dead && shutdown)
-               nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+       if (!dead) {
+               if (shutdown)
+                       nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+
+               /*
+                * If the controller is still alive, tell it to stop using the
+                * host memory buffer.  In theory the shutdown / reset should
+                * make sure that it doesn't access the host memory anymore,
+                * but I'd rather be safe than sorry.
+                */
+               if (dev->host_mem_descs)
+                       nvme_set_host_mem(dev, 0);
+
+       }
        nvme_stop_queues(&dev->ctrl);
 
        queues = dev->online_queues - 1;
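
One ordering rule in this hunk deserves emphasis: while the controller may still master DMA, the host must revoke the device's use of the host memory buffer before that memory can ever be reclaimed (the actual free happens later, in nvme_remove()). A standalone sketch of the revoke-before-free discipline, with set_host_mem() standing in for the real admin command:

#include <stdlib.h>

struct dev {
        void *hmb;              /* host memory lent to the device */
        int   hmb_enabled;
};

static void set_host_mem(struct dev *d, int enable)
{
        d->hmb_enabled = enable;        /* models the admin command */
}

static void teardown(struct dev *d, int dead)
{
        if (!dead && d->hmb)
                set_host_mem(d, 0);     /* device must stop using it first */
        free(d->hmb);                   /* only now is freeing safe */
        d->hmb = NULL;
}

int main(void)
{
        struct dev d = { malloc(4096), 1 };

        teardown(&d, 0);
        return 0;
}
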
@@ -1899,7 +2075,8 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
 
 static void nvme_reset_work(struct work_struct *work)
 {
-       struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+       struct nvme_dev *dev =
+               container_of(work, struct nvme_dev, ctrl.reset_work);
        bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
        int result = -ENODEV;
 
@@ -1948,6 +2125,9 @@ static void nvme_reset_work(struct work_struct *work)
                                 "unable to allocate dma for dbbuf\n");
        }
 
+       if (dev->ctrl.hmpre)
+               nvme_setup_host_mem(dev);
+
        result = nvme_setup_io_queues(dev);
        if (result)
                goto out;
@@ -1961,8 +2141,6 @@ static void nvme_reset_work(struct work_struct *work)
        if (dev->online_queues > 1)
                nvme_queue_async_events(&dev->ctrl);
 
-       mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-
        /*
         * Keep the controller around but remove all namespaces if we don't have
         * any working I/O queue.
@@ -2002,17 +2180,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
        nvme_put_ctrl(&dev->ctrl);
 }
 
-static int nvme_reset(struct nvme_dev *dev)
-{
-       if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
-               return -ENODEV;
-       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
-               return -EBUSY;
-       if (!queue_work(nvme_workq, &dev->reset_work))
-               return -EBUSY;
-       return 0;
-}
-
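
With the driver-private nvme_reset() gone, resets flow through a core-level nvme_reset_ctrl() (used further down in this patch) built on the same two idempotent gates: flip the controller state, then queue ctrl->reset_work on the shared workqueue. A standalone model of why those gates make duplicate reset requests harmless; change_state() and queue_reset() are stand-ins for nvme_change_ctrl_state() and queue_work():

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum ctrl_state { CTRL_LIVE, CTRL_RESETTING };

struct ctrl {
        enum ctrl_state state;
        bool reset_queued;
};

/* models nvme_change_ctrl_state(): refuse a no-op transition */
static bool change_state(struct ctrl *c, enum ctrl_state new_state)
{
        if (c->state == new_state)
                return false;
        c->state = new_state;
        return true;
}

/* models queue_work(): refuse to queue work that is already pending */
static bool queue_reset(struct ctrl *c)
{
        if (c->reset_queued)
                return false;
        c->reset_queued = true;
        return true;
}

static int reset_ctrl(struct ctrl *c)
{
        if (!change_state(c, CTRL_RESETTING))
                return -EBUSY;
        if (!queue_reset(c))
                return -EBUSY;
        return 0;
}

int main(void)
{
        struct ctrl c = { CTRL_LIVE, false };

        printf("first: %d, repeat: %d\n", reset_ctrl(&c), reset_ctrl(&c));
        return 0;
}
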
 static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 {
        *val = readl(to_nvme_dev(ctrl)->bar + off);
@@ -2031,16 +2198,6 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
        return 0;
 }
 
-static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
-{
-       struct nvme_dev *dev = to_nvme_dev(ctrl);
-       int ret = nvme_reset(dev);
-
-       if (!ret)
-               flush_work(&dev->reset_work);
-       return ret;
-}
-
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
        .name                   = "pcie",
        .module                 = THIS_MODULE,
@@ -2048,7 +2205,6 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
        .reg_read32             = nvme_pci_reg_read32,
        .reg_write32            = nvme_pci_reg_write32,
        .reg_read64             = nvme_pci_reg_read64,
-       .reset_ctrl             = nvme_pci_reset_ctrl,
        .free_ctrl              = nvme_pci_free_ctrl,
        .submit_async_event     = nvme_pci_submit_async_event,
 };
@@ -2060,8 +2216,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
        if (pci_request_mem_regions(pdev, "nvme"))
                return -ENODEV;
 
-       dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
-       if (!dev->bar)
+       if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
                goto release;
 
        return 0;
@@ -2115,10 +2270,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (result)
                goto free;
 
-       INIT_WORK(&dev->reset_work, nvme_reset_work);
+       INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
        INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
-       setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
-               (unsigned long)dev);
        mutex_init(&dev->shutdown_lock);
        init_completion(&dev->ioq_wait);
 
@@ -2136,7 +2289,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
        dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
-       queue_work(nvme_workq, &dev->reset_work);
+       queue_work(nvme_wq, &dev->ctrl.reset_work);
        return 0;
 
  release_pools:
@@ -2157,7 +2310,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
        if (prepare)
                nvme_dev_disable(dev, false);
        else
-               nvme_reset(dev);
+               nvme_reset_ctrl(&dev->ctrl);
 }
 
 static void nvme_shutdown(struct pci_dev *pdev)
@@ -2177,7 +2330,7 @@ static void nvme_remove(struct pci_dev *pdev)
 
        nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 
-       cancel_work_sync(&dev->reset_work);
+       cancel_work_sync(&dev->ctrl.reset_work);
        pci_set_drvdata(pdev, NULL);
 
        if (!pci_device_is_present(pdev)) {
@@ -2185,9 +2338,10 @@ static void nvme_remove(struct pci_dev *pdev)
                nvme_dev_disable(dev, false);
        }
 
-       flush_work(&dev->reset_work);
+       flush_work(&dev->ctrl.reset_work);
        nvme_uninit_ctrl(&dev->ctrl);
        nvme_dev_disable(dev, true);
+       nvme_free_host_mem(dev);
        nvme_dev_remove_admin(dev);
        nvme_free_queues(dev, 0);
        nvme_release_prp_pools(dev);
@@ -2228,7 +2382,7 @@ static int nvme_resume(struct device *dev)
        struct pci_dev *pdev = to_pci_dev(dev);
        struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-       nvme_reset(ndev);
+       nvme_reset_ctrl(&ndev->ctrl);
        return 0;
 }
 #endif
@@ -2267,7 +2421,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
 
        dev_info(dev->ctrl.device, "restart after slot reset\n");
        pci_restore_state(pdev);
-       nvme_reset(dev);
+       nvme_reset_ctrl(&dev->ctrl);
        return PCI_ERS_RESULT_RECOVERED;
 }
 
@@ -2323,22 +2477,12 @@ static struct pci_driver nvme_driver = {
 
 static int __init nvme_init(void)
 {
-       int result;
-
-       nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
-       if (!nvme_workq)
-               return -ENOMEM;
-
-       result = pci_register_driver(&nvme_driver);
-       if (result)
-               destroy_workqueue(nvme_workq);
-       return result;
+       return pci_register_driver(&nvme_driver);
 }
 
 static void __exit nvme_exit(void)
 {
        pci_unregister_driver(&nvme_driver);
-       destroy_workqueue(nvme_workq);
        _nvme_check_size();
 }
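
nvme_init() shrinks to a bare pci_register_driver() because the PCI driver no longer owns a private workqueue; its reset and remove work is queued on the shared nvme_wq seen earlier in this patch. Presumably the core module allocates that queue once for every transport; a sketch of the assumed shape (the queue name and flags are an assumption, not taken from these hunks):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *nvme_wq;

static int __init nvme_core_init(void)
{
        /* one shared queue for reset/delete work across all transports */
        nvme_wq = alloc_workqueue("nvme-wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
        if (!nvme_wq)
                return -ENOMEM;
        return 0;
}

static void __exit nvme_core_exit(void)
{
        destroy_workqueue(nvme_wq);
}

module_init(nvme_core_init);
module_exit(nvme_core_exit);
MODULE_LICENSE("GPL");
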
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 24397d3..6d4119d 100644 (file)
@@ -48,7 +48,7 @@
  */
 #define NVME_RDMA_NR_AEN_COMMANDS      1
 #define NVME_RDMA_AQ_BLKMQ_DEPTH       \
-       (NVMF_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
+       (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
 
 struct nvme_rdma_device {
        struct ib_device       *dev;
@@ -80,10 +80,8 @@ struct nvme_rdma_request {
 };
 
 enum nvme_rdma_queue_flags {
-       NVME_RDMA_Q_CONNECTED = (1 << 0),
-       NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
-       NVME_RDMA_Q_DELETING = (1 << 2),
-       NVME_RDMA_Q_LIVE = (1 << 3),
+       NVME_RDMA_Q_LIVE                = 0,
+       NVME_RDMA_Q_DELETING            = 1,
 };
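
The rewritten enum switches from mask values like (1 << 0) to plain bit numbers because set_bit(), test_bit() and friends take a bit number, not a mask: passing the old NVME_RDMA_Q_LIVE value of (1 << 3) to test_bit() would actually test bit 8. A standalone illustration, with simplified single-word stand-ins for the kernel bit helpers:

#include <stdio.h>

static void set_bit(int nr, unsigned long *addr)
{
        *addr |= 1UL << nr;
}

static int test_bit(int nr, const unsigned long *addr)
{
        return (*addr >> nr) & 1;
}

enum queue_flags {
        Q_LIVE          = 0,    /* bit numbers, as in the new enum */
        Q_DELETING      = 1,
};

int main(void)
{
        unsigned long flags = 0;

        /* with mask-style values this would have touched the wrong bit */
        set_bit(Q_LIVE, &flags);
        printf("live=%d deleting=%d\n",
               test_bit(Q_LIVE, &flags), test_bit(Q_DELETING, &flags));
        return 0;
}
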
 
 struct nvme_rdma_queue {
@@ -103,9 +101,6 @@ struct nvme_rdma_queue {
 };
 
 struct nvme_rdma_ctrl {
-       /* read and written in the hot path */
-       spinlock_t              lock;
-
        /* read only in the hot path */
        struct nvme_rdma_queue  *queues;
        u32                     queue_count;
@@ -113,7 +108,6 @@ struct nvme_rdma_ctrl {
        /* other member variables */
        struct blk_mq_tag_set   tag_set;
        struct work_struct      delete_work;
-       struct work_struct      reset_work;
        struct work_struct      err_work;
 
        struct nvme_rdma_qe     async_event_sqe;
@@ -145,8 +139,6 @@ static DEFINE_MUTEX(device_list_mutex);
 static LIST_HEAD(nvme_rdma_ctrl_list);
 static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
 
-static struct workqueue_struct *nvme_rdma_wq;
-
 /*
  * Disabling this option makes small I/O go faster, but is fundamentally
  * unsafe.  With it turned off we will have to register a global rkey that
@@ -301,10 +293,12 @@ out:
        return ret;
 }
 
-static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
-               struct request *rq, unsigned int queue_idx)
+static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
+               struct request *rq, unsigned int hctx_idx)
 {
+       struct nvme_rdma_ctrl *ctrl = set->driver_data;
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+       int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
        struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
        struct nvme_rdma_device *dev = queue->device;
 
@@ -315,22 +309,13 @@ static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
                        DMA_TO_DEVICE);
 }
 
-static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
-               struct request *rq, unsigned int hctx_idx)
-{
-       return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set,
-               struct request *rq, unsigned int hctx_idx)
-{
-       return __nvme_rdma_exit_request(set->driver_data, rq, 0);
-}
-
-static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
-               struct request *rq, unsigned int queue_idx)
+static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
+               struct request *rq, unsigned int hctx_idx,
+               unsigned int numa_node)
 {
+       struct nvme_rdma_ctrl *ctrl = set->driver_data;
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+       int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
        struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
        struct nvme_rdma_device *dev = queue->device;
        struct ib_device *ibdev = dev->dev;
@@ -358,20 +343,6 @@ out_free_qe:
        return -ENOMEM;
 }
 
-static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
-               struct request *rq, unsigned int hctx_idx,
-               unsigned int numa_node)
-{
-       return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set,
-               struct request *rq, unsigned int hctx_idx,
-               unsigned int numa_node)
-{
-       return __nvme_rdma_init_request(set->driver_data, rq, 0);
-}
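
The four init/exit request variants collapse into two because the queue index is recoverable from the tag set a request belongs to: the admin set always means queue 0, while the I/O set means hctx_idx + 1. A standalone sketch of that dispatch (names are illustrative):

#include <stdio.h>

struct tag_set { int unused; };

struct ctrl {
        struct tag_set admin_tag_set;
        struct tag_set tag_set;         /* I/O tag set */
};

static int queue_idx(struct ctrl *ctrl, struct tag_set *set, int hctx_idx)
{
        /* admin requests always target queue 0 */
        return (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
}

int main(void)
{
        struct ctrl c = { {0}, {0} };

        printf("admin -> %d, io hctx 3 -> %d\n",
               queue_idx(&c, &c.admin_tag_set, 0),
               queue_idx(&c, &c.tag_set, 3));
        return 0;
}
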
-
 static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
                unsigned int hctx_idx)
 {
@@ -469,9 +440,6 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
        struct nvme_rdma_device *dev;
        struct ib_device *ibdev;
 
-       if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
-               return;
-
        dev = queue->device;
        ibdev = dev->dev;
        rdma_destroy_qp(queue->cm_id);
@@ -483,17 +451,21 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
        nvme_rdma_dev_put(dev);
 }
 
-static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
-               struct nvme_rdma_device *dev)
+static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 {
-       struct ib_device *ibdev = dev->dev;
+       struct ib_device *ibdev;
        const int send_wr_factor = 3;                   /* MR, SEND, INV */
        const int cq_factor = send_wr_factor + 1;       /* + RECV */
        int comp_vector, idx = nvme_rdma_queue_idx(queue);
-
        int ret;
 
-       queue->device = dev;
+       queue->device = nvme_rdma_find_get_device(queue->cm_id);
+       if (!queue->device) {
+               dev_err(queue->cm_id->device->dev.parent,
+                       "no client data found!\n");
+               return -ECONNREFUSED;
+       }
+       ibdev = queue->device->dev;
 
        /*
         * The admin queue is barely used once the controller is live, so don't
@@ -506,12 +478,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
 
 
        /* +1 for ib_stop_cq */
-       queue->ib_cq = ib_alloc_cq(dev->dev, queue,
-                               cq_factor * queue->queue_size + 1, comp_vector,
-                               IB_POLL_SOFTIRQ);
+       queue->ib_cq = ib_alloc_cq(ibdev, queue,
+                               cq_factor * queue->queue_size + 1,
+                               comp_vector, IB_POLL_SOFTIRQ);
        if (IS_ERR(queue->ib_cq)) {
                ret = PTR_ERR(queue->ib_cq);
-               goto out;
+               goto out_put_dev;
        }
 
        ret = nvme_rdma_create_qp(queue, send_wr_factor);
@@ -524,7 +496,6 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
                ret = -ENOMEM;
                goto out_destroy_qp;
        }
-       set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
 
        return 0;
 
@@ -532,7 +503,8 @@ out_destroy_qp:
        ib_destroy_qp(queue->qp);
 out_destroy_ib_cq:
        ib_free_cq(queue->ib_cq);
-out:
+out_put_dev:
+       nvme_rdma_dev_put(queue->device);
        return ret;
 }
 
@@ -583,12 +555,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
        }
 
        clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
-       set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
 
        return 0;
 
 out_destroy_cm_id:
-       nvme_rdma_destroy_queue_ib(queue);
        rdma_destroy_id(queue->cm_id);
        return ret;
 }
@@ -718,11 +688,11 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
        if (nvmf_should_reconnect(&ctrl->ctrl)) {
                dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
                        ctrl->ctrl.opts->reconnect_delay);
-               queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
+               queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
                                ctrl->ctrl.opts->reconnect_delay * HZ);
        } else {
                dev_info(ctrl->ctrl.device, "Removing controller...\n");
-               queue_work(nvme_rdma_wq, &ctrl->delete_work);
+               queue_work(nvme_wq, &ctrl->delete_work);
        }
 }
 
@@ -733,7 +703,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        bool changed;
        int ret;
 
-       ++ctrl->ctrl.opts->nr_reconnects;
+       ++ctrl->ctrl.nr_reconnects;
 
        if (ctrl->queue_count > 1) {
                nvme_rdma_free_io_queues(ctrl);
@@ -749,7 +719,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        if (ret)
                goto requeue;
 
-       ret = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+       ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
        if (ret)
                goto requeue;
 
@@ -777,7 +747,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
        WARN_ON_ONCE(!changed);
-       ctrl->ctrl.opts->nr_reconnects = 0;
+       ctrl->ctrl.nr_reconnects = 0;
 
        if (ctrl->queue_count > 1) {
                nvme_queue_scan(&ctrl->ctrl);
@@ -790,7 +760,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 requeue:
        dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
-                       ctrl->ctrl.opts->nr_reconnects);
+                       ctrl->ctrl.nr_reconnects);
        nvme_rdma_reconnect_or_remove(ctrl);
 }
 
@@ -802,10 +772,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
        nvme_stop_keep_alive(&ctrl->ctrl);
 
-       for (i = 0; i < ctrl->queue_count; i++) {
-               clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+       for (i = 0; i < ctrl->queue_count; i++)
                clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
-       }
 
        if (ctrl->queue_count > 1)
                nvme_stop_queues(&ctrl->ctrl);
@@ -833,7 +801,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
                return;
 
-       queue_work(nvme_rdma_wq, &ctrl->err_work);
+       queue_work(nvme_wq, &ctrl->err_work);
 }
 
 static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
@@ -1278,21 +1246,11 @@ static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
 
 static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
 {
-       struct nvme_rdma_device *dev;
        int ret;
 
-       dev = nvme_rdma_find_get_device(queue->cm_id);
-       if (!dev) {
-               dev_err(queue->cm_id->device->dev.parent,
-                       "no client data found!\n");
-               return -ECONNREFUSED;
-       }
-
-       ret = nvme_rdma_create_queue_ib(queue, dev);
-       if (ret) {
-               nvme_rdma_dev_put(dev);
-               goto out;
-       }
+       ret = nvme_rdma_create_queue_ib(queue);
+       if (ret)
+               return ret;
 
        ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
        if (ret) {
@@ -1306,7 +1264,6 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
 
 out_destroy_queue:
        nvme_rdma_destroy_queue_ib(queue);
-out:
        return ret;
 }
 
@@ -1334,8 +1291,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
         * specified by the Fabrics standard.
         */
        if (priv.qid == 0) {
-               priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH);
-               priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+               priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
+               priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
        } else {
                /*
                 * current interpretation of the fabrics spec
@@ -1383,12 +1340,14 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
                complete(&queue->cm_done);
                return 0;
        case RDMA_CM_EVENT_REJECTED:
+               nvme_rdma_destroy_queue_ib(queue);
                cm_error = nvme_rdma_conn_rejected(queue, ev);
                break;
-       case RDMA_CM_EVENT_ADDR_ERROR:
        case RDMA_CM_EVENT_ROUTE_ERROR:
        case RDMA_CM_EVENT_CONNECT_ERROR:
        case RDMA_CM_EVENT_UNREACHABLE:
+               nvme_rdma_destroy_queue_ib(queue);
+       case RDMA_CM_EVENT_ADDR_ERROR:
                dev_dbg(queue->ctrl->ctrl.device,
                        "CM error event %d\n", ev->event);
                cm_error = -ECONNRESET;
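
Note the deliberate fallthrough introduced above: ROUTE_ERROR, CONNECT_ERROR and UNREACHABLE can only arrive after nvme_rdma_create_queue_ib() has run, so they tear the IB queue down before falling into the ADDR_ERROR case, which fires earlier in the connection sequence and therefore must not. A compilable sketch of the shape:

enum cm_ev { EV_ADDR_ERROR, EV_ROUTE_ERROR, EV_CONNECT_ERROR, EV_UNREACHABLE };

static int handle_cm_error(enum cm_ev ev, int *destroyed)
{
        switch (ev) {
        case EV_ROUTE_ERROR:
        case EV_CONNECT_ERROR:
        case EV_UNREACHABLE:
                *destroyed = 1;         /* IB resources exist by now */
                /* fall through */
        case EV_ADDR_ERROR:             /* fires before IB queue creation */
                return -1;              /* stands in for -ECONNRESET */
        }
        return 0;
}

int main(void)
{
        int destroyed = 0;

        handle_cm_error(EV_UNREACHABLE, &destroyed);
        return !destroyed;              /* exits 0: teardown happened */
}
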
@@ -1435,8 +1394,8 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 /*
  * We cannot accept any other command until the Connect command has completed.
  */
-static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
-               struct request *rq)
+static inline blk_status_t
+nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
 {
        if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
                struct nvme_command *cmd = nvme_req(rq)->cmd;
@@ -1452,16 +1411,15 @@ static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
                         * failover.
                         */
                        if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
-                               return -EIO;
-                       else
-                               return -EAGAIN;
+                               return BLK_STS_IOERR;
+                       return BLK_STS_RESOURCE; /* try again later */
                }
        }
 
        return 0;
 }
 
-static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
                const struct blk_mq_queue_data *bd)
 {
        struct nvme_ns *ns = hctx->queue->queuedata;
@@ -1472,28 +1430,29 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
        struct nvme_command *c = sqe->data;
        bool flush = false;
        struct ib_device *dev;
-       int ret;
+       blk_status_t ret;
+       int err;
 
        WARN_ON_ONCE(rq->tag < 0);
 
        ret = nvme_rdma_queue_is_ready(queue, rq);
        if (unlikely(ret))
-               goto err;
+               return ret;
 
        dev = queue->device->dev;
        ib_dma_sync_single_for_cpu(dev, sqe->dma,
                        sizeof(struct nvme_command), DMA_TO_DEVICE);
 
        ret = nvme_setup_cmd(ns, rq, c);
-       if (ret != BLK_MQ_RQ_QUEUE_OK)
+       if (ret)
                return ret;
 
        blk_mq_start_request(rq);
 
-       ret = nvme_rdma_map_data(queue, rq, c);
-       if (ret < 0) {
+       err = nvme_rdma_map_data(queue, rq, c);
+       if (err < 0) {
                dev_err(queue->ctrl->ctrl.device,
-                            "Failed to map data (%d)\n", ret);
+                            "Failed to map data (%d)\n", err);
                nvme_cleanup_cmd(rq);
                goto err;
        }
@@ -1503,17 +1462,18 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        if (req_op(rq) == REQ_OP_FLUSH)
                flush = true;
-       ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
+       err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
                        req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
-       if (ret) {
+       if (err) {
                nvme_rdma_unmap_data(queue, rq);
                goto err;
        }
 
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 err:
-       return (ret == -ENOMEM || ret == -EAGAIN) ?
-               BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+       if (err == -ENOMEM || err == -EAGAIN)
+               return BLK_STS_RESOURCE;
+       return BLK_STS_IOERR;
 }
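
The rewritten queue_rq keeps two variables on purpose: err carries the negative errnos returned by the internal helpers, while ret carries the block layer's typed blk_status_t, and only the err label translates between the two domains, mapping the transient -ENOMEM/-EAGAIN to the retryable BLK_STS_RESOURCE. A standalone sketch of that boundary translation (the status encodings here are stand-ins):

#include <errno.h>
#include <stdio.h>

typedef int blk_status_t;               /* stand-in for the kernel type */
#define BLK_STS_OK       0
#define BLK_STS_RESOURCE 9              /* illustrative encodings only */
#define BLK_STS_IOERR    10

static blk_status_t errno_to_status(int err)
{
        if (err == -ENOMEM || err == -EAGAIN)
                return BLK_STS_RESOURCE;        /* transient: retried later */
        return BLK_STS_IOERR;                   /* hard failure */
}

int main(void)
{
        printf("%d %d\n", errno_to_status(-ENOMEM), errno_to_status(-EIO));
        return 0;
}
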
 
 static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -1523,7 +1483,6 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
        struct ib_wc wc;
        int found = 0;
 
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
        while (ib_poll_cq(cq, 1, &wc) > 0) {
                struct ib_cqe *cqe = wc.wr_cqe;
 
@@ -1560,8 +1519,8 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
 static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
        .queue_rq       = nvme_rdma_queue_rq,
        .complete       = nvme_rdma_complete_rq,
-       .init_request   = nvme_rdma_init_admin_request,
-       .exit_request   = nvme_rdma_exit_admin_request,
+       .init_request   = nvme_rdma_init_request,
+       .exit_request   = nvme_rdma_exit_request,
        .reinit_request = nvme_rdma_reinit_request,
        .init_hctx      = nvme_rdma_init_admin_hctx,
        .timeout        = nvme_rdma_timeout,
@@ -1571,7 +1530,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 {
        int error;
 
-       error = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+       error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
        if (error)
                return error;
 
@@ -1672,7 +1631,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
                nvme_rdma_free_io_queues(ctrl);
        }
 
-       if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
+       if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
                nvme_shutdown_ctrl(&ctrl->ctrl);
 
        blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1709,7 +1668,7 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
                return -EBUSY;
 
-       if (!queue_work(nvme_rdma_wq, &ctrl->delete_work))
+       if (!queue_work(nvme_wq, &ctrl->delete_work))
                return -EBUSY;
 
        return 0;
@@ -1743,8 +1702,8 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
 
 static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
-       struct nvme_rdma_ctrl *ctrl = container_of(work,
-                                       struct nvme_rdma_ctrl, reset_work);
+       struct nvme_rdma_ctrl *ctrl =
+               container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
        int ret;
        bool changed;
 
@@ -1785,22 +1744,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 del_dead_ctrl:
        /* Deleting this dead controller... */
        dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-       WARN_ON(!queue_work(nvme_rdma_wq, &ctrl->delete_work));
-}
-
-static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
-{
-       struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-               return -EBUSY;
-
-       if (!queue_work(nvme_rdma_wq, &ctrl->reset_work))
-               return -EBUSY;
-
-       flush_work(&ctrl->reset_work);
-
-       return 0;
+       WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work));
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1810,11 +1754,9 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
        .reg_read32             = nvmf_reg_read32,
        .reg_read64             = nvmf_reg_read64,
        .reg_write32            = nvmf_reg_write32,
-       .reset_ctrl             = nvme_rdma_reset_ctrl,
        .free_ctrl              = nvme_rdma_free_ctrl,
        .submit_async_event     = nvme_rdma_submit_async_event,
        .delete_ctrl            = nvme_rdma_del_ctrl,
-       .get_subsysnqn          = nvmf_get_subsysnqn,
        .get_address            = nvmf_get_address,
 };
 
@@ -1919,8 +1861,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
                        nvme_rdma_reconnect_ctrl_work);
        INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
        INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
-       INIT_WORK(&ctrl->reset_work, nvme_rdma_reset_ctrl_work);
-       spin_lock_init(&ctrl->lock);
+       INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
 
        ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
        ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -1939,12 +1880,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        /* sanity check icdoff */
        if (ctrl->ctrl.icdoff) {
                dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
+               ret = -EINVAL;
                goto out_remove_admin_queue;
        }
 
        /* sanity check keyed sgls */
        if (!(ctrl->ctrl.sgls & (1 << 20))) {
                dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not supported\n");
+               ret = -EINVAL;
                goto out_remove_admin_queue;
        }
 
@@ -2033,7 +1976,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
        }
        mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-       flush_workqueue(nvme_rdma_wq);
+       flush_workqueue(nvme_wq);
 }
 
 static struct ib_client nvme_rdma_ib_client = {
@@ -2046,13 +1989,9 @@ static int __init nvme_rdma_init_module(void)
 {
        int ret;
 
-       nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
-       if (!nvme_rdma_wq)
-               return -ENOMEM;
-
        ret = ib_register_client(&nvme_rdma_ib_client);
        if (ret)
-               goto err_destroy_wq;
+               return ret;
 
        ret = nvmf_register_transport(&nvme_rdma_transport);
        if (ret)
@@ -2062,8 +2001,6 @@ static int __init nvme_rdma_init_module(void)
 
 err_unreg_client:
        ib_unregister_client(&nvme_rdma_ib_client);
-err_destroy_wq:
-       destroy_workqueue(nvme_rdma_wq);
        return ret;
 }
 
@@ -2071,7 +2008,6 @@ static void __exit nvme_rdma_cleanup_module(void)
 {
        nvmf_unregister_transport(&nvme_rdma_transport);
        ib_unregister_client(&nvme_rdma_ib_client);
-       destroy_workqueue(nvme_rdma_wq);
 }
 
 module_init(nvme_rdma_init_module);
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
deleted file mode 100644 (file)
index 1f7671e..0000000
+++ /dev/null
@@ -1,2460 +0,0 @@
-/*
- * NVM Express device driver
- * Copyright (c) 2011-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-/*
- * Refer to the SCSI-NVMe Translation spec for details on how
- * each command is translated.
- */
-
-#include <linux/bio.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-#include <linux/compat.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kthread.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/poison.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <asm/unaligned.h>
-#include <scsi/sg.h>
-#include <scsi/scsi.h>
-#include <scsi/scsi_request.h>
-
-#include "nvme.h"
-
-static int sg_version_num = 30534;     /* 2 digits for each component */
-
-/* VPD Page Codes */
-#define VPD_SUPPORTED_PAGES                            0x00
-#define VPD_SERIAL_NUMBER                              0x80
-#define VPD_DEVICE_IDENTIFIERS                         0x83
-#define VPD_EXTENDED_INQUIRY                           0x86
-#define VPD_BLOCK_LIMITS                               0xB0
-#define VPD_BLOCK_DEV_CHARACTERISTICS                  0xB1
-
-/* format unit paramter list offsets */
-#define FORMAT_UNIT_SHORT_PARM_LIST_LEN                        4
-#define FORMAT_UNIT_LONG_PARM_LIST_LEN                 8
-#define FORMAT_UNIT_PROT_INT_OFFSET                    3
-#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET            0
-#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK              0x07
-
-/* Misc. defines */
-#define FIXED_SENSE_DATA                               0x70
-#define DESC_FORMAT_SENSE_DATA                         0x72
-#define FIXED_SENSE_DATA_ADD_LENGTH                    10
-#define LUN_ENTRY_SIZE                                 8
-#define LUN_DATA_HEADER_SIZE                           8
-#define ALL_LUNS_RETURNED                              0x02
-#define ALL_WELL_KNOWN_LUNS_RETURNED                   0x01
-#define RESTRICTED_LUNS_RETURNED                       0x00
-#define DOWNLOAD_SAVE_ACTIVATE                         0x05
-#define DOWNLOAD_SAVE_DEFER_ACTIVATE                   0x0E
-#define ACTIVATE_DEFERRED_MICROCODE                    0x0F
-#define FORMAT_UNIT_IMMED_MASK                         0x2
-#define FORMAT_UNIT_IMMED_OFFSET                       1
-#define KELVIN_TEMP_FACTOR                             273
-#define FIXED_FMT_SENSE_DATA_SIZE                      18
-#define DESC_FMT_SENSE_DATA_SIZE                       8
-
-/* SCSI/NVMe defines and bit masks */
-#define INQ_STANDARD_INQUIRY_PAGE                      0x00
-#define INQ_SUPPORTED_VPD_PAGES_PAGE                   0x00
-#define INQ_UNIT_SERIAL_NUMBER_PAGE                    0x80
-#define INQ_DEVICE_IDENTIFICATION_PAGE                 0x83
-#define INQ_EXTENDED_INQUIRY_DATA_PAGE                 0x86
-#define INQ_BDEV_LIMITS_PAGE                           0xB0
-#define INQ_BDEV_CHARACTERISTICS_PAGE                  0xB1
-#define INQ_SERIAL_NUMBER_LENGTH                       0x14
-#define INQ_NUM_SUPPORTED_VPD_PAGES                    6
-#define VERSION_SPC_4                                  0x06
-#define ACA_UNSUPPORTED                                        0
-#define STANDARD_INQUIRY_LENGTH                                36
-#define ADDITIONAL_STD_INQ_LENGTH                      31
-#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH              0x3C
-#define RESERVED_FIELD                                 0
-
-/* Mode Sense/Select defines */
-#define MODE_PAGE_INFO_EXCEP                           0x1C
-#define MODE_PAGE_CACHING                              0x08
-#define MODE_PAGE_CONTROL                              0x0A
-#define MODE_PAGE_POWER_CONDITION                      0x1A
-#define MODE_PAGE_RETURN_ALL                           0x3F
-#define MODE_PAGE_BLK_DES_LEN                          0x08
-#define MODE_PAGE_LLBAA_BLK_DES_LEN                    0x10
-#define MODE_PAGE_CACHING_LEN                          0x14
-#define MODE_PAGE_CONTROL_LEN                          0x0C
-#define MODE_PAGE_POW_CND_LEN                          0x28
-#define MODE_PAGE_INF_EXC_LEN                          0x0C
-#define MODE_PAGE_ALL_LEN                              0x54
-#define MODE_SENSE6_MPH_SIZE                           4
-#define MODE_SENSE_PAGE_CONTROL_MASK                   0xC0
-#define MODE_SENSE_PAGE_CODE_OFFSET                    2
-#define MODE_SENSE_PAGE_CODE_MASK                      0x3F
-#define MODE_SENSE_LLBAA_MASK                          0x10
-#define MODE_SENSE_LLBAA_SHIFT                         4
-#define MODE_SENSE_DBD_MASK                            8
-#define MODE_SENSE_DBD_SHIFT                           3
-#define MODE_SENSE10_MPH_SIZE                          8
-#define MODE_SELECT_CDB_PAGE_FORMAT_MASK               0x10
-#define MODE_SELECT_CDB_SAVE_PAGES_MASK                        0x1
-#define MODE_SELECT_6_BD_OFFSET                                3
-#define MODE_SELECT_10_BD_OFFSET                       6
-#define MODE_SELECT_10_LLBAA_OFFSET                    4
-#define MODE_SELECT_10_LLBAA_MASK                      1
-#define MODE_SELECT_6_MPH_SIZE                         4
-#define MODE_SELECT_10_MPH_SIZE                                8
-#define CACHING_MODE_PAGE_WCE_MASK                     0x04
-#define MODE_SENSE_BLK_DESC_ENABLED                    0
-#define MODE_SENSE_BLK_DESC_COUNT                      1
-#define MODE_SELECT_PAGE_CODE_MASK                     0x3F
-#define SHORT_DESC_BLOCK                               8
-#define LONG_DESC_BLOCK                                        16
-#define MODE_PAGE_POW_CND_LEN_FIELD                    0x26
-#define MODE_PAGE_INF_EXC_LEN_FIELD                    0x0A
-#define MODE_PAGE_CACHING_LEN_FIELD                    0x12
-#define MODE_PAGE_CONTROL_LEN_FIELD                    0x0A
-#define MODE_SENSE_PC_CURRENT_VALUES                   0
-
-/* Log Sense defines */
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE              0x00
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH            0x07
-#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE         0x2F
-#define LOG_PAGE_TEMPERATURE_PAGE                      0x0D
-#define LOG_SENSE_CDB_SP_NOT_ENABLED                   0
-#define LOG_SENSE_CDB_PC_MASK                          0xC0
-#define LOG_SENSE_CDB_PC_SHIFT                         6
-#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES             1
-#define LOG_SENSE_CDB_PAGE_CODE_MASK                   0x3F
-#define REMAINING_INFO_EXCP_PAGE_LENGTH                        0x8
-#define LOG_INFO_EXCP_PAGE_LENGTH                      0xC
-#define REMAINING_TEMP_PAGE_LENGTH                     0xC
-#define LOG_TEMP_PAGE_LENGTH                           0x10
-#define LOG_TEMP_UNKNOWN                               0xFF
-#define SUPPORTED_LOG_PAGES_PAGE_LENGTH                        0x3
-
-/* Read Capacity defines */
-#define READ_CAP_10_RESP_SIZE                          8
-#define READ_CAP_16_RESP_SIZE                          32
-
-/* NVMe Namespace and Command Defines */
-#define BYTES_TO_DWORDS                                        4
-#define NVME_MAX_FIRMWARE_SLOT                         7
-
-/* Report LUNs defines */
-#define REPORT_LUNS_FIRST_LUN_OFFSET                   8
-
-/* SCSI ADDITIONAL SENSE Codes */
-
-#define SCSI_ASC_NO_SENSE                              0x00
-#define SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT            0x03
-#define SCSI_ASC_LUN_NOT_READY                         0x04
-#define SCSI_ASC_WARNING                               0x0B
-#define SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED          0x10
-#define SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED         0x10
-#define SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED         0x10
-#define SCSI_ASC_UNRECOVERED_READ_ERROR                        0x11
-#define SCSI_ASC_MISCOMPARE_DURING_VERIFY              0x1D
-#define SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID          0x20
-#define SCSI_ASC_ILLEGAL_COMMAND                       0x20
-#define SCSI_ASC_ILLEGAL_BLOCK                         0x21
-#define SCSI_ASC_INVALID_CDB                           0x24
-#define SCSI_ASC_INVALID_LUN                           0x25
-#define SCSI_ASC_INVALID_PARAMETER                     0x26
-#define SCSI_ASC_FORMAT_COMMAND_FAILED                 0x31
-#define SCSI_ASC_INTERNAL_TARGET_FAILURE               0x44
-
-/* SCSI ADDITIONAL SENSE Code Qualifiers */
-
-#define SCSI_ASCQ_CAUSE_NOT_REPORTABLE                 0x00
-#define SCSI_ASCQ_FORMAT_COMMAND_FAILED                        0x01
-#define SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED         0x01
-#define SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED                0x02
-#define SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED                0x03
-#define SCSI_ASCQ_FORMAT_IN_PROGRESS                   0x04
-#define SCSI_ASCQ_POWER_LOSS_EXPECTED                  0x08
-#define SCSI_ASCQ_INVALID_LUN_ID                       0x09
-
-/* copied from drivers/usb/gadget/function/storage_common.h */
-static inline u32 get_unaligned_be24(u8 *buf)
-{
-       return 0xffffff & (u32) get_unaligned_be32(buf - 1);
-}
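
The deleted helper reads four big-endian bytes starting at buf - 1 and masks off the top byte; that is presumably safe in its CDB context, where the preceding byte exists, but it is a fragile idiom in general. A bounds-respecting equivalent, as a standalone sketch:

#include <stdint.h>
#include <stdio.h>

static uint32_t be24(const uint8_t *buf)
{
        /* read exactly the three bytes that belong to the field */
        return ((uint32_t)buf[0] << 16) | ((uint32_t)buf[1] << 8) | buf[2];
}

int main(void)
{
        uint8_t b[3] = { 0x01, 0x02, 0x03 };

        printf("0x%06x\n", be24(b));    /* prints 0x010203 */
        return 0;
}
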
-
-/* Struct to gather data that needs to be extracted from a SCSI CDB.
-   Not conforming to any particular CDB variant, but compatible with all. */
-
-struct nvme_trans_io_cdb {
-       u8 fua;
-       u8 prot_info;
-       u64 lba;
-       u32 xfer_len;
-};
-
-
-/* Internal Helper Functions */
-
-
-/* Copy data to userspace memory */
-
-static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
-                                                               unsigned long n)
-{
-       int i;
-       void *index = from;
-       size_t remaining = n;
-       size_t xfer_len;
-
-       if (hdr->iovec_count > 0) {
-               struct sg_iovec sgl;
-
-               for (i = 0; i < hdr->iovec_count; i++) {
-                       if (copy_from_user(&sgl, hdr->dxferp +
-                                               i * sizeof(struct sg_iovec),
-                                               sizeof(struct sg_iovec)))
-                               return -EFAULT;
-                       xfer_len = min(remaining, sgl.iov_len);
-                       if (copy_to_user(sgl.iov_base, index, xfer_len))
-                               return -EFAULT;
-
-                       index += xfer_len;
-                       remaining -= xfer_len;
-                       if (remaining == 0)
-                               break;
-               }
-               return 0;
-       }
-
-       if (copy_to_user(hdr->dxferp, from, n))
-               return -EFAULT;
-       return 0;
-}
-
-/* Copy data from userspace memory */
-
-static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
-                                                               unsigned long n)
-{
-       int i;
-       void *index = to;
-       size_t remaining = n;
-       size_t xfer_len;
-
-       if (hdr->iovec_count > 0) {
-               struct sg_iovec sgl;
-
-               for (i = 0; i < hdr->iovec_count; i++) {
-                       if (copy_from_user(&sgl, hdr->dxferp +
-                                               i * sizeof(struct sg_iovec),
-                                               sizeof(struct sg_iovec)))
-                               return -EFAULT;
-                       xfer_len = min(remaining, sgl.iov_len);
-                       if (copy_from_user(index, sgl.iov_base, xfer_len))
-                               return -EFAULT;
-                       index += xfer_len;
-                       remaining -= xfer_len;
-                       if (remaining == 0)
-                               break;
-               }
-               return 0;
-       }
-
-       if (copy_from_user(to, hdr->dxferp, n))
-               return -EFAULT;
-       return 0;
-}
-
-/* Status/Sense Buffer Writeback */
-
-static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
-                                u8 asc, u8 ascq)
-{
-       u8 xfer_len;
-       u8 resp[DESC_FMT_SENSE_DATA_SIZE];
-
-       if (scsi_status_is_good(status)) {
-               hdr->status = SAM_STAT_GOOD;
-               hdr->masked_status = GOOD;
-               hdr->host_status = DID_OK;
-               hdr->driver_status = DRIVER_OK;
-               hdr->sb_len_wr = 0;
-       } else {
-               hdr->status = status;
-               hdr->masked_status = status >> 1;
-               hdr->host_status = DID_OK;
-               hdr->driver_status = DRIVER_OK;
-
-               memset(resp, 0, DESC_FMT_SENSE_DATA_SIZE);
-               resp[0] = DESC_FORMAT_SENSE_DATA;
-               resp[1] = sense_key;
-               resp[2] = asc;
-               resp[3] = ascq;
-
-               xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
-               hdr->sb_len_wr = xfer_len;
-               if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
-                       return -EFAULT;
-       }
-
-       return 0;
-}
-
-/*
- * Take a status code from a lowlevel routine, and if it was a positive NVMe
- * error code update the sense data based on it.  In either case the passed
- * in value is returned again, unless an -EFAULT from copy_to_user overrides
- * it.
- */
-static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
-{
-       u8 status, sense_key, asc, ascq;
-       int res;
-
-       /* For non-nvme (Linux) errors, simply return the error code */
-       if (nvme_sc < 0)
-               return nvme_sc;
-
-       /* Mask DNR, More, and reserved fields */
-       switch (nvme_sc & 0x7FF) {
-       /* Generic Command Status */
-       case NVME_SC_SUCCESS:
-               status = SAM_STAT_GOOD;
-               sense_key = NO_SENSE;
-               asc = SCSI_ASC_NO_SENSE;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_INVALID_OPCODE:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = ILLEGAL_REQUEST;
-               asc = SCSI_ASC_ILLEGAL_COMMAND;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_INVALID_FIELD:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = ILLEGAL_REQUEST;
-               asc = SCSI_ASC_INVALID_CDB;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_DATA_XFER_ERROR:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = MEDIUM_ERROR;
-               asc = SCSI_ASC_NO_SENSE;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_POWER_LOSS:
-               status = SAM_STAT_TASK_ABORTED;
-               sense_key = ABORTED_COMMAND;
-               asc = SCSI_ASC_WARNING;
-               ascq = SCSI_ASCQ_POWER_LOSS_EXPECTED;
-               break;
-       case NVME_SC_INTERNAL:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = HARDWARE_ERROR;
-               asc = SCSI_ASC_INTERNAL_TARGET_FAILURE;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_ABORT_REQ:
-               status = SAM_STAT_TASK_ABORTED;
-               sense_key = ABORTED_COMMAND;
-               asc = SCSI_ASC_NO_SENSE;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_ABORT_QUEUE:
-               status = SAM_STAT_TASK_ABORTED;
-               sense_key = ABORTED_COMMAND;
-               asc = SCSI_ASC_NO_SENSE;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_FUSED_FAIL:
-               status = SAM_STAT_TASK_ABORTED;
-               sense_key = ABORTED_COMMAND;
-               asc = SCSI_ASC_NO_SENSE;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_FUSED_MISSING:
-               status = SAM_STAT_TASK_ABORTED;
-               sense_key = ABORTED_COMMAND;
-               asc = SCSI_ASC_NO_SENSE;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_INVALID_NS:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = ILLEGAL_REQUEST;
-               asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
-               ascq = SCSI_ASCQ_INVALID_LUN_ID;
-               break;
-       case NVME_SC_LBA_RANGE:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = ILLEGAL_REQUEST;
-               asc = SCSI_ASC_ILLEGAL_BLOCK;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_CAP_EXCEEDED:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = MEDIUM_ERROR;
-               asc = SCSI_ASC_NO_SENSE;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_NS_NOT_READY:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = NOT_READY;
-               asc = SCSI_ASC_LUN_NOT_READY;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-
-       /* Command Specific Status */
-       case NVME_SC_INVALID_FORMAT:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = ILLEGAL_REQUEST;
-               asc = SCSI_ASC_FORMAT_COMMAND_FAILED;
-               ascq = SCSI_ASCQ_FORMAT_COMMAND_FAILED;
-               break;
-       case NVME_SC_BAD_ATTRIBUTES:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = ILLEGAL_REQUEST;
-               asc = SCSI_ASC_INVALID_CDB;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-
-       /* Media Errors */
-       case NVME_SC_WRITE_FAULT:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = MEDIUM_ERROR;
-               asc = SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_READ_ERROR:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = MEDIUM_ERROR;
-               asc = SCSI_ASC_UNRECOVERED_READ_ERROR;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_GUARD_CHECK:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = MEDIUM_ERROR;
-               asc = SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED;
-               ascq = SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED;
-               break;
-       case NVME_SC_APPTAG_CHECK:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = MEDIUM_ERROR;
-               asc = SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED;
-               ascq = SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED;
-               break;
-       case NVME_SC_REFTAG_CHECK:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = MEDIUM_ERROR;
-               asc = SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED;
-               ascq = SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED;
-               break;
-       case NVME_SC_COMPARE_FAILED:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = MISCOMPARE;
-               asc = SCSI_ASC_MISCOMPARE_DURING_VERIFY;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       case NVME_SC_ACCESS_DENIED:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = ILLEGAL_REQUEST;
-               asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
-               ascq = SCSI_ASCQ_INVALID_LUN_ID;
-               break;
-
-       /* Unspecified/Default */
-       case NVME_SC_CMDID_CONFLICT:
-       case NVME_SC_CMD_SEQ_ERROR:
-       case NVME_SC_CQ_INVALID:
-       case NVME_SC_QID_INVALID:
-       case NVME_SC_QUEUE_SIZE:
-       case NVME_SC_ABORT_LIMIT:
-       case NVME_SC_ABORT_MISSING:
-       case NVME_SC_ASYNC_LIMIT:
-       case NVME_SC_FIRMWARE_SLOT:
-       case NVME_SC_FIRMWARE_IMAGE:
-       case NVME_SC_INVALID_VECTOR:
-       case NVME_SC_INVALID_LOG_PAGE:
-       default:
-               status = SAM_STAT_CHECK_CONDITION;
-               sense_key = ILLEGAL_REQUEST;
-               asc = SCSI_ASC_NO_SENSE;
-               ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               break;
-       }
-
-       res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
-       return res ? res : nvme_sc;
-}
-
-/* INQUIRY Helper Functions */
-
-static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr, u8 *inq_response,
-                                       int alloc_len)
-{
-       struct nvme_ctrl *ctrl = ns->ctrl;
-       struct nvme_id_ns *id_ns;
-       int res;
-       int nvme_sc;
-       int xfer_len;
-       u8 resp_data_format = 0x02;
-       u8 protect;
-       u8 cmdque = 0x01 << 1;
-       u8 fw_offset = sizeof(ctrl->firmware_rev);
-
-       /* nvme ns identify - use DPS value for PROTECT field */
-       nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
-       res = nvme_trans_status_code(hdr, nvme_sc);
-       if (res)
-               return res;
-
-       if (id_ns->dps)
-               protect = 0x01;
-       else
-               protect = 0;
-       kfree(id_ns);
-
-       memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-       inq_response[2] = VERSION_SPC_4;
-       inq_response[3] = resp_data_format;     /*normaca=0 | hisup=0 */
-       inq_response[4] = ADDITIONAL_STD_INQ_LENGTH;
-       inq_response[5] = protect;      /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
-       inq_response[7] = cmdque;       /* wbus16=0 | sync=0 | vs=0 */
-       strncpy(&inq_response[8], "NVMe    ", 8);
-       strncpy(&inq_response[16], ctrl->model, 16);
-
-       while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
-               fw_offset--;
-       fw_offset -= 4;
-       strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
-
-       xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-       return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr, u8 *inq_response,
-                                       int alloc_len)
-{
-       int xfer_len;
-
-       memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-       inq_response[1] = INQ_SUPPORTED_VPD_PAGES_PAGE;   /* Page Code */
-       inq_response[3] = INQ_NUM_SUPPORTED_VPD_PAGES;    /* Page Length */
-       inq_response[4] = INQ_SUPPORTED_VPD_PAGES_PAGE;
-       inq_response[5] = INQ_UNIT_SERIAL_NUMBER_PAGE;
-       inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE;
-       inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE;
-       inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE;
-       inq_response[9] = INQ_BDEV_LIMITS_PAGE;
-
-       xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-       return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr, u8 *inq_response,
-                                       int alloc_len)
-{
-       int xfer_len;
-
-       memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-       inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
-       inq_response[3] = INQ_SERIAL_NUMBER_LENGTH;    /* Page Length */
-       strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
-
-       xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-       return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-               u8 *inq_response, int alloc_len)
-{
-       struct nvme_id_ns *id_ns;
-       int nvme_sc, res;
-       size_t len;
-       void *eui;
-
-       nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-       res = nvme_trans_status_code(hdr, nvme_sc);
-       if (res)
-               return res;
-
-       eui = id_ns->eui64;
-       len = sizeof(id_ns->eui64);
-
-       if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) {
-               if (bitmap_empty(eui, len * 8)) {
-                       eui = id_ns->nguid;
-                       len = sizeof(id_ns->nguid);
-               }
-       }
-
-       if (bitmap_empty(eui, len * 8)) {
-               res = -EOPNOTSUPP;
-               goto out_free_id;
-       }
-
-       memset(inq_response, 0, alloc_len);
-       inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
-       inq_response[3] = 4 + len; /* Page Length */
-
-       /* Designation Descriptor start */
-       inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
-       inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
-       inq_response[6] = 0x00; /* Rsvd */
-       inq_response[7] = len;  /* Designator Length */
-       memcpy(&inq_response[8], eui, len);
-
-       res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
-out_free_id:
-       kfree(id_ns);
-       return res;
-}
-
-static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
-               struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
-{
-       struct nvme_ctrl *ctrl = ns->ctrl;
-       struct nvme_id_ctrl *id_ctrl;
-       int nvme_sc, res;
-
-       if (alloc_len < 72) {
-               return nvme_trans_completion(hdr,
-                               SAM_STAT_CHECK_CONDITION,
-                               ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-       }
-
-       nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
-       res = nvme_trans_status_code(hdr, nvme_sc);
-       if (res)
-               return res;
-
-       memset(inq_response, 0, alloc_len);
-       inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
-       inq_response[3] = 0x48; /* Page Length */
-
-       /* Designation Descriptor start */
-       inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
-       inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
-       inq_response[6] = 0x00; /* Rsvd */
-       inq_response[7] = 0x44; /* Designator Length */
-
-       sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
-       memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
-       sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
-       memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
-
-       res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
-       kfree(id_ctrl);
-       return res;
-}
-
-static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       u8 *resp, int alloc_len)
-{
-       int res;
-
-       if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) {
-               res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
-               if (res != -EOPNOTSUPP)
-                       return res;
-       }
-
-       return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
-}
-
-static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       int alloc_len)
-{
-       u8 *inq_response;
-       int res;
-       int nvme_sc;
-       struct nvme_ctrl *ctrl = ns->ctrl;
-       struct nvme_id_ctrl *id_ctrl;
-       struct nvme_id_ns *id_ns;
-       int xfer_len;
-       u8 microcode = 0x80;
-       u8 spt;
-       u8 spt_lut[8] = {0, 0, 2, 1, 4, 6, 5, 7};
-       u8 grd_chk, app_chk, ref_chk, protect;
-       u8 uask_sup = 0x20;
-       u8 v_sup;
-       u8 luiclr = 0x01;
-
-       inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
-       if (inq_response == NULL)
-               return -ENOMEM;
-
-       nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
-       res = nvme_trans_status_code(hdr, nvme_sc);
-       if (res)
-               goto out_free_inq;
-
-       spt = spt_lut[id_ns->dpc & 0x07] << 3;
-       if (id_ns->dps)
-               protect = 0x01;
-       else
-               protect = 0;
-       kfree(id_ns);
-
-       grd_chk = protect << 2;
-       app_chk = protect << 1;
-       ref_chk = protect;
-
-       nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
-       res = nvme_trans_status_code(hdr, nvme_sc);
-       if (res)
-               goto out_free_inq;
-
-       v_sup = id_ctrl->vwc;
-       kfree(id_ctrl);
-
-       memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
-       inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE;    /* Page Code */
-       inq_response[2] = 0x00;    /* Page Length MSB */
-       inq_response[3] = 0x3C;    /* Page Length LSB */
-       inq_response[4] = microcode | spt | grd_chk | app_chk | ref_chk;
-       inq_response[5] = uask_sup;
-       inq_response[6] = v_sup;
-       inq_response[7] = luiclr;
-       inq_response[8] = 0;
-       inq_response[9] = 0;
-
-       xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
-       res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free_inq:
-       kfree(inq_response);
-       return res;
-}
-
-static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       u8 *inq_response, int alloc_len)
-{
-       __be32 max_sectors = cpu_to_be32(
-               nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
-       __be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
-       __be32 discard_desc_count = cpu_to_be32(0x100);
-
-       memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
-       inq_response[1] = VPD_BLOCK_LIMITS;
-       inq_response[3] = 0x3c; /* Page Length */
-       memcpy(&inq_response[8], &max_sectors, sizeof(u32));
-       memcpy(&inq_response[20], &max_discard, sizeof(u32));
-
-       if (max_discard)
-               memcpy(&inq_response[24], &discard_desc_count, sizeof(u32));
-
-       return nvme_trans_copy_to_user(hdr, inq_response, 0x3c);
-}
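-
-/*
- * Worked example, assuming the usual nvme_block_nr() helper of
- * sector >> (lba_shift - 9): the Block Limits page reports lengths in
- * logical blocks while the block layer tracks 512-byte sectors, so a
- * namespace with 4KiB LBAs (lba_shift = 12) and a 2048-sector (1MiB)
- * queue limit advertises 2048 >> 3 = 256 blocks as the MAXIMUM TRANSFER
- * LENGTH.
- */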
-
-static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       int alloc_len)
-{
-       u8 *inq_response;
-       int res;
-       int xfer_len;
-
-       inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
-       if (inq_response == NULL) {
-               res = -ENOMEM;
-               goto out_mem;
-       }
-
-       inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE;    /* Page Code */
-       inq_response[2] = 0x00;    /* Page Length MSB */
-       inq_response[3] = 0x3C;    /* Page Length LSB */
-       inq_response[4] = 0x00;    /* Medium Rotation Rate MSB */
-       inq_response[5] = 0x01;    /* Medium Rotation Rate LSB: 0001h = non-rotating (SSD) */
-       inq_response[6] = 0x00;    /* Form Factor */
-
-       xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
-       res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
-       kfree(inq_response);
- out_mem:
-       return res;
-}
-
-/* LOG SENSE Helper Functions */
-
-static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       int alloc_len)
-{
-       int res;
-       int xfer_len;
-       u8 *log_response;
-
-       log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
-       if (log_response == NULL) {
-               res = -ENOMEM;
-               goto out_mem;
-       }
-
-       log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
-       /* Subpage=0x00, Page Length MSB=0 */
-       log_response[3] = SUPPORTED_LOG_PAGES_PAGE_LENGTH;
-       log_response[4] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
-       log_response[5] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
-       log_response[6] = LOG_PAGE_TEMPERATURE_PAGE;
-
-       xfer_len = min(alloc_len, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
-       res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
-       kfree(log_response);
- out_mem:
-       return res;
-}
-
-static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr, int alloc_len)
-{
-       int res;
-       int xfer_len;
-       u8 *log_response;
-       struct nvme_smart_log *smart_log;
-       u8 temp_c;
-       u16 temp_k;
-
-       log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
-       if (log_response == NULL)
-               return -ENOMEM;
-
-       res = nvme_get_log_page(ns->ctrl, &smart_log);
-       if (res < 0)
-               goto out_free_response;
-
-       if (res != NVME_SC_SUCCESS) {
-               temp_c = LOG_TEMP_UNKNOWN;
-       } else {
-               temp_k = (smart_log->temperature[1] << 8) +
-                               (smart_log->temperature[0]);
-               temp_c = temp_k - KELVIN_TEMP_FACTOR;
-       }
-       kfree(smart_log);
-
-       log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
-       /* Subpage=0x00, Page Length MSB=0 */
-       log_response[3] = REMAINING_INFO_EXCP_PAGE_LENGTH;
-       /* Informational Exceptions Log Parameter 1 Start */
-       /* Parameter Code=0x0000 bytes 4,5 */
-       log_response[6] = 0x23; /* DU=0, TSD=1, ETC=0, TMC=0, FMT_AND_LNK=11b */
-       log_response[7] = 0x04; /* PARAMETER LENGTH */
-       /* Additional Sense Code and Additional Sense Code Qualifier = 0x00 each */
-       /* Use Temperature from NVMe Get Log Page, convert to C from K */
-       log_response[10] = temp_c;
-
-       xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
-       res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
-       kfree(log_response);
-       return res;
-}
-
-static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       int alloc_len)
-{
-       int res;
-       int xfer_len;
-       u8 *log_response;
-       struct nvme_smart_log *smart_log;
-       u32 feature_resp;
-       u8 temp_c_cur, temp_c_thresh;
-       u16 temp_k;
-
-       log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
-       if (log_response == NULL)
-               return -ENOMEM;
-
-       res = nvme_get_log_page(ns->ctrl, &smart_log);
-       if (res < 0)
-               goto out_free_response;
-
-       if (res != NVME_SC_SUCCESS) {
-               temp_c_cur = LOG_TEMP_UNKNOWN;
-       } else {
-               temp_k = (smart_log->temperature[1] << 8) +
-                               (smart_log->temperature[0]);
-               temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
-       }
-       kfree(smart_log);
-
-       /* Get Features for Temp Threshold */
-       res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0,
-                                                               &feature_resp);
-       if (res != NVME_SC_SUCCESS)
-               temp_c_thresh = LOG_TEMP_UNKNOWN;
-       else
-               temp_c_thresh = (feature_resp & 0xFFFF) - KELVIN_TEMP_FACTOR;
-
-       log_response[0] = LOG_PAGE_TEMPERATURE_PAGE;
-       /* Subpage=0x00, Page Length MSB=0 */
-       log_response[3] = REMAINING_TEMP_PAGE_LENGTH;
-       /* Temperature Log Parameter 1 (Temperature) Start */
-       /* Parameter Code = 0x0000 */
-       log_response[6] = 0x01;         /* Format and Linking = 01b */
-       log_response[7] = 0x02;         /* Parameter Length */
-       /* Use Temperature from NVMe Get Log Page, convert to C from K */
-       log_response[9] = temp_c_cur;
-       /* Temperature Log Parameter 2 (Reference Temperature) Start */
-       log_response[11] = 0x01;        /* Parameter Code = 0x0001 */
-       log_response[12] = 0x01;        /* Format and Linking = 01b */
-       log_response[13] = 0x02;        /* Parameter Length */
-       /* Use Temperature Thresh from NVMe Get Log Page, convert to C from K */
-       log_response[15] = temp_c_thresh;
-
-       xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
-       res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
-       kfree(log_response);
-       return res;
-}
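-
-/*
- * Worked example with assumed values: the SMART log reports temperature
- * as a little-endian 16-bit Kelvin value, so for temperature[0] = 0x2C
- * and temperature[1] = 0x01:
- *
- *     temp_k = (0x01 << 8) + 0x2C = 300 K
- *     temp_c = 300 - KELVIN_TEMP_FACTOR (273) = 27 degrees C
- *
- * and 27 is the single byte placed in the SCSI log parameter above.
- */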
-
-/* MODE SENSE Helper Functions */
-
-static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
-                                       u16 mode_data_length, u16 blk_desc_len)
-{
-       /* Quick check to make sure I don't stomp on my own memory... */
-       if ((cdb10 && len < 8) || (!cdb10 && len < 4))
-               return -EINVAL;
-
-       if (cdb10) {
-               resp[0] = (mode_data_length & 0xFF00) >> 8;
-               resp[1] = (mode_data_length & 0x00FF);
-               resp[3] = 0x10 /* DPOFUA */;
-               resp[4] = llbaa;
-               resp[5] = RESERVED_FIELD;
-               resp[6] = (blk_desc_len & 0xFF00) >> 8;
-               resp[7] = (blk_desc_len & 0x00FF);
-       } else {
-               resp[0] = (mode_data_length & 0x00FF);
-               resp[2] = 0x10 /* DPOFUA */;
-               resp[3] = (blk_desc_len & 0x00FF);
-       }
-
-       return 0;
-}
-
-static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                   u8 *resp, int len, u8 llbaa)
-{
-       int res;
-       int nvme_sc;
-       struct nvme_id_ns *id_ns;
-       u8 flbas;
-       u32 lba_length;
-
-       if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
-               return -EINVAL;
-       else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
-               return -EINVAL;
-
-       nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-       res = nvme_trans_status_code(hdr, nvme_sc);
-       if (res)
-               return res;
-
-       flbas = (id_ns->flbas) & 0x0F;
-       lba_length = (1 << (id_ns->lbaf[flbas].ds));
-
-       if (llbaa == 0) {
-               __be32 tmp_cap = cpu_to_be32(le64_to_cpu(id_ns->ncap));
-               /* Byte 4 is reserved */
-               __be32 tmp_len = cpu_to_be32(lba_length & 0x00FFFFFF);
-
-               memcpy(resp, &tmp_cap, sizeof(u32));
-               memcpy(&resp[4], &tmp_len, sizeof(u32));
-       } else {
-               __be64 tmp_cap = cpu_to_be64(le64_to_cpu(id_ns->ncap));
-               __be32 tmp_len = cpu_to_be32(lba_length);
-
-               memcpy(resp, &tmp_cap, sizeof(u64));
-               /* Bytes 8, 9, 10, 11 are reserved */
-               memcpy(&resp[12], &tmp_len, sizeof(u32));
-       }
-
-       kfree(id_ns);
-       return res;
-}
-
-static int nvme_trans_fill_control_page(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr, u8 *resp,
-                                       int len)
-{
-       if (len < MODE_PAGE_CONTROL_LEN)
-               return -EINVAL;
-
-       resp[0] = MODE_PAGE_CONTROL;
-       resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
-       resp[2] = 0x0E;         /* TST=000b, TMF_ONLY=0, DPICZ=1,
-                                * D_SENSE=1, GLTSD=1, RLEC=0 */
-       resp[3] = 0x12;         /* Q_ALGO_MODIFIER=1h, NUAR=0, QERR=01b */
-       /* Byte 4:  VS=0, RAC=0, UA_INT=0, SWP=0 */
-       resp[5] = 0x40;         /* ATO=0, TAS=1, ATMPE=0, RWWP=0, AUTOLOAD=0 */
-       /* resp[6] and [7] are obsolete, thus zero */
-       resp[8] = 0xFF;         /* Busy timeout period = 0xffff */
-       resp[9] = 0xFF;
-       /* Bytes 10,11: Extended selftest completion time = 0x0000 */
-
-       return 0;
-}
-
-static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr,
-                                       u8 *resp, int len)
-{
-       int res = 0;
-       int nvme_sc;
-       u32 feature_resp;
-       u8 vwc;
-
-       if (len < MODE_PAGE_CACHING_LEN)
-               return -EINVAL;
-
-       nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, NULL, 0,
-                                                               &feature_resp);
-       res = nvme_trans_status_code(hdr, nvme_sc);
-       if (res)
-               return res;
-
-       vwc = feature_resp & 0x00000001;
-
-       resp[0] = MODE_PAGE_CACHING;
-       resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
-       resp[2] = vwc << 2;
-       return 0;
-}
-
-static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr, u8 *resp,
-                                       int len)
-{
-       if (len < MODE_PAGE_POW_CND_LEN)
-               return -EINVAL;
-
-       resp[0] = MODE_PAGE_POWER_CONDITION;
-       resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
-       /* All other bytes are zero */
-
-       return 0;
-}
-
-static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr, u8 *resp,
-                                       int len)
-{
-       if (len < MODE_PAGE_INF_EXC_LEN)
-               return -EINVAL;
-
-       resp[0] = MODE_PAGE_INFO_EXCEP;
-       resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
-       resp[2] = 0x88;
-       /* All other bytes are zero */
-
-       return 0;
-}
-
-static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                    u8 *resp, int len)
-{
-       int res;
-       u16 mode_pages_offset_1 = 0;
-       u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
-
-       mode_pages_offset_2 = mode_pages_offset_1 + MODE_PAGE_CACHING_LEN;
-       mode_pages_offset_3 = mode_pages_offset_2 + MODE_PAGE_CONTROL_LEN;
-       mode_pages_offset_4 = mode_pages_offset_3 + MODE_PAGE_POW_CND_LEN;
-
-       res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
-                                       MODE_PAGE_CACHING_LEN);
-       if (res)
-               return res;
-       res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
-                                       MODE_PAGE_CONTROL_LEN);
-       if (res)
-               return res;
-       res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
-                                       MODE_PAGE_POW_CND_LEN);
-       if (res)
-               return res;
-       return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
-                                       MODE_PAGE_INF_EXC_LEN);
-}
-
-static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
-{
-       if (dbd == MODE_SENSE_BLK_DESC_ENABLED) {
-               /* SPC-4: len = 8 x Num_of_descriptors if llbaa = 0, 16x if 1 */
-               return 8 * (llbaa + 1) * MODE_SENSE_BLK_DESC_COUNT;
-       } else {
-               return 0;
-       }
-}
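-
-/*
- * Worked example: with block descriptors enabled, the helper above
- * returns 8 * (0 + 1) = 8 bytes per descriptor for llbaa = 0 (standard
- * descriptor) and 8 * (1 + 1) = 16 bytes for llbaa = 1 (long LBA
- * descriptor), the same sizes nvme_trans_fill_blk_desc() fills in above.
- */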
-
-static int nvme_trans_mode_page_create(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr, u8 *cmd,
-                                       u16 alloc_len, u8 cdb10,
-                                       int (*mode_page_fill_func)
-                                       (struct nvme_ns *,
-                                       struct sg_io_hdr *hdr, u8 *, int),
-                                       u16 mode_pages_tot_len)
-{
-       int res;
-       int xfer_len;
-       u8 *response;
-       u8 dbd, llbaa;
-       u16 resp_size;
-       int mph_size;
-       u16 mode_pages_offset_1;
-       u16 blk_desc_len, blk_desc_offset, mode_data_length;
-
-       dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
-       llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
-       mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
-
-       blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
-
-       resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
-       /* Refer to spc4r34 Table 440 for the calculation of the Mode Data Length field */
-       mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len;
-
-       blk_desc_offset = mph_size;
-       mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
-
-       response = kzalloc(resp_size, GFP_KERNEL);
-       if (response == NULL) {
-               res = -ENOMEM;
-               goto out_mem;
-       }
-
-       res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
-                                       llbaa, mode_data_length, blk_desc_len);
-       if (res)
-               goto out_free;
-       if (blk_desc_len > 0) {
-               res = nvme_trans_fill_blk_desc(ns, hdr,
-                                              &response[blk_desc_offset],
-                                              blk_desc_len, llbaa);
-               if (res)
-                       goto out_free;
-       }
-       res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
-                                       mode_pages_tot_len);
-       if (res)
-               goto out_free;
-
-       xfer_len = min(alloc_len, resp_size);
-       res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- out_free:
-       kfree(response);
- out_mem:
-       return res;
-}
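-
-/*
- * Worked example for the mode_data_length formula above, with assumed
- * sizes: a MODE SENSE(10) (cdb10 = 1) response with an 8-byte block
- * descriptor and a 20-byte mode page gives
- *
- *     mode_data_length = 3 + 3 * 1 + 8 + 20 = 34
- *
- * i.e. the full 8 + 8 + 20 = 36-byte response minus the 2-byte MODE
- * DATA LENGTH field itself, per spc4r34 Table 440.
- */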
-
-/* Read Capacity Helper Functions */
-
-static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
-                                                               u8 cdb16)
-{
-       u8 flbas;
-       u32 lba_length;
-       u64 rlba;
-       u8 prot_en;
-       u8 p_type_lut[4] = {0, 0, 1, 2};
-       __be64 tmp_rlba;
-       __be32 tmp_rlba_32;
-       __be32 tmp_len;
-
-       flbas = (id_ns->flbas) & 0x0F;
-       lba_length = (1 << (id_ns->lbaf[flbas].ds));
-       rlba = le64_to_cpup(&id_ns->nsze) - 1;
-       prot_en = id_ns->dps ? 0x01 : 0;
-
-       if (!cdb16) {
-               if (rlba > 0xFFFFFFFF)
-                       rlba = 0xFFFFFFFF;
-               tmp_rlba_32 = cpu_to_be32(rlba);
-               tmp_len = cpu_to_be32(lba_length);
-               memcpy(response, &tmp_rlba_32, sizeof(u32));
-               memcpy(&response[4], &tmp_len, sizeof(u32));
-       } else {
-               tmp_rlba = cpu_to_be64(rlba);
-               tmp_len = cpu_to_be32(lba_length);
-               memcpy(response, &tmp_rlba, sizeof(u64));
-               memcpy(&response[8], &tmp_len, sizeof(u32));
-               response[12] = (p_type_lut[id_ns->dps & 0x3] << 1) | prot_en;
-               /* P_I_Exponent = 0x0 | LBPPBE = 0x0 */
-               /* LBPME = 0 | LBPRZ = 0 | LALBA = 0x00 */
-               /* Bytes 16-31 - Reserved */
-       }
-}
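-
-/*
- * Illustrative note: READ CAPACITY(10) can only return a 32-bit RETURNED
- * LOGICAL BLOCK ADDRESS, so for an assumed nsze of 2^33 blocks the
- * computed rlba of 2^33 - 1 is clamped to 0xFFFFFFFF above; per SBC,
- * that value tells the initiator to retry with READ CAPACITY(16), which
- * takes the cdb16 path and returns the full 64-bit value.
- */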
-
-/* Start Stop Unit Helper Functions */
-
-static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       u8 buffer_id)
-{
-       struct nvme_command c;
-       int nvme_sc;
-
-       memset(&c, 0, sizeof(c));
-       c.common.opcode = nvme_admin_activate_fw;
-       c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
-
-       nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
-       return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       u8 opcode, u32 tot_len, u32 offset,
-                                       u8 buffer_id)
-{
-       int nvme_sc;
-       struct nvme_command c;
-
-       if (hdr->iovec_count > 0) {
-               /* Assuming SGL is not allowed for this command */
-               return nvme_trans_completion(hdr,
-                                       SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST,
-                                       SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-       }
-
-       memset(&c, 0, sizeof(c));
-       c.common.opcode = opcode;       /* callers pass nvme_admin_download_fw */
-       c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
-       c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
-
-       nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
-                       hdr->dxferp, tot_len, NULL, 0);
-       return nvme_trans_status_code(hdr, nvme_sc);
-}
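-
-/*
- * Worked example with assumed values: Firmware Image Download takes a
- * zero-based dword count and a dword offset, hence the BYTES_TO_DWORDS
- * conversions above.  For a 4096-byte chunk at byte offset 8192:
-
- *     numd   = 4096 / 4 - 1 = 1023   (1024 dwords, zero-based)
- *     offset = 8192 / 4 = 2048       (dwords)
- */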
-
-/* Mode Select Helper Functions */
-
-static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
-                                               u16 *bd_len, u8 *llbaa)
-{
-       if (cdb10) {
-               /* 10 Byte CDB */
-               *bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
-                       parm_list[MODE_SELECT_10_BD_OFFSET + 1];
-               *llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
-                               MODE_SELECT_10_LLBAA_MASK;
-       } else {
-               /* 6 Byte CDB */
-               *bd_len = parm_list[MODE_SELECT_6_BD_OFFSET];
-       }
-}
-
-static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
-                                       u16 idx, u16 bd_len, u8 llbaa)
-{
-       /* Store block descriptor info if a FORMAT UNIT comes later */
-       /* TODO Saving 1st BD info; what to do if multiple BD received? */
-       if (llbaa == 0) {
-               /* Standard Block Descriptor - spc4r34 7.5.5.1 */
-               ns->mode_select_num_blocks =
-                               (parm_list[idx + 1] << 16) +
-                               (parm_list[idx + 2] << 8) +
-                               (parm_list[idx + 3]);
-
-               ns->mode_select_block_len =
-                               (parm_list[idx + 5] << 16) +
-                               (parm_list[idx + 6] << 8) +
-                               (parm_list[idx + 7]);
-       } else {
-               /* Long LBA Block Descriptor - sbc3r27 6.4.2.3 */
-               ns->mode_select_num_blocks =
-                               (((u64)parm_list[idx + 0]) << 56) +
-                               (((u64)parm_list[idx + 1]) << 48) +
-                               (((u64)parm_list[idx + 2]) << 40) +
-                               (((u64)parm_list[idx + 3]) << 32) +
-                               (((u64)parm_list[idx + 4]) << 24) +
-                               (((u64)parm_list[idx + 5]) << 16) +
-                               (((u64)parm_list[idx + 6]) << 8) +
-                               ((u64)parm_list[idx + 7]);
-
-               ns->mode_select_block_len =
-                               (parm_list[idx + 12] << 24) +
-                               (parm_list[idx + 13] << 16) +
-                               (parm_list[idx + 14] << 8) +
-                               (parm_list[idx + 15]);
-       }
-}
-
-static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       u8 *mode_page, u8 page_code)
-{
-       int res = 0;
-       int nvme_sc;
-       unsigned dword11;
-
-       switch (page_code) {
-       case MODE_PAGE_CACHING:
-               dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
-               nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
-                                           dword11, NULL, 0, NULL);
-               res = nvme_trans_status_code(hdr, nvme_sc);
-               break;
-       case MODE_PAGE_CONTROL:
-               break;
-       case MODE_PAGE_POWER_CONDITION:
-               /* Verify the OS is not trying to set timers */
-               if ((mode_page[2] & 0x01) != 0 || (mode_page[3] & 0x0F) != 0) {
-                       res = nvme_trans_completion(hdr,
-                                               SAM_STAT_CHECK_CONDITION,
-                                               ILLEGAL_REQUEST,
-                                               SCSI_ASC_INVALID_PARAMETER,
-                                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-                       break;
-               }
-               break;
-       default:
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               break;
-       }
-
-       return res;
-}
-
-static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                       u8 *cmd, u16 parm_list_len, u8 pf,
-                                       u8 sp, u8 cdb10)
-{
-       int res;
-       u8 *parm_list;
-       u16 bd_len;
-       u8 llbaa = 0;
-       u16 index, saved_index;
-       u8 page_code;
-       u16 mp_size;
-
-       /* Get parm list from data-in/out buffer */
-       parm_list = kmalloc(parm_list_len, GFP_KERNEL);
-       if (parm_list == NULL) {
-               res = -ENOMEM;
-               goto out;
-       }
-
-       res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
-       if (res)
-               goto out_mem;
-
-       nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
-       index = (cdb10) ? (MODE_SELECT_10_MPH_SIZE) : (MODE_SELECT_6_MPH_SIZE);
-
-       if (bd_len != 0) {
-               /* Block Descriptors present, parse */
-               nvme_trans_modesel_save_bd(ns, parm_list, index, bd_len, llbaa);
-               index += bd_len;
-       }
-       saved_index = index;
-
-       /* Multiple mode pages may be present; iterate through all of them. */
-       /* On the 1st pass, issue no NVMe commands; only check for CDB errors. */
-       do {
-               page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
-               mp_size = parm_list[index + 1] + 2;
-               if ((page_code != MODE_PAGE_CACHING) &&
-                   (page_code != MODE_PAGE_CONTROL) &&
-                   (page_code != MODE_PAGE_POWER_CONDITION)) {
-                       res = nvme_trans_completion(hdr,
-                                               SAM_STAT_CHECK_CONDITION,
-                                               ILLEGAL_REQUEST,
-                                               SCSI_ASC_INVALID_CDB,
-                                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-                       goto out_mem;
-               }
-               index += mp_size;
-       } while (index < parm_list_len);
-
-       /* On the 2nd pass, issue the NVMe commands */
-       index = saved_index;
-       do {
-               page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
-               mp_size = parm_list[index + 1] + 2;
-               res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
-                                                               page_code);
-               if (res)
-                       break;
-               index += mp_size;
-       } while (index < parm_list_len);
-
- out_mem:
-       kfree(parm_list);
- out:
-       return res;
-}
-
-/* Format Unit Helper Functions */
-
-static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
-                                            struct sg_io_hdr *hdr)
-{
-       int res = 0;
-       int nvme_sc;
-       u8 flbas;
-
-       /*
-        * SCSI expects that a MODE SELECT has been issued prior to a
-        * FORMAT UNIT, and that the block size and count are taken from
-        * the block descriptor in it.  If no MODE SELECT has been issued,
-        * FORMAT shall use the current values for both.
-        */
-
-       if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
-               struct nvme_id_ns *id_ns;
-
-               nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-               res = nvme_trans_status_code(hdr, nvme_sc);
-               if (res)
-                       return res;
-
-               if (ns->mode_select_num_blocks == 0)
-                       ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
-               if (ns->mode_select_block_len == 0) {
-                       flbas = (id_ns->flbas) & 0x0F;
-                       ns->mode_select_block_len =
-                                               (1 << (id_ns->lbaf[flbas].ds));
-               }
-
-               kfree(id_ns);
-       }
-
-       return 0;
-}
-
-static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
-                                       u8 format_prot_info, u8 *nvme_pf_code)
-{
-       int res;
-       u8 *parm_list;
-       u8 pf_usage, pf_code;
-
-       parm_list = kmalloc(len, GFP_KERNEL);
-       if (parm_list == NULL) {
-               res = -ENOMEM;
-               goto out;
-       }
-       res = nvme_trans_copy_from_user(hdr, parm_list, len);
-       if (res)
-               goto out_mem;
-
-       if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
-                               FORMAT_UNIT_IMMED_MASK) != 0) {
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               goto out_mem;
-       }
-
-       if (len == FORMAT_UNIT_LONG_PARM_LIST_LEN &&
-           (parm_list[FORMAT_UNIT_PROT_INT_OFFSET] & 0x0F) != 0) {
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               goto out_mem;
-       }
-       pf_usage = parm_list[FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET] &
-                       FORMAT_UNIT_PROT_FIELD_USAGE_MASK;
-       pf_code = (pf_usage << 2) | format_prot_info;
-       switch (pf_code) {
-       case 0:
-               *nvme_pf_code = 0;
-               break;
-       case 2:
-               *nvme_pf_code = 1;
-               break;
-       case 3:
-               *nvme_pf_code = 2;
-               break;
-       case 7:
-               *nvme_pf_code = 3;
-               break;
-       default:
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               break;
-       }
-
- out_mem:
-       kfree(parm_list);
- out:
-       return res;
-}
-
-static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                  u8 prot_info)
-{
-       int res;
-       int nvme_sc;
-       struct nvme_id_ns *id_ns;
-       u8 i;
-       u8 nlbaf;
-       u8 selected_lbaf = 0xFF;
-       u32 cdw10 = 0;
-       struct nvme_command c;
-
-       /*
-        * Loop through the LBA formats in id_ns to find the requested
-        * LBAF and place it in cdw10.
-        */
-       nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-       res = nvme_trans_status_code(hdr, nvme_sc);
-       if (res)
-               return res;
-
-       nlbaf = id_ns->nlbaf;
-
-       /* NLBAF is a zero-based count, so formats 0..nlbaf are valid */
-       for (i = 0; i <= nlbaf; i++) {
-               if (ns->mode_select_block_len == (1 << (id_ns->lbaf[i].ds))) {
-                       selected_lbaf = i;
-                       break;
-               }
-       }
-       if (selected_lbaf > 0x0F) {
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                               ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
-                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               goto out_free_id;
-       }
-       if (ns->mode_select_num_blocks != le64_to_cpu(id_ns->ncap)) {
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                               ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
-                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               goto out_free_id;
-       }
-
-       cdw10 |= prot_info << 5;
-       cdw10 |= selected_lbaf & 0x0F;
-       memset(&c, 0, sizeof(c));
-       c.format.opcode = nvme_admin_format_nvm;
-       c.format.nsid = cpu_to_le32(ns->ns_id);
-       c.format.cdw10 = cpu_to_le32(cdw10);
-
-       nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
-       res = nvme_trans_status_code(hdr, nvme_sc);
-
- out_free_id:
-       kfree(id_ns);
-       return res;
-}
-
-static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
-                                       struct nvme_trans_io_cdb *cdb_info,
-                                       u32 max_blocks)
-{
-       /* If using iovecs, send one nvme command per vector */
-       if (hdr->iovec_count > 0)
-               return hdr->iovec_count;
-       else if (cdb_info->xfer_len > max_blocks)
-               return ((cdb_info->xfer_len - 1) / max_blocks) + 1;
-       else
-               return 1;
-}
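-
-/*
- * Worked example with assumed values: a single-buffer transfer of 10000
- * blocks with max_blocks = 4096 yields ((10000 - 1) / 4096) + 1 = 3
- * NVMe commands: two full 4096-block commands plus a 1808-block
- * remainder, produced by the chunking loop in nvme_trans_do_nvme_io()
- * below.
- */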
-
-static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
-                                       struct nvme_trans_io_cdb *cdb_info)
-{
-       u16 control = 0;
-
-       /* When Protection information support is added, implement here */
-
-       if (cdb_info->fua > 0)
-               control |= NVME_RW_FUA;
-
-       return control;
-}
-
-static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                               struct nvme_trans_io_cdb *cdb_info, u8 is_write)
-{
-       int nvme_sc = NVME_SC_SUCCESS;
-       u32 num_cmds;
-       u64 unit_len;
-       u64 unit_num_blocks;    /* Number of blocks to xfer in each nvme cmd */
-       u32 retcode;
-       u32 i = 0;
-       u64 nvme_offset = 0;
-       void __user *next_mapping_addr;
-       struct nvme_command c;
-       u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
-       u16 control;
-       u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9);
-
-       num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
-
-       /*
-        * This loop handles two cases.
-        * First, when an SGL is used in the form of an iovec list:
-        *   - Use iov_base as the next mapping address for the nvme command_id
-        *   - Use iov_len as the data transfer length for the command.
-        * Second, when we have a single buffer:
-        *   - If larger than max_blocks, split it into chunks and offset
-        *     each nvme command accordingly.
-        */
-       for (i = 0; i < num_cmds; i++) {
-               memset(&c, 0, sizeof(c));
-               if (hdr->iovec_count > 0) {
-                       struct sg_iovec sgl;
-
-                       retcode = copy_from_user(&sgl, hdr->dxferp +
-                                       i * sizeof(struct sg_iovec),
-                                       sizeof(struct sg_iovec));
-                       if (retcode)
-                               return -EFAULT;
-                       unit_len = sgl.iov_len;
-                       unit_num_blocks = unit_len >> ns->lba_shift;
-                       next_mapping_addr = sgl.iov_base;
-               } else {
-                       unit_num_blocks = min((u64)max_blocks,
-                                       (cdb_info->xfer_len - nvme_offset));
-                       unit_len = unit_num_blocks << ns->lba_shift;
-                       next_mapping_addr = hdr->dxferp +
-                                       ((1 << ns->lba_shift) * nvme_offset);
-               }
-
-               c.rw.opcode = opcode;
-               c.rw.nsid = cpu_to_le32(ns->ns_id);
-               c.rw.slba = cpu_to_le64(cdb_info->lba + nvme_offset);
-               c.rw.length = cpu_to_le16(unit_num_blocks - 1);
-               control = nvme_trans_io_get_control(ns, cdb_info);
-               c.rw.control = cpu_to_le16(control);
-
-               if (get_capacity(ns->disk) - unit_num_blocks <
-                               cdb_info->lba + nvme_offset) {
-                       nvme_sc = NVME_SC_LBA_RANGE;
-                       break;
-               }
-               nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
-                               next_mapping_addr, unit_len, NULL, 0);
-               if (nvme_sc)
-                       break;
-
-               nvme_offset += unit_num_blocks;
-       }
-
-       return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-/* SCSI Command Translation Functions */
-
-static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
-                                                       u8 *cmd)
-{
-       int res = 0;
-       struct nvme_trans_io_cdb cdb_info = { 0, };
-       u8 opcode = cmd[0];
-       u64 xfer_bytes;
-       u64 sum_iov_len = 0;
-       struct sg_iovec sgl;
-       int i;
-       size_t not_copied;
-
-       /*
-        * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
-        * but are always in the same place in all other CDB sizes.
-        */
-       switch (opcode) {
-       case WRITE_6:
-       case READ_6:
-               break;
-       default:
-               cdb_info.fua = cmd[1] & 0x8;
-               cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
-               if (cdb_info.prot_info && !ns->pi_type) {
-                       return nvme_trans_completion(hdr,
-                                       SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST,
-                                       SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               }
-       }
-
-       switch (opcode) {
-       case WRITE_6:
-       case READ_6:
-               cdb_info.lba = get_unaligned_be24(&cmd[1]);
-               cdb_info.xfer_len = cmd[4];
-               if (cdb_info.xfer_len == 0)
-                       cdb_info.xfer_len = 256;
-               break;
-       case WRITE_10:
-       case READ_10:
-               cdb_info.lba = get_unaligned_be32(&cmd[2]);
-               cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
-               break;
-       case WRITE_12:
-       case READ_12:
-               cdb_info.lba = get_unaligned_be32(&cmd[2]);
-               cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
-               break;
-       case WRITE_16:
-       case READ_16:
-               cdb_info.lba = get_unaligned_be64(&cmd[2]);
-               cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
-               break;
-       default:
-               /* Will never really reach here */
-               res = -EIO;
-               goto out;
-       }
-
-       /* Calculate total length of transfer (in bytes) */
-       if (hdr->iovec_count > 0) {
-               for (i = 0; i < hdr->iovec_count; i++) {
-                       not_copied = copy_from_user(&sgl, hdr->dxferp +
-                                               i * sizeof(struct sg_iovec),
-                                               sizeof(struct sg_iovec));
-                       if (not_copied)
-                               return -EFAULT;
-                       sum_iov_len += sgl.iov_len;
-                       /* IO vector sizes should be multiples of block size */
-                       if (sgl.iov_len % (1 << ns->lba_shift) != 0) {
-                               res = nvme_trans_completion(hdr,
-                                               SAM_STAT_CHECK_CONDITION,
-                                               ILLEGAL_REQUEST,
-                                               SCSI_ASC_INVALID_PARAMETER,
-                                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-                               goto out;
-                       }
-               }
-       } else {
-               sum_iov_len = hdr->dxfer_len;
-       }
-
-       /* As per the sg ioctl how-to, if the lengths differ, use the lower one */
-       xfer_bytes = min(((u64)hdr->dxfer_len), sum_iov_len);
-
-       /* If the block count and actual data buffer size don't match, error out */
-       if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
-               res = -EINVAL;
-               goto out;
-       }
-
-       /* Check for 0 length transfer - it is not illegal */
-       if (cdb_info.xfer_len == 0)
-               goto out;
-
-       /* Send NVMe IO Command(s) */
-       res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
-
- out:
-       return res;
-}
-
-static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                                       u8 *cmd)
-{
-       int res = 0;
-       u8 evpd;
-       u8 page_code;
-       int alloc_len;
-       u8 *inq_response;
-
-       evpd = cmd[1] & 0x01;
-       page_code = cmd[2];
-       alloc_len = get_unaligned_be16(&cmd[3]);
-
-       inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
-                               GFP_KERNEL);
-       if (inq_response == NULL) {
-               res = -ENOMEM;
-               goto out_mem;
-       }
-
-       if (evpd == 0) {
-               if (page_code == INQ_STANDARD_INQUIRY_PAGE) {
-                       res = nvme_trans_standard_inquiry_page(ns, hdr,
-                                               inq_response, alloc_len);
-               } else {
-                       res = nvme_trans_completion(hdr,
-                                               SAM_STAT_CHECK_CONDITION,
-                                               ILLEGAL_REQUEST,
-                                               SCSI_ASC_INVALID_CDB,
-                                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               }
-       } else {
-               switch (page_code) {
-               case VPD_SUPPORTED_PAGES:
-                       res = nvme_trans_supported_vpd_pages(ns, hdr,
-                                               inq_response, alloc_len);
-                       break;
-               case VPD_SERIAL_NUMBER:
-                       res = nvme_trans_unit_serial_page(ns, hdr, inq_response,
-                                                               alloc_len);
-                       break;
-               case VPD_DEVICE_IDENTIFIERS:
-                       res = nvme_trans_device_id_page(ns, hdr, inq_response,
-                                                               alloc_len);
-                       break;
-               case VPD_EXTENDED_INQUIRY:
-                       res = nvme_trans_ext_inq_page(ns, hdr, alloc_len);
-                       break;
-               case VPD_BLOCK_LIMITS:
-                       res = nvme_trans_bdev_limits_page(ns, hdr, inq_response,
-                                                               alloc_len);
-                       break;
-               case VPD_BLOCK_DEV_CHARACTERISTICS:
-                       res = nvme_trans_bdev_char_page(ns, hdr, alloc_len);
-                       break;
-               default:
-                       res = nvme_trans_completion(hdr,
-                                               SAM_STAT_CHECK_CONDITION,
-                                               ILLEGAL_REQUEST,
-                                               SCSI_ASC_INVALID_CDB,
-                                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-                       break;
-               }
-       }
-       kfree(inq_response);
- out_mem:
-       return res;
-}
-
-static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                                       u8 *cmd)
-{
-       int res;
-       u16 alloc_len;
-       u8 pc;
-       u8 page_code;
-
-       if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               goto out;
-       }
-
-       page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
-       pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
-       if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               goto out;
-       }
-       alloc_len = get_unaligned_be16(&cmd[7]);
-       switch (page_code) {
-       case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
-               res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
-               break;
-       case LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE:
-               res = nvme_trans_log_info_exceptions(ns, hdr, alloc_len);
-               break;
-       case LOG_PAGE_TEMPERATURE_PAGE:
-               res = nvme_trans_log_temperature(ns, hdr, alloc_len);
-               break;
-       default:
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               break;
-       }
-
- out:
-       return res;
-}
-
-static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                                       u8 *cmd)
-{
-       u8 cdb10 = 0;
-       u16 parm_list_len;
-       u8 page_format;
-       u8 save_pages;
-
-       page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
-       save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
-
-       if (cmd[0] == MODE_SELECT) {
-               parm_list_len = cmd[4];
-       } else {
-               parm_list_len = cmd[7];
-               cdb10 = 1;
-       }
-
-       if (parm_list_len != 0) {
-               /*
-                * According to SPC-4 r24, a parameter list length field of 0
-                * shall not be considered an error
-                */
-               return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
-                                               page_format, save_pages, cdb10);
-       }
-
-       return 0;
-}
-
-static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                                       u8 *cmd)
-{
-       int res = 0;
-       u16 alloc_len;
-       u8 cdb10 = 0;
-
-       if (cmd[0] == MODE_SENSE) {
-               alloc_len = cmd[4];
-       } else {
-               alloc_len = get_unaligned_be16(&cmd[7]);
-               cdb10 = 1;
-       }
-
-       if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
-                       MODE_SENSE_PC_CURRENT_VALUES) {
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               goto out;
-       }
-
-       switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
-       case MODE_PAGE_CACHING:
-               res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-                                               cdb10,
-                                               &nvme_trans_fill_caching_page,
-                                               MODE_PAGE_CACHING_LEN);
-               break;
-       case MODE_PAGE_CONTROL:
-               res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-                                               cdb10,
-                                               &nvme_trans_fill_control_page,
-                                               MODE_PAGE_CONTROL_LEN);
-               break;
-       case MODE_PAGE_POWER_CONDITION:
-               res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-                                               cdb10,
-                                               &nvme_trans_fill_pow_cnd_page,
-                                               MODE_PAGE_POW_CND_LEN);
-               break;
-       case MODE_PAGE_INFO_EXCEP:
-               res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-                                               cdb10,
-                                               &nvme_trans_fill_inf_exc_page,
-                                               MODE_PAGE_INF_EXC_LEN);
-               break;
-       case MODE_PAGE_RETURN_ALL:
-               res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
-                                               cdb10,
-                                               &nvme_trans_fill_all_pages,
-                                               MODE_PAGE_ALL_LEN);
-               break;
-       default:
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               break;
-       }
-
- out:
-       return res;
-}
-
-static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                                       u8 *cmd, u8 cdb16)
-{
-       int res;
-       int nvme_sc;
-       u32 alloc_len;
-       u32 resp_size;
-       u32 xfer_len;
-       struct nvme_id_ns *id_ns;
-       u8 *response;
-
-       if (cdb16) {
-               alloc_len = get_unaligned_be32(&cmd[10]);
-               resp_size = READ_CAP_16_RESP_SIZE;
-       } else {
-               alloc_len = READ_CAP_10_RESP_SIZE;
-               resp_size = READ_CAP_10_RESP_SIZE;
-       }
-
-       nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
-       res = nvme_trans_status_code(hdr, nvme_sc);
-       if (res)
-               return res;
-
-       response = kzalloc(resp_size, GFP_KERNEL);
-       if (response == NULL) {
-               res = -ENOMEM;
-               goto out_free_id;
-       }
-       nvme_trans_fill_read_cap(response, id_ns, cdb16);
-
-       xfer_len = min(alloc_len, resp_size);
-       res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
-       kfree(response);
- out_free_id:
-       kfree(id_ns);
-       return res;
-}
-
-static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                                       u8 *cmd)
-{
-       int res;
-       int nvme_sc;
-       u32 alloc_len, xfer_len, resp_size;
-       u8 *response;
-       struct nvme_id_ctrl *id_ctrl;
-       u32 ll_length, lun_id;
-       u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
-       __be32 tmp_len;
-
-       switch (cmd[2]) {
-       default:
-               return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-       case ALL_LUNS_RETURNED:
-       case ALL_WELL_KNOWN_LUNS_RETURNED:
-       case RESTRICTED_LUNS_RETURNED:
-               nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
-               res = nvme_trans_status_code(hdr, nvme_sc);
-               if (res)
-                       return res;
-
-               ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
-               resp_size = ll_length + LUN_DATA_HEADER_SIZE;
-
-               alloc_len = get_unaligned_be32(&cmd[6]);
-               if (alloc_len < resp_size) {
-                       res = nvme_trans_completion(hdr,
-                                       SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-                       goto out_free_id;
-               }
-
-               response = kzalloc(resp_size, GFP_KERNEL);
-               if (response == NULL) {
-                       res = -ENOMEM;
-                       goto out_free_id;
-               }
-
-               /* The first LUN ID will always be 0 per the SAM spec */
-               for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
-                       /*
-                        * Set the LUN Id and then increment to the next LUN
-                        * location in the parameter data.
-                        */
-                       __be64 tmp_id = cpu_to_be64(lun_id);
-                       memcpy(&response[lun_id_offset], &tmp_id, sizeof(u64));
-                       lun_id_offset += LUN_ENTRY_SIZE;
-               }
-               tmp_len = cpu_to_be32(ll_length);
-               memcpy(response, &tmp_len, sizeof(u32));
-       }
-
-       xfer_len = min(alloc_len, resp_size);
-       res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
-       kfree(response);
- out_free_id:
-       kfree(id_ctrl);
-       return res;
-}
-
-static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                                       u8 *cmd)
-{
-       int res;
-       u8 alloc_len, xfer_len, resp_size;
-       u8 desc_format;
-       u8 *response;
-
-       desc_format = cmd[1] & 0x01;
-       alloc_len = cmd[4];
-
-       resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
-                                       (FIXED_FMT_SENSE_DATA_SIZE));
-       response = kzalloc(resp_size, GFP_KERNEL);
-       if (response == NULL) {
-               res = -ENOMEM;
-               goto out;
-       }
-
-       if (desc_format) {
-               /* Descriptor Format Sense Data */
-               response[0] = DESC_FORMAT_SENSE_DATA;
-               response[1] = NO_SENSE;
-               /* TODO How is LOW POWER CONDITION ON handled? (byte 2) */
-               response[2] = SCSI_ASC_NO_SENSE;
-               response[3] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               /* SDAT_OVFL = 0 | Additional Sense Length = 0 */
-       } else {
-               /* Fixed Format Sense Data */
-               response[0] = FIXED_SENSE_DATA;
-               /* Byte 1 = Obsolete */
-               response[2] = NO_SENSE; /* FM, EOM, ILI, SDAT_OVFL = 0 */
-               /* Bytes 3-6 - Information - set to zero */
-               response[7] = FIXED_SENSE_DATA_ADD_LENGTH;
-               /* Bytes 8-11 - Cmd Specific Information - set to zero */
-               response[12] = SCSI_ASC_NO_SENSE;
-               response[13] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
-               /* Byte 14 = Field Replaceable Unit Code = 0 */
-               /* Bytes 15-17 - SKSV=0; Sense Key Specific = 0 */
-       }
-
-       xfer_len = min(alloc_len, resp_size);
-       res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
-       kfree(response);
- out:
-       return res;
-}
-
-static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr)
-{
-       int nvme_sc;
-       struct nvme_command c;
-
-       memset(&c, 0, sizeof(c));
-       c.common.opcode = nvme_cmd_flush;
-       c.common.nsid = cpu_to_le32(ns->ns_id);
-
-       nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
-       return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                                       u8 *cmd)
-{
-       int res;
-       u8 parm_hdr_len = 0;
-       u8 nvme_pf_code = 0;
-       u8 format_prot_info, long_list, format_data;
-
-       format_prot_info = (cmd[1] & 0xc0) >> 6;
-       long_list = cmd[1] & 0x20;
-       format_data = cmd[1] & 0x10;
-
-       if (format_data != 0) {
-               if (format_prot_info != 0) {
-                       if (long_list == 0)
-                               parm_hdr_len = FORMAT_UNIT_SHORT_PARM_LIST_LEN;
-                       else
-                               parm_hdr_len = FORMAT_UNIT_LONG_PARM_LIST_LEN;
-               }
-       } else if (format_prot_info != 0) {
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               goto out;
-       }
-
-       /* Get parm header from data-in/out buffer */
-       /*
-        * According to the translation spec, the only fields in the parameter
-        * list we are concerned with are in the header. So allocate only that.
-        */
-       if (parm_hdr_len > 0) {
-               res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
-                                       format_prot_info, &nvme_pf_code);
-               if (res)
-                       goto out;
-       }
-
-       /*
-        * Attempt to activate any previously downloaded firmware image.
-        * Note that the result is ignored: the format proceeds whether or
-        * not a firmware image was activated.
-        */
-       res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
-
-       /* Determine Block size and count and send format command */
-       res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
-       if (res)
-               goto out;
-
-       res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
-
- out:
-       return res;
-}
-
-static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
-                                       struct sg_io_hdr *hdr,
-                                       u8 *cmd)
-{
-       /* Report NOT READY only when the controller is not yet ready */
-       if (!nvme_ctrl_ready(ns->ctrl))
-               return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                           NOT_READY, SCSI_ASC_LUN_NOT_READY,
-                                           SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-       else
-               return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
-}
-
-static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                                       u8 *cmd)
-{
-       int res = 0;
-       u32 buffer_offset, parm_list_length;
-       u8 buffer_id, mode;
-
-       parm_list_length = get_unaligned_be24(&cmd[6]);
-       if (parm_list_length % BYTES_TO_DWORDS != 0) {
-               /* NVMe expects the firmware file to be a whole number of dwords */
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               goto out;
-       }
-       buffer_id = cmd[2];
-       if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               goto out;
-       }
-       mode = cmd[1] & 0x1f;
-       buffer_offset = get_unaligned_be24(&cmd[3]);
-
-       switch (mode) {
-       case DOWNLOAD_SAVE_ACTIVATE:
-               res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
-                                               parm_list_length, buffer_offset,
-                                               buffer_id);
-               if (res)
-                       goto out;
-               res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
-               break;
-       case DOWNLOAD_SAVE_DEFER_ACTIVATE:
-               res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
-                                               parm_list_length, buffer_offset,
-                                               buffer_id);
-               break;
-       case ACTIVATE_DEFERRED_MICROCODE:
-               res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
-               break;
-       default:
-               res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                                       ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
-                                       SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               break;
-       }
-
- out:
-       return res;
-}
-
-struct scsi_unmap_blk_desc {
-       __be64  slba;
-       __be32  nlb;
-       u32     resv;
-};
-
-struct scsi_unmap_parm_list {
-       __be16  unmap_data_len;
-       __be16  unmap_blk_desc_data_len;
-       u32     resv;
-       struct scsi_unmap_blk_desc desc[0];
-};
-
-static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-                                                       u8 *cmd)
-{
-       struct scsi_unmap_parm_list *plist;
-       struct nvme_dsm_range *range;
-       struct nvme_command c;
-       int i, nvme_sc, res;
-       u16 ndesc, list_len;
-
-       list_len = get_unaligned_be16(&cmd[7]);
-       if (!list_len)
-               return -EINVAL;
-
-       plist = kmalloc(list_len, GFP_KERNEL);
-       if (!plist)
-               return -ENOMEM;
-
-       res = nvme_trans_copy_from_user(hdr, plist, list_len);
-       if (res)
-               goto out;
-
-       ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
-       if (!ndesc || ndesc > 256) {
-               res = -EINVAL;
-               goto out;
-       }
-
-       range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
-       if (!range) {
-               res = -ENOMEM;
-               goto out;
-       }
-
-       for (i = 0; i < ndesc; i++) {
-               range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
-               range[i].slba = cpu_to_le64(be64_to_cpu(plist->desc[i].slba));
-               range[i].cattr = 0;
-       }
-
-       memset(&c, 0, sizeof(c));
-       c.dsm.opcode = nvme_cmd_dsm;
-       c.dsm.nsid = cpu_to_le32(ns->ns_id);
-       c.dsm.nr = cpu_to_le32(ndesc - 1);
-       c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
-
-       nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
-                       ndesc * sizeof(*range));
-       res = nvme_trans_status_code(hdr, nvme_sc);
-
-       kfree(range);
- out:
-       kfree(plist);
-       return res;
-}
-
-static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
-{
-       u8 cmd[16];
-       int retcode;
-       unsigned int opcode;
-
-       if (hdr->cmdp == NULL)
-               return -EMSGSIZE;
-       if (hdr->cmd_len > sizeof(cmd))
-               return -EINVAL;
-       if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
-               return -EFAULT;
-
-       /*
-        * Prime the hdr with good status for scsi commands that don't require
-        * an nvme command for translation.
-        */
-       retcode = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
-       if (retcode)
-               return retcode;
-
-       opcode = cmd[0];
-
-       switch (opcode) {
-       case READ_6:
-       case READ_10:
-       case READ_12:
-       case READ_16:
-               retcode = nvme_trans_io(ns, hdr, 0, cmd);
-               break;
-       case WRITE_6:
-       case WRITE_10:
-       case WRITE_12:
-       case WRITE_16:
-               retcode = nvme_trans_io(ns, hdr, 1, cmd);
-               break;
-       case INQUIRY:
-               retcode = nvme_trans_inquiry(ns, hdr, cmd);
-               break;
-       case LOG_SENSE:
-               retcode = nvme_trans_log_sense(ns, hdr, cmd);
-               break;
-       case MODE_SELECT:
-       case MODE_SELECT_10:
-               retcode = nvme_trans_mode_select(ns, hdr, cmd);
-               break;
-       case MODE_SENSE:
-       case MODE_SENSE_10:
-               retcode = nvme_trans_mode_sense(ns, hdr, cmd);
-               break;
-       case READ_CAPACITY:
-               retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
-               break;
-       case SERVICE_ACTION_IN_16:
-               switch (cmd[1]) {
-               case SAI_READ_CAPACITY_16:
-                       retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
-                       break;
-               default:
-                       goto out;
-               }
-               break;
-       case REPORT_LUNS:
-               retcode = nvme_trans_report_luns(ns, hdr, cmd);
-               break;
-       case REQUEST_SENSE:
-               retcode = nvme_trans_request_sense(ns, hdr, cmd);
-               break;
-       case SYNCHRONIZE_CACHE:
-               retcode = nvme_trans_synchronize_cache(ns, hdr);
-               break;
-       case FORMAT_UNIT:
-               retcode = nvme_trans_format_unit(ns, hdr, cmd);
-               break;
-       case TEST_UNIT_READY:
-               retcode = nvme_trans_test_unit_ready(ns, hdr, cmd);
-               break;
-       case WRITE_BUFFER:
-               retcode = nvme_trans_write_buffer(ns, hdr, cmd);
-               break;
-       case UNMAP:
-               retcode = nvme_trans_unmap(ns, hdr, cmd);
-               break;
-       default:
- out:
-               retcode = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-                               ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
-                               SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-               break;
-       }
-       return retcode;
-}
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
-{
-       struct sg_io_hdr hdr;
-       int retcode;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
-       if (copy_from_user(&hdr, u_hdr, sizeof(hdr)))
-               return -EFAULT;
-       if (hdr.interface_id != 'S')
-               return -EINVAL;
-
-       /*
-        * A positive return code means a NVMe status, which has been
-        * translated to sense data.
-        */
-       retcode = nvme_scsi_translate(ns, &hdr);
-       if (retcode < 0)
-               return retcode;
-       if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
-               return -EFAULT;
-       return 0;
-}
-
-int nvme_sg_get_version_num(int __user *ip)
-{
-       return put_user(sg_version_num, ip);
-}
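
The file deleted above implemented the SG_IO ioctl path for NVMe namespaces. For context, a minimal user-space caller of that interface looked roughly like the sketch below (hypothetical, for illustration only; the field requirements mirror the checks in the deleted nvme_sg_io() and nvme_scsi_translate(): CAP_SYS_ADMIN, an interface_id of 'S', and a CDB of at most 16 bytes):

        #include <string.h>
        #include <scsi/sg.h>
        #include <sys/ioctl.h>

        /* Issue a SCSI TEST UNIT READY through the now-removed translation
         * layer; fd is assumed to be an open NVMe namespace block device.
         */
        static int test_unit_ready(int fd)
        {
                unsigned char cdb[6] = { 0x00 };        /* TEST UNIT READY */
                unsigned char sense[32];
                struct sg_io_hdr hdr;

                memset(&hdr, 0, sizeof(hdr));
                hdr.interface_id = 'S';         /* required by nvme_sg_io() */
                hdr.cmd_len = sizeof(cdb);      /* must not exceed 16 */
                hdr.cmdp = cdb;
                hdr.mx_sb_len = sizeof(sense);
                hdr.sbp = sense;
                hdr.dxfer_direction = SG_DXFER_NONE;
                hdr.timeout = 5000;             /* milliseconds */

                return ioctl(fd, SG_IO, &hdr);
        }

After this removal such callers are expected to fail; the native NVMe ioctls or nvme-cli tooling cover the same ground.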
index ff1f970..35f930d 100644 (file)
@@ -336,7 +336,7 @@ out:
 
 static void nvmet_execute_identify_nslist(struct nvmet_req *req)
 {
-       static const int buf_size = 4096;
+       static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
        struct nvmet_ns *ns;
        u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
@@ -367,6 +367,64 @@ out:
        nvmet_req_complete(req, status);
 }
 
+static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len,
+                                   void *id, off_t *off)
+{
+       struct nvme_ns_id_desc desc = {
+               .nidt = type,
+               .nidl = len,
+       };
+       u16 status;
+
+       status = nvmet_copy_to_sgl(req, *off, &desc, sizeof(desc));
+       if (status)
+               return status;
+       *off += sizeof(desc);
+
+       status = nvmet_copy_to_sgl(req, *off, id, len);
+       if (status)
+               return status;
+       *off += len;
+
+       return 0;
+}
+
+static void nvmet_execute_identify_desclist(struct nvmet_req *req)
+{
+       struct nvmet_ns *ns;
+       u16 status = 0;
+       off_t off = 0;
+
+       ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+       if (!ns) {
+               status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+               goto out;
+       }
+
+       if (memchr_inv(&ns->uuid, 0, sizeof(ns->uuid))) {
+               status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID,
+                                                 NVME_NIDT_UUID_LEN,
+                                                 &ns->uuid, &off);
+               if (status)
+                       goto out_put_ns;
+       }
+       if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) {
+               status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID,
+                                                 NVME_NIDT_NGUID_LEN,
+                                                 &ns->nguid, &off);
+               if (status)
+                       goto out_put_ns;
+       }
+
+       if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
+                       off) != NVME_IDENTIFY_DATA_SIZE - off)
+               status = NVME_SC_INTERNAL | NVME_SC_DNR;
+out_put_ns:
+       nvmet_put_namespace(ns);
+out:
+       nvmet_req_complete(req, status);
+}
+
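
The new handler above implements the NVMe 1.3 Namespace Identification Descriptor list (Identify CNS 03h): each emitted entry is a 4-byte nvme_ns_id_desc header followed by the identifier itself, and the zero-filled remainder of the 4 KB buffer terminates the list. A host-side parsing sketch, assuming buf holds the Identify response (illustrative only):

        #include <stddef.h>
        #include <stdio.h>

        static void parse_ns_desclist(const unsigned char *buf, size_t len)
        {
                size_t off = 0;

                while (off + 4 <= len) {
                        unsigned char nidt = buf[off];          /* descriptor type */
                        unsigned char nidl = buf[off + 1];      /* identifier length */

                        if (nidt == 0)          /* zeroed tail ends the list */
                                break;
                        /* NVME_NIDT_NGUID (2) and NVME_NIDT_UUID (3) are both
                         * 16 bytes long, matching the target code above.
                         */
                        printf("type %u, %u byte identifier\n", nidt, nidl);
                        off += 4 + nidl;
                }
        }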
 /*
  * A "mimimum viable" abort implementation: the command is mandatory in the
  * spec, but we are not required to do any useful work.  We couldn't really
@@ -504,7 +562,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
                }
                break;
        case nvme_admin_identify:
-               req->data_len = 4096;
+               req->data_len = NVME_IDENTIFY_DATA_SIZE;
                switch (cmd->identify.cns) {
                case NVME_ID_CNS_NS:
                        req->execute = nvmet_execute_identify_ns;
@@ -515,6 +573,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
                case NVME_ID_CNS_NS_ACTIVE_LIST:
                        req->execute = nvmet_execute_identify_nslist;
                        return 0;
+               case NVME_ID_CNS_NS_DESC_LIST:
+                       req->execute = nvmet_execute_identify_desclist;
+                       return 0;
                }
                break;
        case nvme_admin_abort_cmd:
index be8c800..a358ecd 100644 (file)
@@ -305,11 +305,41 @@ out_unlock:
 
 CONFIGFS_ATTR(nvmet_ns_, device_path);
 
+static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
+{
+       return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
+}
+
+static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
+                                         const char *page, size_t count)
+{
+       struct nvmet_ns *ns = to_nvmet_ns(item);
+       struct nvmet_subsys *subsys = ns->subsys;
+       int ret = 0;
+
+
+       mutex_lock(&subsys->lock);
+       if (ns->enabled) {
+               ret = -EBUSY;
+               goto out_unlock;
+       }
+
+
+       if (uuid_parse(page, &ns->uuid))
+               ret = -EINVAL;
+
+out_unlock:
+       mutex_unlock(&subsys->lock);
+       return ret ? ret : count;
+}
+
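
This surfaces as a writable device_uuid attribute next to the existing device_nguid one; presumably a canonical UUID string is written into the namespace's configfs directory (along the lines of /sys/kernel/config/nvmet/subsystems/<nqn>/namespaces/<nsid>/device_uuid) before the namespace is enabled, since the store handler above returns -EBUSY once ns->enabled is set.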
 static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
 {
        return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
 }
 
+CONFIGFS_ATTR(nvmet_ns_, device_uuid);
+
 static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
                const char *page, size_t count)
 {
@@ -379,6 +409,7 @@ CONFIGFS_ATTR(nvmet_ns_, enable);
 static struct configfs_attribute *nvmet_ns_attrs[] = {
        &nvmet_ns_attr_device_path,
        &nvmet_ns_attr_device_nguid,
+       &nvmet_ns_attr_device_uuid,
        &nvmet_ns_attr_enable,
        NULL,
 };
@@ -619,8 +650,45 @@ out_unlock:
 
 CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host);
 
+static ssize_t nvmet_subsys_version_show(struct config_item *item,
+                                             char *page)
+{
+       struct nvmet_subsys *subsys = to_subsys(item);
+
+       if (NVME_TERTIARY(subsys->ver))
+               return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
+                               (int)NVME_MAJOR(subsys->ver),
+                               (int)NVME_MINOR(subsys->ver),
+                               (int)NVME_TERTIARY(subsys->ver));
+       else
+               return snprintf(page, PAGE_SIZE, "%d.%d\n",
+                               (int)NVME_MAJOR(subsys->ver),
+                               (int)NVME_MINOR(subsys->ver));
+}
+
+static ssize_t nvmet_subsys_version_store(struct config_item *item,
+                                              const char *page, size_t count)
+{
+       struct nvmet_subsys *subsys = to_subsys(item);
+       int major, minor, tertiary = 0;
+       int ret;
+
+
+       ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
+       if (ret != 2 && ret != 3)
+               return -EINVAL;
+
+       down_write(&nvmet_config_sem);
+       subsys->ver = NVME_VS(major, minor, tertiary);
+       up_write(&nvmet_config_sem);
+
+       return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, version);
+
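
The show/store pair above leans on the version helpers from <linux/nvme.h>, which pack the version as major in bits 31:16, minor in bits 15:8 and tertiary in bits 7:0 (reproduced here for reference, as defined at the time of this series):

        #define NVME_VS(major, minor, tertiary) \
                (((major) << 16) | ((minor) << 8) | (tertiary))
        #define NVME_MAJOR(ver)         ((ver) >> 16)
        #define NVME_MINOR(ver)         (((ver) >> 8) & 0xff)
        #define NVME_TERTIARY(ver)      ((ver) & 0xff)

So writing "1.3" stores NVME_VS(1, 3, 0) == 0x00010300, and the sscanf() accepting either two or three fields is what lets both "1.2.1" and "1.3" parse.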
 static struct configfs_attribute *nvmet_subsys_attrs[] = {
        &nvmet_subsys_attr_attr_allow_any_host,
+       &nvmet_subsys_attr_version,
        NULL,
 };
 
index eb9399a..b5b4ac1 100644 (file)
@@ -380,6 +380,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 
        ns->nsid = nsid;
        ns->subsys = subsys;
+       uuid_gen(&ns->uuid);
 
        return ns;
 }
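
With this, freshly allocated namespaces start out with a random UUID from uuid_gen(), so the identify descriptor list added earlier in this series is never empty by default; the configfs device_uuid attribute can still overwrite it while the namespace is disabled.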
@@ -926,7 +927,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
        if (!subsys)
                return NULL;
 
-       subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */
+       subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
 
        switch (type) {
        case NVME_NQN_NVME:
index 1aaf597..8f3b57b 100644 (file)
@@ -53,7 +53,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
        e->portid = port->disc_addr.portid;
        /* we support only dynamic controllers */
        e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
-       e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH);
+       e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
        e->subtype = type;
        memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
        memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
@@ -185,7 +185,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
                return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                }
        case nvme_admin_identify:
-               req->data_len = 4096;
+               req->data_len = NVME_IDENTIFY_DATA_SIZE;
                switch (cmd->identify.cns) {
                case NVME_ID_CNS_CTRL:
                        req->execute =
index 2006fae..7692a96 100644 (file)
@@ -2096,20 +2096,22 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
        /* clear any response payload */
        memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
 
+       fod->data_sg = NULL;
+       fod->data_sg_cnt = 0;
+
        ret = nvmet_req_init(&fod->req,
                                &fod->queue->nvme_cq,
                                &fod->queue->nvme_sq,
                                &nvmet_fc_tgt_fcp_ops);
-       if (!ret) {     /* bad SQE content or invalid ctrl state */
-               nvmet_fc_abort_op(tgtport, fod);
+       if (!ret) {
+               /* bad SQE content or invalid ctrl state */
+               /* nvmet layer has already called op done to send rsp. */
                return;
        }
 
        /* keep a running counter of tail position */
        atomic_inc(&fod->queue->sqtail);
 
-       fod->data_sg = NULL;
-       fod->data_sg_cnt = 0;
        if (fod->total_length) {
                ret = nvmet_fc_alloc_tgt_pgs(fod);
                if (ret) {
index 294a661..1bb9d5b 100644 (file)
@@ -569,7 +569,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
                        struct nvmefc_tgt_fcp_req *tgt_fcpreq)
 {
        struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
-       int active;
 
        /*
         * mark aborted only in case there were 2 threads in transport
@@ -577,7 +576,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
         * after the abort request
         */
        spin_lock(&tfcp_req->reqlock);
-       active = tfcp_req->active;
        tfcp_req->aborted = true;
        spin_unlock(&tfcp_req->reqlock);
 
index c77940d..4012879 100644 (file)
@@ -21,7 +21,7 @@ static void nvmet_bio_done(struct bio *bio)
        struct nvmet_req *req = bio->bi_private;
 
        nvmet_req_complete(req,
-               bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+               bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
 
        if (bio != &req->inline_bio)
                bio_put(bio);
@@ -145,7 +145,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                if (status) {
-                       bio->bi_error = -EIO;
+                       bio->bi_status = BLK_STS_IOERR;
                        bio_endio(bio);
                } else {
                        submit_bio(bio);
index e503cff..5f55c68 100644 (file)
@@ -21,8 +21,6 @@
 #include "../host/nvme.h"
 #include "../host/fabrics.h"
 
-#define NVME_LOOP_AQ_DEPTH             256
-
 #define NVME_LOOP_MAX_SEGMENTS         256
 
 /*
@@ -31,7 +29,7 @@
  */
 #define NVME_LOOP_NR_AEN_COMMANDS      1
 #define NVME_LOOP_AQ_BLKMQ_DEPTH       \
-       (NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
+       (NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
 
 struct nvme_loop_iod {
        struct nvme_request     nvme_req;
@@ -45,7 +43,6 @@ struct nvme_loop_iod {
 };
 
 struct nvme_loop_ctrl {
-       spinlock_t              lock;
        struct nvme_loop_queue  *queues;
        u32                     queue_count;
 
@@ -59,7 +56,6 @@ struct nvme_loop_ctrl {
 
        struct nvmet_ctrl       *target_ctrl;
        struct work_struct      delete_work;
-       struct work_struct      reset_work;
 };
 
 static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -151,7 +147,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
        struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
 
        /* queue error recovery */
-       schedule_work(&iod->queue->ctrl->reset_work);
+       nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
 
        /* fail with DNR on admin cmd timeout */
        nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
@@ -159,17 +155,17 @@ nvme_loop_timeout(struct request *rq, bool reserved)
        return BLK_EH_HANDLED;
 }
 
-static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
                const struct blk_mq_queue_data *bd)
 {
        struct nvme_ns *ns = hctx->queue->queuedata;
        struct nvme_loop_queue *queue = hctx->driver_data;
        struct request *req = bd->rq;
        struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
-       int ret;
+       blk_status_t ret;
 
        ret = nvme_setup_cmd(ns, req, &iod->cmd);
-       if (ret != BLK_MQ_RQ_QUEUE_OK)
+       if (ret)
                return ret;
 
        iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
@@ -179,16 +175,15 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
                nvme_cleanup_cmd(req);
                blk_mq_start_request(req);
                nvme_loop_queue_response(&iod->req);
-               return BLK_MQ_RQ_QUEUE_OK;
+               return BLK_STS_OK;
        }
 
        if (blk_rq_bytes(req)) {
                iod->sg_table.sgl = iod->first_sgl;
-               ret = sg_alloc_table_chained(&iod->sg_table,
+               if (sg_alloc_table_chained(&iod->sg_table,
                                blk_rq_nr_phys_segments(req),
-                               iod->sg_table.sgl);
-               if (ret)
-                       return BLK_MQ_RQ_QUEUE_BUSY;
+                               iod->sg_table.sgl))
+                       return BLK_STS_RESOURCE;
 
                iod->req.sg = iod->sg_table.sgl;
                iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
@@ -197,7 +192,7 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
        blk_mq_start_request(req);
 
        schedule_work(&iod->work);
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 }
 
 static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
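
This hunk is part of the tree-wide switch of ->queue_rq() from the BLK_MQ_RQ_QUEUE_* integers to blk_status_t: BLK_STS_OK means the request was dispatched, BLK_STS_RESOURCE (replacing BLK_MQ_RQ_QUEUE_BUSY) asks blk-mq to requeue and retry later, and nvme_setup_cmd() now returns a blk_status_t that can be passed straight through.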
@@ -234,15 +229,10 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set,
                struct request *req, unsigned int hctx_idx,
                unsigned int numa_node)
 {
-       return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req),
-                       hctx_idx + 1);
-}
+       struct nvme_loop_ctrl *ctrl = set->driver_data;
 
-static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set,
-               struct request *req, unsigned int hctx_idx,
-               unsigned int numa_node)
-{
-       return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0);
+       return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
+                       (set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
 }
 
 static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -280,7 +270,7 @@ static const struct blk_mq_ops nvme_loop_mq_ops = {
 static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
        .queue_rq       = nvme_loop_queue_rq,
        .complete       = nvme_loop_complete_rq,
-       .init_request   = nvme_loop_init_admin_request,
+       .init_request   = nvme_loop_init_request,
        .init_hctx      = nvme_loop_init_admin_hctx,
        .timeout        = nvme_loop_timeout,
 };
@@ -467,7 +457,7 @@ static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl)
        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
                return -EBUSY;
 
-       if (!schedule_work(&ctrl->delete_work))
+       if (!queue_work(nvme_wq, &ctrl->delete_work))
                return -EBUSY;
 
        return 0;
@@ -501,8 +491,8 @@ static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
 
 static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 {
-       struct nvme_loop_ctrl *ctrl = container_of(work,
-                                       struct nvme_loop_ctrl, reset_work);
+       struct nvme_loop_ctrl *ctrl =
+               container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
        bool changed;
        int ret;
 
@@ -540,21 +530,6 @@ out_disable:
        nvme_put_ctrl(&ctrl->ctrl);
 }
 
-static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
-{
-       struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
-
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-               return -EBUSY;
-
-       if (!schedule_work(&ctrl->reset_work))
-               return -EBUSY;
-
-       flush_work(&ctrl->reset_work);
-
-       return 0;
-}
-
 static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
        .name                   = "loop",
        .module                 = THIS_MODULE,
@@ -562,11 +537,9 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
        .reg_read32             = nvmf_reg_read32,
        .reg_read64             = nvmf_reg_read64,
        .reg_write32            = nvmf_reg_write32,
-       .reset_ctrl             = nvme_loop_reset_ctrl,
        .free_ctrl              = nvme_loop_free_ctrl,
        .submit_async_event     = nvme_loop_submit_async_event,
        .delete_ctrl            = nvme_loop_del_ctrl,
-       .get_subsysnqn          = nvmf_get_subsysnqn,
 };
 
 static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -629,15 +602,13 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
        INIT_LIST_HEAD(&ctrl->list);
 
        INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
-       INIT_WORK(&ctrl->reset_work, nvme_loop_reset_ctrl_work);
+       INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work);
 
        ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
                                0 /* no quirks, we're perfect! */);
        if (ret)
                goto out_put_ctrl;
 
-       spin_lock_init(&ctrl->lock);
-
        ret = -ENOMEM;
 
        ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -766,7 +737,7 @@ static void __exit nvme_loop_cleanup_module(void)
                __nvme_loop_del_ctrl(ctrl);
        mutex_unlock(&nvme_loop_ctrl_mutex);
 
-       flush_scheduled_work();
+       flush_workqueue(nvme_wq);
 }
 
 module_init(nvme_loop_init_module);
index cfc5c7f..747bbdb 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/uuid.h>
 #include <linux/nvme.h>
 #include <linux/configfs.h>
 #include <linux/rcupdate.h>
@@ -46,6 +47,7 @@ struct nvmet_ns {
        u32                     blksize_shift;
        loff_t                  size;
        u8                      nguid[16];
+       uuid_t                  uuid;
 
        bool                    enabled;
        struct nvmet_subsys     *subsys;
index 9e45cde..56a4cba 100644 (file)
@@ -1027,7 +1027,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
        queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
        queue->send_queue_size = le16_to_cpu(req->hrqsize);
 
-       if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
+       if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
                return NVME_RDMA_CM_INVALID_HSQSIZE;
 
        /* XXX: Should we enforce some kind of max for IO queues? */
@@ -1307,53 +1307,44 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 
 /**
  * nvmet_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id:     rdma_cm id, used for nvmet port
  * @queue:      nvmet rdma queue (cm id qp_context)
- * @addr:      nvmet address (cm_id context)
  *
  * DEVICE_REMOVAL event notifies us that the RDMA device is about
- * to unplug so we should take care of destroying our RDMA resources.
- * This event will be generated for each allocated cm_id.
+ * to unplug. Note that this event can be generated on a normal
+ * queue cm_id and/or a device-bound listener cm_id (in which case
+ * queue will be NULL).
  *
- * Note that this event can be generated on a normal queue cm_id
- * and/or a device bound listener cm_id (where in this case
- * queue will be null).
- *
- * we claim ownership on destroying the cm_id. For queues we move
- * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * We registered an ib_client to handle device removal for queues,
+ * so we only need to handle the listening port cm_ids. In this case
  * we nullify the priv to prevent double cm_id destruction and destroying
  * the cm_id implicitly by returning a non-zero rc to the callout.
  */
 static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
                struct nvmet_rdma_queue *queue)
 {
-       unsigned long flags;
-
-       if (!queue) {
-               struct nvmet_port *port = cm_id->context;
+       struct nvmet_port *port;
 
+       if (queue) {
                /*
-                * This is a listener cm_id. Make sure that
-                * future remove_port won't invoke a double
-                * cm_id destroy. use atomic xchg to make sure
-                * we don't compete with remove_port.
-                */
-               if (xchg(&port->priv, NULL) != cm_id)
-                       return 0;
-       } else {
-               /*
-                * This is a queue cm_id. Make sure that
-                * release queue will not destroy the cm_id
-                * and schedule all ctrl queues removal (only
-                * if the queue is not disconnecting already).
+                * This is a queue cm_id. We have registered
+                * an ib_client to handle queue removal,
+                * so don't interfere and just return.
                 */
-               spin_lock_irqsave(&queue->state_lock, flags);
-               if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
-                       queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
-               spin_unlock_irqrestore(&queue->state_lock, flags);
-               nvmet_rdma_queue_disconnect(queue);
-               flush_scheduled_work();
+               return 0;
        }
 
+       port = cm_id->context;
+
+       /*
+        * This is a listener cm_id. Make sure that
+        * future remove_port won't invoke a double
+        * cm_id destroy. use atomic xchg to make sure
+        * we don't compete with remove_port.
+        */
+       if (xchg(&port->priv, NULL) != cm_id)
+               return 0;
+
        /*
         * We need to return 1 so that the core will destroy
         * its own ID.  What a great API design..
@@ -1519,9 +1510,51 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
        .delete_ctrl            = nvmet_rdma_delete_ctrl,
 };
 
+static void nvmet_rdma_add_one(struct ib_device *ib_device)
+{
+}
+
+static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+       struct nvmet_rdma_queue *queue;
+
+       /* Device is being removed, delete all queues using this device */
+       mutex_lock(&nvmet_rdma_queue_mutex);
+       list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
+               if (queue->dev->device != ib_device)
+                       continue;
+
+               pr_info("Removing queue %d\n", queue->idx);
+               __nvmet_rdma_queue_disconnect(queue);
+       }
+       mutex_unlock(&nvmet_rdma_queue_mutex);
+
+       flush_scheduled_work();
+}
+
+static struct ib_client nvmet_rdma_ib_client = {
+       .name   = "nvmet_rdma",
+       .add = nvmet_rdma_add_one,
+       .remove = nvmet_rdma_remove_one
+};
+
 static int __init nvmet_rdma_init(void)
 {
-       return nvmet_register_transport(&nvmet_rdma_ops);
+       int ret;
+
+       ret = ib_register_client(&nvmet_rdma_ib_client);
+       if (ret)
+               return ret;
+
+       ret = nvmet_register_transport(&nvmet_rdma_ops);
+       if (ret)
+               goto err_ib_client;
+
+       return 0;
+
+err_ib_client:
+       ib_unregister_client(&nvmet_rdma_ib_client);
+       return ret;
 }
 
 static void __exit nvmet_rdma_exit(void)
@@ -1544,6 +1577,7 @@ static void __exit nvmet_rdma_exit(void)
        mutex_unlock(&nvmet_rdma_queue_mutex);
 
        flush_scheduled_work();
+       ib_unregister_client(&nvmet_rdma_ib_client);
        ida_destroy(&nvmet_rdma_queue_ida);
 }
 
index 74cf5ff..c80e37a 100644 (file)
@@ -896,7 +896,7 @@ int pci_read_config_byte(const struct pci_dev *dev, int where, u8 *val)
 {
        if (pci_dev_is_disconnected(dev)) {
                *val = ~0;
-               return -ENODEV;
+               return PCIBIOS_DEVICE_NOT_FOUND;
        }
        return pci_bus_read_config_byte(dev->bus, dev->devfn, where, val);
 }
@@ -906,7 +906,7 @@ int pci_read_config_word(const struct pci_dev *dev, int where, u16 *val)
 {
        if (pci_dev_is_disconnected(dev)) {
                *val = ~0;
-               return -ENODEV;
+               return PCIBIOS_DEVICE_NOT_FOUND;
        }
        return pci_bus_read_config_word(dev->bus, dev->devfn, where, val);
 }
@@ -917,7 +917,7 @@ int pci_read_config_dword(const struct pci_dev *dev, int where,
 {
        if (pci_dev_is_disconnected(dev)) {
                *val = ~0;
-               return -ENODEV;
+               return PCIBIOS_DEVICE_NOT_FOUND;
        }
        return pci_bus_read_config_dword(dev->bus, dev->devfn, where, val);
 }
@@ -926,7 +926,7 @@ EXPORT_SYMBOL(pci_read_config_dword);
 int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val)
 {
        if (pci_dev_is_disconnected(dev))
-               return -ENODEV;
+               return PCIBIOS_DEVICE_NOT_FOUND;
        return pci_bus_write_config_byte(dev->bus, dev->devfn, where, val);
 }
 EXPORT_SYMBOL(pci_write_config_byte);
@@ -934,7 +934,7 @@ EXPORT_SYMBOL(pci_write_config_byte);
 int pci_write_config_word(const struct pci_dev *dev, int where, u16 val)
 {
        if (pci_dev_is_disconnected(dev))
-               return -ENODEV;
+               return PCIBIOS_DEVICE_NOT_FOUND;
        return pci_bus_write_config_word(dev->bus, dev->devfn, where, val);
 }
 EXPORT_SYMBOL(pci_write_config_word);
@@ -943,7 +943,7 @@ int pci_write_config_dword(const struct pci_dev *dev, int where,
                                         u32 val)
 {
        if (pci_dev_is_disconnected(dev))
-               return -ENODEV;
+               return PCIBIOS_DEVICE_NOT_FOUND;
        return pci_bus_write_config_dword(dev->bus, dev->devfn, where, val);
 }
 EXPORT_SYMBOL(pci_write_config_dword);
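
Returning PCIBIOS_DEVICE_NOT_FOUND keeps these accessors inside the PCIBIOS_* error space the config API has always used, instead of leaking a bare errno. A hedged caller-side fragment (pdev assumed to be a valid struct pci_dev *); pcibios_err_to_errno() from <linux/pci.h> translates back to an errno, turning PCIBIOS_DEVICE_NOT_FOUND into -ENODEV:

        u32 val;
        int ret;

        ret = pci_read_config_dword(pdev, PCI_VENDOR_ID, &val);
        if (ret)
                return pcibios_err_to_errno(ret);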
index 175edad..2942066 100644 (file)
@@ -5,6 +5,7 @@
 config PCI_EPF_TEST
        tristate "PCI Endpoint Test driver"
        depends on PCI_ENDPOINT
+       select CRC32
        help
           Enable this configuration option to enable the test driver
           for PCI Endpoint.
index 0018603..47070cf 100644 (file)
 #include "pci.h"
 
 /*
- * The UUID is defined in the PCI Firmware Specification available here:
+ * The GUID is defined in the PCI Firmware Specification available here:
  * https://www.pcisig.com/members/downloads/pcifw_r3_1_13Dec10.pdf
  */
-const u8 pci_acpi_dsm_uuid[] = {
-       0xd0, 0x37, 0xc9, 0xe5, 0x53, 0x35, 0x7a, 0x4d,
-       0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
-};
+const guid_t pci_acpi_dsm_guid =
+       GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
+                 0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d);
 
 #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
 static int acpi_get_rc_addr(struct acpi_device *adev, struct resource *res)
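
The conversion works because guid_t stores its first three fields little-endian; both spellings below describe the same 16 bytes of the _DSM GUID e5c937d0-3553-4d7a-9117-ea4d19c3434d (kernel-context illustration only):

        static const u8 dsm_bytes[16] = {
                0xd0, 0x37, 0xc9, 0xe5, 0x53, 0x35, 0x7a, 0x4d,
                0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d,
        };
        static const guid_t dsm_guid =
                GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a,
                          0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d);
        /* memcmp(&dsm_guid, dsm_bytes, sizeof(dsm_bytes)) == 0 */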
@@ -680,7 +679,7 @@ void acpi_pci_add_bus(struct pci_bus *bus)
        if (!pci_is_root_bus(bus))
                return;
 
-       obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), pci_acpi_dsm_uuid, 3,
+       obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), &pci_acpi_dsm_guid, 3,
                                RESET_DELAY_DSM, NULL);
        if (!obj)
                return;
@@ -745,7 +744,7 @@ static void pci_acpi_optimize_delay(struct pci_dev *pdev,
        if (bridge->ignore_reset_delay)
                pdev->d3cold_delay = 0;
 
-       obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 3,
+       obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 3,
                                FUNCTION_DELAY_DSM, NULL);
        if (!obj)
                return;
index 5135737..2d8db3e 100644 (file)
@@ -172,7 +172,7 @@ static int dsm_get_label(struct device *dev, char *buf,
        if (!handle)
                return -1;
 
-       obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 0x2,
+       obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 0x2,
                                DEVICE_LABEL_DSM, NULL);
        if (!obj)
                return -1;
@@ -212,7 +212,7 @@ static bool device_has_dsm(struct device *dev)
        if (!handle)
                return false;
 
-       return !!acpi_check_dsm(handle, pci_acpi_dsm_uuid, 0x2,
+       return !!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 0x2,
                                1 << DEVICE_LABEL_DSM);
 }
 
index 1482d13..e432ec8 100644 (file)
@@ -495,64 +495,54 @@ static struct irq_chip amd_gpio_irqchip = {
        .flags        = IRQCHIP_SKIP_SET_WAKE,
 };
 
-static void amd_gpio_irq_handler(struct irq_desc *desc)
+#define PIN_IRQ_PENDING        (BIT(INTERRUPT_STS_OFF) | BIT(WAKE_STS_OFF))
+
+static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id)
 {
-       u32 i;
-       u32 off;
-       u32 reg;
-       u32 pin_reg;
-       u64 reg64;
-       int handled = 0;
-       unsigned int irq;
+       struct amd_gpio *gpio_dev = dev_id;
+       struct gpio_chip *gc = &gpio_dev->gc;
+       irqreturn_t ret = IRQ_NONE;
+       unsigned int i, irqnr;
        unsigned long flags;
-       struct irq_chip *chip = irq_desc_get_chip(desc);
-       struct gpio_chip *gc = irq_desc_get_handler_data(desc);
-       struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
+       u32 *regs, regval;
+       u64 status, mask;
 
-       chained_irq_enter(chip, desc);
-       /*enable GPIO interrupt again*/
+       /* Read the wake status */
        raw_spin_lock_irqsave(&gpio_dev->lock, flags);
-       reg = readl(gpio_dev->base + WAKE_INT_STATUS_REG1);
-       reg64 = reg;
-       reg64 = reg64 << 32;
-
-       reg = readl(gpio_dev->base + WAKE_INT_STATUS_REG0);
-       reg64 |= reg;
+       status = readl(gpio_dev->base + WAKE_INT_STATUS_REG1);
+       status <<= 32;
+       status |= readl(gpio_dev->base + WAKE_INT_STATUS_REG0);
        raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
 
-       /*
-        * first 46 bits indicates interrupt status.
-        * one bit represents four interrupt sources.
-       */
-       for (off = 0; off < 46 ; off++) {
-               if (reg64 & BIT(off)) {
-                       for (i = 0; i < 4; i++) {
-                               pin_reg = readl(gpio_dev->base +
-                                               (off * 4 + i) * 4);
-                               if ((pin_reg & BIT(INTERRUPT_STS_OFF)) ||
-                                       (pin_reg & BIT(WAKE_STS_OFF))) {
-                                       irq = irq_find_mapping(gc->irqdomain,
-                                                               off * 4 + i);
-                                       generic_handle_irq(irq);
-                                       writel(pin_reg,
-                                               gpio_dev->base
-                                               + (off * 4 + i) * 4);
-                                       handled++;
-                               }
-                       }
+       /* Bits 0-45 contain the relevant status bits */
+       status &= (1ULL << 46) - 1;
+       regs = gpio_dev->base;
+       for (mask = 1, irqnr = 0; status; mask <<= 1, regs += 4, irqnr += 4) {
+               if (!(status & mask))
+                       continue;
+               status &= ~mask;
+
+               /* Each status bit covers four pins */
+               for (i = 0; i < 4; i++) {
+                       regval = readl(regs + i);
+                       if (!(regval & PIN_IRQ_PENDING))
+                               continue;
+                       irq = irq_find_mapping(gc->irqdomain, irqnr + i);
+                       generic_handle_irq(irq);
+                       /* Clear interrupt */
+                       writel(regval, regs + i);
+                       ret = IRQ_HANDLED;
                }
        }
 
-       if (handled == 0)
-               handle_bad_irq(desc);
-
+       /* Signal EOI to the GPIO unit */
        raw_spin_lock_irqsave(&gpio_dev->lock, flags);
-       reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
-       reg |= EOI_MASK;
-       writel(reg, gpio_dev->base + WAKE_INT_MASTER_REG);
+       regval = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
+       regval |= EOI_MASK;
+       writel(regval, gpio_dev->base + WAKE_INT_MASTER_REG);
        raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
 
-       chained_irq_exit(chip, desc);
+       return ret;
 }
 
 static int amd_get_groups_count(struct pinctrl_dev *pctldev)
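
The rewritten handler folds the two 32-bit wake status registers into a single 64-bit word and scans it bit by bit. Since each status bit covers a group of four pins, the indexing works out as in this arithmetic sketch (comment only, mirroring the loop above):

        /* For status bit b (0 <= b <= 45) and group member i (0..3):
         *   pin number   = 4 * b + i                (irqnr + i in the loop)
         *   pin register = gpio_dev->base + 4 * (4 * b + i)
         * so at most 46 * 4 = 184 pins are scanned, which is why the
         * status word is masked with (1ULL << 46) - 1 first.
         */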
@@ -821,10 +811,11 @@ static int amd_gpio_probe(struct platform_device *pdev)
                goto out2;
        }
 
-       gpiochip_set_chained_irqchip(&gpio_dev->gc,
-                                &amd_gpio_irqchip,
-                                irq_base,
-                                amd_gpio_irq_handler);
+       ret = devm_request_irq(&pdev->dev, irq_base, amd_gpio_irq_handler, 0,
+                              KBUILD_MODNAME, gpio_dev);
+       if (ret)
+               goto out2;
+
        platform_set_drvdata(pdev, gpio_dev);
 
        dev_dbg(&pdev->dev, "amd gpio driver loaded\n");
index f141aa0..9dd981d 100644 (file)
@@ -143,9 +143,6 @@ struct rockchip_drv {
  * @gpio_chip: gpiolib chip
  * @grange: gpio range
  * @slock: spinlock for the gpio bank
- * @irq_lock: bus lock for irq chip
- * @new_irqs: newly configured irqs which must be muxed as GPIOs in
- *     irq_bus_sync_unlock()
  */
 struct rockchip_pin_bank {
        void __iomem                    *reg_base;
@@ -168,8 +165,6 @@ struct rockchip_pin_bank {
        struct pinctrl_gpio_range       grange;
        raw_spinlock_t                  slock;
        u32                             toggle_edge_mode;
-       struct mutex                    irq_lock;
-       u32                             new_irqs;
 };
 
 #define PIN_BANK(id, pins, label)                      \
@@ -2134,12 +2129,11 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
        int ret;
 
        /* make sure the pin is configured as gpio input */
-       ret = rockchip_verify_mux(bank, d->hwirq, RK_FUNC_GPIO);
+       ret = rockchip_set_mux(bank, d->hwirq, RK_FUNC_GPIO);
        if (ret < 0)
                return ret;
 
-       bank->new_irqs |= mask;
-
+       clk_enable(bank->clk);
        raw_spin_lock_irqsave(&bank->slock, flags);
 
        data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
@@ -2197,6 +2191,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
        default:
                irq_gc_unlock(gc);
                raw_spin_unlock_irqrestore(&bank->slock, flags);
+               clk_disable(bank->clk);
                return -EINVAL;
        }
 
@@ -2205,6 +2200,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type)
 
        irq_gc_unlock(gc);
        raw_spin_unlock_irqrestore(&bank->slock, flags);
+       clk_disable(bank->clk);
 
        return 0;
 }
@@ -2248,34 +2244,6 @@ static void rockchip_irq_disable(struct irq_data *d)
        clk_disable(bank->clk);
 }
 
-static void rockchip_irq_bus_lock(struct irq_data *d)
-{
-       struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
-       struct rockchip_pin_bank *bank = gc->private;
-
-       clk_enable(bank->clk);
-       mutex_lock(&bank->irq_lock);
-}
-
-static void rockchip_irq_bus_sync_unlock(struct irq_data *d)
-{
-       struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
-       struct rockchip_pin_bank *bank = gc->private;
-
-       while (bank->new_irqs) {
-               unsigned int irq = __ffs(bank->new_irqs);
-               int ret;
-
-               ret = rockchip_set_mux(bank, irq, RK_FUNC_GPIO);
-               WARN_ON(ret < 0);
-
-               bank->new_irqs &= ~BIT(irq);
-       }
-
-       mutex_unlock(&bank->irq_lock);
-       clk_disable(bank->clk);
-}
-
 static int rockchip_interrupts_register(struct platform_device *pdev,
                                                struct rockchip_pinctrl *info)
 {
@@ -2342,9 +2310,6 @@ static int rockchip_interrupts_register(struct platform_device *pdev,
                gc->chip_types[0].chip.irq_suspend = rockchip_irq_suspend;
                gc->chip_types[0].chip.irq_resume = rockchip_irq_resume;
                gc->chip_types[0].chip.irq_set_type = rockchip_irq_set_type;
-               gc->chip_types[0].chip.irq_bus_lock = rockchip_irq_bus_lock;
-               gc->chip_types[0].chip.irq_bus_sync_unlock =
-                                               rockchip_irq_bus_sync_unlock;
                gc->wake_enabled = IRQ_MSK(bank->nr_pins);
 
                irq_set_chained_handler_and_data(bank->irq,
@@ -2518,7 +2483,6 @@ static struct rockchip_pin_ctrl *rockchip_pinctrl_get_soc_data(
                int bank_pins = 0;
 
                raw_spin_lock_init(&bank->slock);
-               mutex_init(&bank->irq_lock);
                bank->drvdata = d;
                bank->pin_base = ctrl->nr_pins;
                ctrl->nr_pins += bank->nr_pins;
index d3c5f5d..222b668 100644 (file)
@@ -798,7 +798,7 @@ static int stm32_pconf_parse_conf(struct pinctrl_dev *pctldev,
                break;
        case PIN_CONFIG_OUTPUT:
                __stm32_gpio_set(bank, offset, arg);
-               ret = stm32_pmx_gpio_set_direction(pctldev, NULL, pin, false);
+               ret = stm32_pmx_gpio_set_direction(pctldev, range, pin, false);
                break;
        default:
                ret = -EINVAL;
index ef29f18..4cc2f4e 100644 (file)
        } \
 }
 
-#ifdef CONFIG_PM_SLEEP
 static u8 suspend_prep_ok;
 static u32 suspend_shlw_ctr_temp, suspend_deep_ctr_temp;
 static u64 suspend_shlw_res_temp, suspend_deep_res_temp;
-#endif
 
 struct telemetry_susp_stats {
        u32 shlw_swake_ctr;
@@ -807,7 +805,6 @@ static const struct file_operations telem_ioss_trc_verb_ops = {
        .release        = single_release,
 };
 
-#ifdef CONFIG_PM_SLEEP
 static int pm_suspend_prep_cb(void)
 {
        struct telemetry_evtlog evtlog[TELEM_MAX_OS_ALLOCATED_EVENTS];
@@ -937,7 +934,6 @@ static int pm_notification(struct notifier_block *this,
 static struct notifier_block pm_notifier = {
        .notifier_call = pm_notification,
 };
-#endif /* CONFIG_PM_SLEEP */
 
 static int __init telemetry_debugfs_init(void)
 {
@@ -960,14 +956,13 @@ static int __init telemetry_debugfs_init(void)
        if (err < 0)
                return -EINVAL;
 
-
-#ifdef CONFIG_PM_SLEEP
        register_pm_notifier(&pm_notifier);
-#endif /* CONFIG_PM_SLEEP */
 
        debugfs_conf->telemetry_dbg_dir = debugfs_create_dir("telemetry", NULL);
-       if (!debugfs_conf->telemetry_dbg_dir)
-               return -ENOMEM;
+       if (!debugfs_conf->telemetry_dbg_dir) {
+               err = -ENOMEM;
+               goto out_pm;
+       }
 
        f = debugfs_create_file("pss_info", S_IFREG | S_IRUGO,
                                debugfs_conf->telemetry_dbg_dir, NULL,
@@ -1014,6 +1009,8 @@ static int __init telemetry_debugfs_init(void)
 out:
        debugfs_remove_recursive(debugfs_conf->telemetry_dbg_dir);
        debugfs_conf->telemetry_dbg_dir = NULL;
+out_pm:
+       unregister_pm_notifier(&pm_notifier);
 
        return err;
 }
@@ -1022,6 +1019,7 @@ static void __exit telemetry_debugfs_exit(void)
 {
        debugfs_remove_recursive(debugfs_conf->telemetry_dbg_dir);
        debugfs_conf->telemetry_dbg_dir = NULL;
+       unregister_pm_notifier(&pm_notifier);
 }
 
 late_initcall(telemetry_debugfs_init);
index 6fb3fd5..b7cbd5d 100644 (file)
@@ -2672,7 +2672,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
         */
        if (basedev->state < DASD_STATE_READY) {
                while ((req = blk_fetch_request(block->request_queue)))
-                       __blk_end_request_all(req, -EIO);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
                return;
        }
 
@@ -2692,7 +2692,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
                                      "Rejecting write request %p",
                                      req);
                        blk_start_request(req);
-                       __blk_end_request_all(req, -EIO);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
                        continue;
                }
                if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
@@ -2702,7 +2702,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
                                      "Rejecting failfast request %p",
                                      req);
                        blk_start_request(req);
-                       __blk_end_request_all(req, -ETIMEDOUT);
+                       __blk_end_request_all(req, BLK_STS_TIMEOUT);
                        continue;
                }
                cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -2734,7 +2734,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
                                      "on request %p",
                                      PTR_ERR(cqr), req);
                        blk_start_request(req);
-                       __blk_end_request_all(req, -EIO);
+                       __blk_end_request_all(req, BLK_STS_IOERR);
                        continue;
                }
                /*
@@ -2755,21 +2755,29 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
 {
        struct request *req;
        int status;
-       int error = 0;
+       blk_status_t error = BLK_STS_OK;
 
        req = (struct request *) cqr->callback_data;
        dasd_profile_end(cqr->block, cqr, req);
+
        status = cqr->block->base->discipline->free_cp(cqr, req);
        if (status < 0)
-               error = status;
+               error = errno_to_blk_status(status);
        else if (status == 0) {
-               if (cqr->intrc == -EPERM)
-                       error = -EBADE;
-               else if (cqr->intrc == -ENOLINK ||
-                        cqr->intrc == -ETIMEDOUT)
-                       error = cqr->intrc;
-               else
-                       error = -EIO;
+               switch (cqr->intrc) {
+               case -EPERM:
+                       error = BLK_STS_NEXUS;
+                       break;
+               case -ENOLINK:
+                       error = BLK_STS_TRANSPORT;
+                       break;
+               case -ETIMEDOUT:
+                       error = BLK_STS_TIMEOUT;
+                       break;
+               default:
+                       error = BLK_STS_IOERR;
+                       break;
+               }
        }
        __blk_end_request_all(req, error);
 }
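
The explicit switch gives cqr->intrc the same treatment the generic errno_to_blk_status() table applies on the free_cp() failure path just above: -EPERM maps to BLK_STS_NEXUS, -ENOLINK to BLK_STS_TRANSPORT, -ETIMEDOUT to BLK_STS_TIMEOUT, and anything else falls back to BLK_STS_IOERR.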
@@ -3190,7 +3198,7 @@ static void dasd_flush_request_queue(struct dasd_block *block)
 
        spin_lock_irq(&block->request_queue_lock);
        while ((req = blk_fetch_request(block->request_queue)))
-               __blk_end_request_all(req, -EIO);
+               __blk_end_request_all(req, BLK_STS_IOERR);
        spin_unlock_irq(&block->request_queue_lock);
 }
 
index 36e5280..06eb1de 100644 (file)
@@ -845,7 +845,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
        unsigned long source_addr;
        unsigned long bytes_done;
 
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        bytes_done = 0;
        dev_info = bio->bi_bdev->bd_disk->private_data;
index 152de68..3c2c84b 100644 (file)
@@ -231,7 +231,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
        aob->request.data = (u64) aobrq;
        scmrq->bdev = bdev;
        scmrq->retries = 4;
-       scmrq->error = 0;
+       scmrq->error = BLK_STS_OK;
        /* We don't use all msbs - place aidaws at the end of the aob page. */
        scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
        scm_request_cluster_init(scmrq);
@@ -364,7 +364,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
 {
        struct aob *aob = scmrq->aob;
 
-       if (scmrq->error == -ETIMEDOUT)
+       if (scmrq->error == BLK_STS_TIMEOUT)
                SCM_LOG(1, "Request timeout");
        else {
                SCM_LOG(1, "Request error");
@@ -377,7 +377,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
                       scmrq->error);
 }
 
-void scm_blk_irq(struct scm_device *scmdev, void *data, int error)
+void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
 {
        struct scm_request *scmrq = data;
        struct scm_blk_dev *bdev = scmrq->bdev;
@@ -397,7 +397,7 @@ static void scm_blk_handle_error(struct scm_request *scmrq)
        struct scm_blk_dev *bdev = scmrq->bdev;
        unsigned long flags;
 
-       if (scmrq->error != -EIO)
+       if (scmrq->error != BLK_STS_IOERR)
                goto restart;
 
        /* For BLK_STS_IOERR the response block is valid. */
index 09218cd..cd598d1 100644 (file)
@@ -35,7 +35,7 @@ struct scm_request {
        struct aob *aob;
        struct list_head list;
        u8 retries;
-       int error;
+       blk_status_t error;
 #ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
        struct {
                enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state;
@@ -50,7 +50,7 @@ struct scm_request {
 int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *);
 void scm_blk_dev_cleanup(struct scm_blk_dev *);
 void scm_blk_set_available(struct scm_blk_dev *);
-void scm_blk_irq(struct scm_device *, void *, int);
+void scm_blk_irq(struct scm_device *, void *, blk_status_t);
 
 void scm_request_finish(struct scm_request *);
 void scm_request_requeue(struct scm_request *);
index b9d7e75..a48f0d4 100644 (file)
@@ -190,7 +190,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
        unsigned long page_addr;
        unsigned long bytes;
 
-       blk_queue_split(q, &bio, q->bio_split);
+       blk_queue_split(q, &bio);
 
        if ((bio->bi_iter.bi_sector & 7) != 0 ||
            (bio->bi_iter.bi_size & 4095) != 0)
index b3f44bc..0f11f3b 100644 (file)
@@ -135,7 +135,7 @@ static void eadm_subchannel_irq(struct subchannel *sch)
        struct eadm_private *private = get_eadm_private(sch);
        struct eadm_scsw *scsw = &sch->schib.scsw.eadm;
        struct irb *irb = this_cpu_ptr(&cio_irb);
-       int error = 0;
+       blk_status_t error = BLK_STS_OK;
 
        EADM_LOG(6, "irq");
        EADM_LOG_HEX(6, irb, sizeof(*irb));
@@ -144,10 +144,10 @@ static void eadm_subchannel_irq(struct subchannel *sch)
 
        if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))
            && scsw->eswf == 1 && irb->esw.eadm.erw.r)
-               error = -EIO;
+               error = BLK_STS_IOERR;
 
        if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC)
-               error = -ETIMEDOUT;
+               error = BLK_STS_TIMEOUT;
 
        eadm_subchannel_set_timeout(sch, 0);
 
index 15268ed..1fa53ec 100644 (file)
@@ -71,7 +71,7 @@ void scm_driver_unregister(struct scm_driver *scmdrv)
 }
 EXPORT_SYMBOL_GPL(scm_driver_unregister);
 
-void scm_irq_handler(struct aob *aob, int error)
+void scm_irq_handler(struct aob *aob, blk_status_t error)
 {
        struct aob_rq_header *aobrq = (void *) aob->request.data;
        struct scm_device *scmdev = aobrq->scmdev;
index e72abbc..a66a317 100644 (file)
@@ -70,14 +70,14 @@ static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
 {
        return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
 }
-MDEV_TYPE_ATTR_RO(name);
+static MDEV_TYPE_ATTR_RO(name);
 
 static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
                               char *buf)
 {
        return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
 }
-MDEV_TYPE_ATTR_RO(device_api);
+static MDEV_TYPE_ATTR_RO(device_api);
 
 static ssize_t available_instances_show(struct kobject *kobj,
                                        struct device *dev, char *buf)
@@ -86,7 +86,7 @@ static ssize_t available_instances_show(struct kobject *kobj,
 
        return sprintf(buf, "%d\n", atomic_read(&private->avail));
 }
-MDEV_TYPE_ATTR_RO(available_instances);
+static MDEV_TYPE_ATTR_RO(available_instances);
 
 static struct attribute *mdev_types_attrs[] = {
        &mdev_type_attr_name.attr,
@@ -100,7 +100,7 @@ static struct attribute_group mdev_type_group = {
        .attrs = mdev_types_attrs,
 };
 
-struct attribute_group *mdev_type_groups[] = {
+static struct attribute_group *mdev_type_groups[] = {
        &mdev_type_group,
        NULL,
 };
@@ -152,7 +152,7 @@ static int vfio_ccw_mdev_open(struct mdev_device *mdev)
                                      &events, &private->nb);
 }
 
-void vfio_ccw_mdev_release(struct mdev_device *mdev)
+static void vfio_ccw_mdev_release(struct mdev_device *mdev)
 {
        struct vfio_ccw_private *private =
                dev_get_drvdata(mdev_parent_dev(mdev));
@@ -233,7 +233,7 @@ static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info,
        }
 }
 
-int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
+static int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
 {
        if (info->index != VFIO_CCW_IO_IRQ_INDEX)
                return -EINVAL;
index 9be4596..ea09991 100644 (file)
@@ -668,10 +668,28 @@ static int ap_device_probe(struct device *dev)
        struct ap_driver *ap_drv = to_ap_drv(dev->driver);
        int rc;
 
+       /* Add queue/card to list of active queues/cards */
+       spin_lock_bh(&ap_list_lock);
+       if (is_card_dev(dev))
+               list_add(&to_ap_card(dev)->list, &ap_card_list);
+       else
+               list_add(&to_ap_queue(dev)->list,
+                        &to_ap_queue(dev)->card->queues);
+       spin_unlock_bh(&ap_list_lock);
+
        ap_dev->drv = ap_drv;
        rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV;
-       if (rc)
+
+       if (rc) {
+               spin_lock_bh(&ap_list_lock);
+               if (is_card_dev(dev))
+                       list_del_init(&to_ap_card(dev)->list);
+               else
+                       list_del_init(&to_ap_queue(dev)->list);
+               spin_unlock_bh(&ap_list_lock);
                ap_dev->drv = NULL;
+       }
+
        return rc;
 }
 
@@ -680,14 +698,17 @@ static int ap_device_remove(struct device *dev)
        struct ap_device *ap_dev = to_ap_dev(dev);
        struct ap_driver *ap_drv = ap_dev->drv;
 
+       if (ap_drv->remove)
+               ap_drv->remove(ap_dev);
+
+       /* Remove queue/card from list of active queues/cards */
        spin_lock_bh(&ap_list_lock);
        if (is_card_dev(dev))
                list_del_init(&to_ap_card(dev)->list);
        else
                list_del_init(&to_ap_queue(dev)->list);
        spin_unlock_bh(&ap_list_lock);
-       if (ap_drv->remove)
-               ap_drv->remove(ap_dev);
+
        return 0;
 }
 
@@ -1056,10 +1077,6 @@ static void ap_scan_bus(struct work_struct *unused)
                                }
                                /* get it and thus adjust reference counter */
                                get_device(&ac->ap_dev.device);
-                               /* Add card device to card list */
-                               spin_lock_bh(&ap_list_lock);
-                               list_add(&ac->list, &ap_card_list);
-                               spin_unlock_bh(&ap_list_lock);
                        }
                        /* now create the new queue device */
                        aq = ap_queue_create(qid, type);
@@ -1070,10 +1087,6 @@ static void ap_scan_bus(struct work_struct *unused)
                        aq->ap_dev.device.parent = &ac->ap_dev.device;
                        dev_set_name(&aq->ap_dev.device,
                                     "%02x.%04x", id, dom);
-                       /* Add queue device to card queue list */
-                       spin_lock_bh(&ap_list_lock);
-                       list_add(&aq->list, &ac->queues);
-                       spin_unlock_bh(&ap_list_lock);
                        /* Start with a device reset */
                        spin_lock_bh(&aq->lock);
                        ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
@@ -1081,9 +1094,6 @@ static void ap_scan_bus(struct work_struct *unused)
                        /* Register device */
                        rc = device_register(&aq->ap_dev.device);
                        if (rc) {
-                               spin_lock_bh(&ap_list_lock);
-                               list_del_init(&aq->list);
-                               spin_unlock_bh(&ap_list_lock);
                                put_device(&aq->ap_dev.device);
                                continue;
                        }
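
The ap_bus hunks above move list insertion out of the bus-scan path and into
ap_device_probe(), so a queue or card appears on the active list only while a
driver is bound, and is unlinked again if probe fails. A minimal kernel-style
sketch of the same register-then-roll-back shape (my_dev, my_drv, my_list and
my_list_lock are illustrative names, not AP bus API):

  #include <linux/list.h>
  #include <linux/spinlock.h>

  struct my_dev;
  struct my_drv { int (*probe)(struct my_dev *dev); };
  struct my_dev { struct list_head node; struct my_drv *drv; };

  static LIST_HEAD(my_list);
  static DEFINE_SPINLOCK(my_list_lock);

  static int my_probe(struct my_dev *dev)
  {
          int rc;

          spin_lock_bh(&my_list_lock);
          list_add(&dev->node, &my_list);        /* visible before probe */
          spin_unlock_bh(&my_list_lock);

          rc = dev->drv->probe ? dev->drv->probe(dev) : -ENODEV;
          if (rc) {
                  spin_lock_bh(&my_list_lock);
                  list_del_init(&dev->node);     /* roll back on failure */
                  spin_unlock_bh(&my_list_lock);
          }
          return rc;
  }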
index cfa161c..836efac 100644 (file)
@@ -160,7 +160,14 @@ static struct device_type ap_card_type = {
 
 static void ap_card_device_release(struct device *dev)
 {
-       kfree(to_ap_card(dev));
+       struct ap_card *ac = to_ap_card(dev);
+
+       if (!list_empty(&ac->list)) {
+               spin_lock_bh(&ap_list_lock);
+               list_del_init(&ac->list);
+               spin_unlock_bh(&ap_list_lock);
+       }
+       kfree(ac);
 }
 
 struct ap_card *ap_card_create(int id, int queue_depth, int device_type,
index 480c58a..0f1a5d0 100644 (file)
@@ -584,7 +584,14 @@ static struct device_type ap_queue_type = {
 
 static void ap_queue_device_release(struct device *dev)
 {
-       kfree(to_ap_queue(dev));
+       struct ap_queue *aq = to_ap_queue(dev);
+
+       if (!list_empty(&aq->list)) {
+               spin_lock_bh(&ap_list_lock);
+               list_del_init(&aq->list);
+               spin_unlock_bh(&ap_list_lock);
+       }
+       kfree(aq);
 }
 
 struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
index dba94b4..fa732bd 100644 (file)
@@ -1954,7 +1954,6 @@ static void netiucv_free_netdevice(struct net_device *dev)
                privptr->conn = NULL; privptr->fsm = NULL;
                /* privptr gets freed by free_netdev() */
        }
-       free_netdev(dev);
 }
 
 /**
@@ -1972,7 +1971,8 @@ static void netiucv_setup_netdevice(struct net_device *dev)
        dev->mtu                 = NETIUCV_MTU_DEFAULT;
        dev->min_mtu             = 576;
        dev->max_mtu             = NETIUCV_MTU_MAX;
-       dev->destructor          = netiucv_free_netdevice;
+       dev->needs_free_netdev   = true;
+       dev->priv_destructor     = netiucv_free_netdevice;
        dev->hard_header_len     = NETIUCV_HDRLEN;
        dev->addr_len            = 0;
        dev->type                = ARPHRD_SLIP;
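
This netiucv hunk is one instance of the net_device destructor split in this
merge window: instead of a single dev->destructor that both cleaned up and
called free_netdev(), a driver sets needs_free_netdev (the core then frees the
device) and, optionally, priv_destructor (run first, for private teardown).
The same conversion repeats in the staging wireless and usb gadget hunks
further down. A sketch of the convention, with my_setup and
my_priv_destructor as illustrative names:

  static void my_priv_destructor(struct net_device *dev)
  {
          /* release private resources only; never call free_netdev() here */
  }

  static void my_setup(struct net_device *dev)
  {
          dev->needs_free_netdev = true;               /* core frees dev */
          dev->priv_destructor   = my_priv_destructor; /* runs before the free */
  }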
index 62fed9d..14f377a 100644 (file)
@@ -214,7 +214,7 @@ static void jsfd_request(void)
                struct jsfd_part *jdp = req->rq_disk->private_data;
                unsigned long offset = blk_rq_pos(req) << 9;
                size_t len = blk_rq_cur_bytes(req);
-               int err = -EIO;
+               blk_status_t err = BLK_STS_IOERR;
 
                if ((offset + len) > jdp->dsize)
                        goto end;
@@ -230,7 +230,7 @@ static void jsfd_request(void)
                }
 
                jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
-               err = 0;
+               err = BLK_STS_OK;
        end:
                if (!__blk_end_request_cur(req, err))
                        req = jsfd_next_request();
@@ -592,6 +592,7 @@ static int jsfd_init(void)
                        put_disk(disk);
                        goto out;
                }
+               blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
                jsfd_disk[i] = disk;
        }
 
index 8a1b948..a4f28b7 100644 (file)
@@ -446,7 +446,7 @@ static void _put_request(struct request *rq)
         *       code paths.
         */
        if (unlikely(rq->bio))
-               blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq));
+               blk_end_request(rq, BLK_STS_IOERR, blk_rq_bytes(rq));
        else
                blk_put_request(rq);
 }
@@ -474,10 +474,10 @@ void osd_end_request(struct osd_request *or)
 EXPORT_SYMBOL(osd_end_request);
 
 static void _set_error_resid(struct osd_request *or, struct request *req,
-                            int error)
+                            blk_status_t error)
 {
        or->async_error = error;
-       or->req_errors = scsi_req(req)->result ? : error;
+       or->req_errors = scsi_req(req)->result;
        or->sense_len = scsi_req(req)->sense_len;
        if (or->sense_len)
                memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
@@ -489,17 +489,19 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
 
 int osd_execute_request(struct osd_request *or)
 {
-       int error;
-
        blk_execute_rq(or->request->q, NULL, or->request, 0);
-       error = scsi_req(or->request)->result ? -EIO : 0;
 
-       _set_error_resid(or, or->request, error);
-       return error;
+       if (scsi_req(or->request)->result) {
+               _set_error_resid(or, or->request, BLK_STS_IOERR);
+               return -EIO;
+       }
+
+       _set_error_resid(or, or->request, BLK_STS_OK);
+       return 0;
 }
 EXPORT_SYMBOL(osd_execute_request);
 
-static void osd_request_async_done(struct request *req, int error)
+static void osd_request_async_done(struct request *req, blk_status_t error)
 {
        struct osd_request *or = req->end_io_data;
 
@@ -1572,13 +1574,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
                        flags);
        if (IS_ERR(req))
                return req;
-       scsi_req_init(req);
 
        for_each_bio(bio) {
-               struct bio *bounce_bio = bio;
-
-               blk_queue_bounce(req->q, &bounce_bio);
-               ret = blk_rq_append_bio(req, bounce_bio);
+               ret = blk_rq_append_bio(req, bio);
                if (ret)
                        return ERR_PTR(ret);
        }
@@ -1617,7 +1615,6 @@ static int _init_blk_request(struct osd_request *or,
                                ret = PTR_ERR(req);
                                goto out;
                        }
-                       scsi_req_init(req);
                        or->in.req = or->request->next_rq = req;
                }
        } else if (has_in)
@@ -1914,7 +1911,7 @@ analyze:
                /* scsi sense is Empty, the request was never issued to target
                 * linux return code might tell us what happened.
                 */
-               if (or->async_error == -ENOMEM)
+               if (or->async_error == BLK_STS_RESOURCE)
                        osi->osd_err_pri = OSD_ERR_PRI_RESOURCE;
                else
                        osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE;
index 67cbed9..929ee7e 100644 (file)
@@ -320,7 +320,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt)
 
 
 /* Wakeup from interrupt */
-static void osst_end_async(struct request *req, int update)
+static void osst_end_async(struct request *req, blk_status_t status)
 {
        struct scsi_request *rq = scsi_req(req);
        struct osst_request *SRpnt = req->end_io_data;
@@ -373,7 +373,6 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
                return DRIVER_ERROR << 24;
 
        rq = scsi_req(req);
-       scsi_req_init(req);
        req->rq_flags |= RQF_QUIET;
 
        SRpnt->bio = NULL;
index 8bc7ee1..507512c 100644 (file)
@@ -870,7 +870,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
                QEDI_ERR(&qedi->dbg_ctx,
                         "Delayed or untracked cleanup response, itt=0x%x, tid=0x%x, cid=0x%x, task=%p\n",
                         protoitt, cqe->itid, qedi_conn->iscsi_conn_id, task);
-               WARN_ON(1);
        }
 }
 
index 09a2946..879d3b7 100644 (file)
@@ -1499,11 +1499,9 @@ err_idx:
 
 void qedi_clear_task_idx(struct qedi_ctx *qedi, int idx)
 {
-       if (!test_and_clear_bit(idx, qedi->task_idx_map)) {
+       if (!test_and_clear_bit(idx, qedi->task_idx_map))
                QEDI_ERR(&qedi->dbg_ctx,
                         "FW task context, already cleared, tid=0x%x\n", idx);
-               WARN_ON(1);
-       }
 }
 
 void qedi_update_itt_map(struct qedi_ctx *qedi, u32 tid, u32 proto_itt,
index dc095a2..3be980d 100644 (file)
@@ -245,7 +245,7 @@ struct sdebug_dev_info {
        unsigned int channel;
        unsigned int target;
        u64 lun;
-       uuid_be lu_name;
+       uuid_t lu_name;
        struct sdebug_host_info *sdbg_host;
        unsigned long uas_bm[1];
        atomic_t num_in_q;
@@ -965,7 +965,7 @@ static const u64 naa3_comp_c = 0x3111111000000000ULL;
 static int inquiry_vpd_83(unsigned char *arr, int port_group_id,
                          int target_dev_id, int dev_id_num,
                          const char *dev_id_str, int dev_id_str_len,
-                         const uuid_be *lu_name)
+                         const uuid_t *lu_name)
 {
        int num, port_a;
        char b[32];
@@ -3568,7 +3568,7 @@ static void sdebug_q_cmd_wq_complete(struct work_struct *work)
 }
 
 static bool got_shared_uuid;
-static uuid_be shared_uuid;
+static uuid_t shared_uuid;
 
 static struct sdebug_dev_info *sdebug_device_create(
                        struct sdebug_host_info *sdbg_host, gfp_t flags)
@@ -3578,12 +3578,12 @@ static struct sdebug_dev_info *sdebug_device_create(
        devip = kzalloc(sizeof(*devip), flags);
        if (devip) {
                if (sdebug_uuid_ctl == 1)
-                       uuid_be_gen(&devip->lu_name);
+                       uuid_gen(&devip->lu_name);
                else if (sdebug_uuid_ctl == 2) {
                        if (got_shared_uuid)
                                devip->lu_name = shared_uuid;
                        else {
-                               uuid_be_gen(&shared_uuid);
+                               uuid_gen(&shared_uuid);
                                got_shared_uuid = true;
                                devip->lu_name = shared_uuid;
                        }
index ecc07da..304a715 100644 (file)
@@ -1874,7 +1874,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
        }
 }
 
-static void eh_lock_door_done(struct request *req, int uptodate)
+static void eh_lock_door_done(struct request *req, blk_status_t status)
 {
        __blk_put_request(req->q, req);
 }
@@ -1903,7 +1903,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
        if (IS_ERR(req))
                return;
        rq = scsi_req(req);
-       scsi_req_init(req);
 
        rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
        rq->cmd[1] = 0;
index 99e16ac..550e29f 100644 (file)
@@ -250,7 +250,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
        if (IS_ERR(req))
                return ret;
        rq = scsi_req(req);
-       scsi_req_init(req);
 
        if (bufflen &&  blk_rq_map_kern(sdev->request_queue, req,
                                        buffer, bufflen, __GFP_RECLAIM))
@@ -635,7 +634,7 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
        cmd->request->next_rq->special = NULL;
 }
 
-static bool scsi_end_request(struct request *req, int error,
+static bool scsi_end_request(struct request *req, blk_status_t error,
                unsigned int bytes, unsigned int bidi_bytes)
 {
        struct scsi_cmnd *cmd = req->special;
@@ -694,45 +693,28 @@ static bool scsi_end_request(struct request *req, int error,
  * @cmd:       SCSI command (unused)
  * @result:    scsi error code
  *
- * Translate SCSI error code into standard UNIX errno.
- * Return values:
- * -ENOLINK    temporary transport failure
- * -EREMOTEIO  permanent target failure, do not retry
- * -EBADE      permanent nexus failure, retry on other path
- * -ENOSPC     No write space available
- * -ENODATA    Medium error
- * -EIO                unspecified I/O error
+ * Translate SCSI error code into block errors.
  */
-static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result)
+static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
+               int result)
 {
-       int error = 0;
-
-       switch(host_byte(result)) {
+       switch (host_byte(result)) {
        case DID_TRANSPORT_FAILFAST:
-               error = -ENOLINK;
-               break;
+               return BLK_STS_TRANSPORT;
        case DID_TARGET_FAILURE:
                set_host_byte(cmd, DID_OK);
-               error = -EREMOTEIO;
-               break;
+               return BLK_STS_TARGET;
        case DID_NEXUS_FAILURE:
-               set_host_byte(cmd, DID_OK);
-               error = -EBADE;
-               break;
+               return BLK_STS_NEXUS;
        case DID_ALLOC_FAILURE:
                set_host_byte(cmd, DID_OK);
-               error = -ENOSPC;
-               break;
+               return BLK_STS_NOSPC;
        case DID_MEDIUM_ERROR:
                set_host_byte(cmd, DID_OK);
-               error = -ENODATA;
-               break;
+               return BLK_STS_MEDIUM;
        default:
-               error = -EIO;
-               break;
+               return BLK_STS_IOERR;
        }
-
-       return error;
 }
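
The point of the rewritten translation above is that it is total: every host
byte maps to a defined blk_status_t, with BLK_STS_IOERR as the catch-all, so
no raw errno can leak into the block layer. A standalone user-space
illustration of the same shape (enum names and values are stand-ins, not the
kernel's):

  #include <stdio.h>

  enum blk_status { STS_OK, STS_TRANSPORT, STS_TARGET, STS_NEXUS,
                    STS_NOSPC, STS_MEDIUM, STS_IOERR };
  enum host_byte  { XPORT_FAILFAST, TARGET_FAILURE, NEXUS_FAILURE,
                    ALLOC_FAILURE, MEDIUM_ERROR, SOMETHING_ELSE };

  static enum blk_status to_blk_status(enum host_byte h)
  {
          switch (h) {
          case XPORT_FAILFAST: return STS_TRANSPORT;
          case TARGET_FAILURE: return STS_TARGET;
          case NEXUS_FAILURE:  return STS_NEXUS;
          case ALLOC_FAILURE:  return STS_NOSPC;
          case MEDIUM_ERROR:   return STS_MEDIUM;
          default:             return STS_IOERR;   /* total mapping */
          }
  }

  int main(void)
  {
          printf("%d\n", to_blk_status(MEDIUM_ERROR));  /* prints 5 */
          return 0;
  }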
 
 /*
@@ -769,7 +751,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
        int result = cmd->result;
        struct request_queue *q = cmd->device->request_queue;
        struct request *req = cmd->request;
-       int error = 0;
+       blk_status_t error = BLK_STS_OK;
        struct scsi_sense_hdr sshdr;
        bool sense_valid = false;
        int sense_deferred = 0, level = 0;
@@ -808,7 +790,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
                         * both sides at once.
                         */
                        scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
-                       if (scsi_end_request(req, 0, blk_rq_bytes(req),
+                       if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
                                        blk_rq_bytes(req->next_rq)))
                                BUG();
                        return;
@@ -850,7 +832,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
                        scsi_print_sense(cmd);
                result = 0;
                /* for passthrough error may be set */
-               error = 0;
+               error = BLK_STS_OK;
        }
 
        /*
@@ -922,18 +904,18 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
                                action = ACTION_REPREP;
                        } else if (sshdr.asc == 0x10) /* DIX */ {
                                action = ACTION_FAIL;
-                               error = -EILSEQ;
+                               error = BLK_STS_PROTECTION;
                        /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
                        } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
                                action = ACTION_FAIL;
-                               error = -EREMOTEIO;
+                               error = BLK_STS_TARGET;
                        } else
                                action = ACTION_FAIL;
                        break;
                case ABORTED_COMMAND:
                        action = ACTION_FAIL;
                        if (sshdr.asc == 0x10) /* DIF */
-                               error = -EILSEQ;
+                               error = BLK_STS_PROTECTION;
                        break;
                case NOT_READY:
                        /* If the device is in the process of becoming
@@ -1134,6 +1116,20 @@ err_exit:
 }
 EXPORT_SYMBOL(scsi_init_io);
 
+/**
+ * scsi_initialize_rq - initialize struct scsi_cmnd.req
+ *
+ * Called from inside blk_get_request().
+ */
+void scsi_initialize_rq(struct request *rq)
+{
+       struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+       scsi_req_init(&cmd->req);
+}
+EXPORT_SYMBOL(scsi_initialize_rq);
+
+/* Called after a request has been started. */
 void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 {
        void *buf = cmd->sense_buffer;
@@ -1829,15 +1825,15 @@ out_delay:
                blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
-static inline int prep_to_mq(int ret)
+static inline blk_status_t prep_to_mq(int ret)
 {
        switch (ret) {
        case BLKPREP_OK:
-               return BLK_MQ_RQ_QUEUE_OK;
+               return BLK_STS_OK;
        case BLKPREP_DEFER:
-               return BLK_MQ_RQ_QUEUE_BUSY;
+               return BLK_STS_RESOURCE;
        default:
-               return BLK_MQ_RQ_QUEUE_ERROR;
+               return BLK_STS_IOERR;
        }
 }
 
@@ -1909,7 +1905,7 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
        blk_mq_complete_request(cmd->request);
 }
 
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
                         const struct blk_mq_queue_data *bd)
 {
        struct request *req = bd->rq;
@@ -1917,14 +1913,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
        struct scsi_device *sdev = q->queuedata;
        struct Scsi_Host *shost = sdev->host;
        struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
-       int ret;
+       blk_status_t ret;
        int reason;
 
        ret = prep_to_mq(scsi_prep_state_check(sdev, req));
-       if (ret != BLK_MQ_RQ_QUEUE_OK)
+       if (ret != BLK_STS_OK)
                goto out;
 
-       ret = BLK_MQ_RQ_QUEUE_BUSY;
+       ret = BLK_STS_RESOURCE;
        if (!get_device(&sdev->sdev_gendev))
                goto out;
 
@@ -1937,7 +1933,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        if (!(req->rq_flags & RQF_DONTPREP)) {
                ret = prep_to_mq(scsi_mq_prep_fn(req));
-               if (ret != BLK_MQ_RQ_QUEUE_OK)
+               if (ret != BLK_STS_OK)
                        goto out_dec_host_busy;
                req->rq_flags |= RQF_DONTPREP;
        } else {
@@ -1955,11 +1951,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
        reason = scsi_dispatch_cmd(cmd);
        if (reason) {
                scsi_set_blocked(cmd, reason);
-               ret = BLK_MQ_RQ_QUEUE_BUSY;
+               ret = BLK_STS_RESOURCE;
                goto out_dec_host_busy;
        }
 
-       return BLK_MQ_RQ_QUEUE_OK;
+       return BLK_STS_OK;
 
 out_dec_host_busy:
        atomic_dec(&shost->host_busy);
@@ -1972,12 +1968,14 @@ out_put_device:
        put_device(&sdev->sdev_gendev);
 out:
        switch (ret) {
-       case BLK_MQ_RQ_QUEUE_BUSY:
+       case BLK_STS_OK:
+               break;
+       case BLK_STS_RESOURCE:
                if (atomic_read(&sdev->device_busy) == 0 &&
                    !scsi_device_blocked(sdev))
                        blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
                break;
-       case BLK_MQ_RQ_QUEUE_ERROR:
+       default:
                /*
                 * Make sure to release all allocated resources when
                 * we hit an error, as we will never see this command
@@ -1986,8 +1984,6 @@ out:
                if (req->rq_flags & RQF_DONTPREP)
                        scsi_mq_uninit_cmd(cmd);
                break;
-       default:
-               break;
        }
        return ret;
 }
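
With scsi_queue_rq() now returning blk_status_t, the driver-facing contract of
.queue_rq reduces to three outcomes. A hedged skeleton, not the SCSI
implementation (my_hw_full and my_submit are hypothetical helpers):

  static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                                  const struct blk_mq_queue_data *bd)
  {
          if (my_hw_full(hctx))
                  return BLK_STS_RESOURCE;  /* core holds and requeues */
          if (my_submit(bd->rq))
                  return BLK_STS_IOERR;     /* core fails the request */
          return BLK_STS_OK;                /* request now owned by hw */
  }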
@@ -2057,6 +2053,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 {
        struct device *dev = shost->dma_dev;
 
+       queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+
        /*
         * this limit is imposed by hardware restrictions
         */
@@ -2139,6 +2137,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
        q->request_fn = scsi_request_fn;
        q->init_rq_fn = scsi_init_rq;
        q->exit_rq_fn = scsi_exit_rq;
+       q->initialize_rq_fn = scsi_initialize_rq;
 
        if (blk_init_allocated_queue(q) < 0) {
                blk_cleanup_queue(q);
@@ -2163,6 +2162,7 @@ static const struct blk_mq_ops scsi_mq_ops = {
 #endif
        .init_request   = scsi_init_request,
        .exit_request   = scsi_exit_request,
+       .initialize_rq_fn = scsi_initialize_rq,
        .map_queues     = scsi_map_queues,
 };
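
Both queue setup paths now install scsi_initialize_rq(), whose kerneldoc above
says it runs from inside blk_get_request(); that is why this series can delete
the explicit scsi_req_init() calls in sg, st, osst, scsi_error, scsi_lib and
pscsi. A sketch of what a passthrough caller looks like afterwards (fragment
only, using REQ_OP_SCSI_IN and TEST_UNIT_READY as in the 4.13-era headers):

  req = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
  if (IS_ERR(req))
          return PTR_ERR(req);
  rq = scsi_req(req);           /* already initialized by the hook */
  rq->cmd[0] = TEST_UNIT_READY; /* no scsi_req_init() needed */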
 
@@ -2977,7 +2977,7 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
                if (wait)
                        blk_mq_quiesce_queue(q);
                else
-                       blk_mq_stop_hw_queues(q);
+                       blk_mq_quiesce_queue_nowait(q);
        } else {
                spin_lock_irqsave(q->queue_lock, flags);
                blk_stop_queue(q);
@@ -3031,7 +3031,7 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
                return -EINVAL;
 
        if (q->mq_ops) {
-               blk_mq_start_stopped_hw_queues(q, false);
+               blk_mq_unquiesce_queue(q);
        } else {
                spin_lock_irqsave(q->queue_lock, flags);
                blk_start_queue(q);
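
The block/unblock hunks above swap the old stop/start of hardware queues for
the quiesce API, which is also meant to drain .queue_rq calls already in
flight before returning; the _nowait variant skips that synchronization for
use in atomic context. Usage shape:

  blk_mq_quiesce_queue(q);      /* no new dispatch; waits for running queue_rq */
  /* ... reconfigure or block the device ... */
  blk_mq_unquiesce_queue(q);    /* dispatch resumes */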
index 0ebe2f1..5006a65 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/bsg.h>
 
 #include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_request.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
@@ -172,7 +173,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
                            struct sas_rphy *rphy)
 {
        struct request *req;
-       int ret;
+       blk_status_t ret;
        int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
 
        while ((req = blk_fetch_request(q)) != NULL) {
@@ -230,6 +231,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
        q = blk_alloc_queue(GFP_KERNEL);
        if (!q)
                return -ENOMEM;
+       q->initialize_rq_fn = scsi_initialize_rq;
        q->cmd_size = sizeof(struct scsi_request);
 
        if (rphy) {
@@ -249,6 +251,11 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
        if (error)
                goto out_cleanup_queue;
 
+       /*
+        * by default assume old behaviour and bounce for any highmem page
+        */
+       blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+
        error = bsg_register_queue(q, dev, name, release);
        if (error)
                goto out_cleanup_queue;
@@ -264,6 +271,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
                q->queuedata = shost;
 
        queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+       queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
        return 0;
 
 out_cleanup_queue:
index 82c33a6..21225d6 100644 (file)
@@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
 } Sg_device;
 
 /* tasklet or soft irq callback */
-static void sg_rq_end_io(struct request *rq, int uptodate);
+static void sg_rq_end_io(struct request *rq, blk_status_t status);
 static int sg_start_req(Sg_request *srp, unsigned char *cmd);
 static int sg_finish_rem_req(Sg_request * srp);
 static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
@@ -808,7 +808,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
        if (atomic_read(&sdp->detaching)) {
                if (srp->bio) {
                        scsi_req_free_cmd(scsi_req(srp->rq));
-                       blk_end_request_all(srp->rq, -EIO);
+                       blk_end_request_all(srp->rq, BLK_STS_IOERR);
                        srp->rq = NULL;
                }
 
@@ -1300,7 +1300,7 @@ sg_rq_end_io_usercontext(struct work_struct *work)
  * level when a command is completed (or has failed).
  */
 static void
-sg_rq_end_io(struct request *rq, int uptodate)
+sg_rq_end_io(struct request *rq, blk_status_t status)
 {
        struct sg_request *srp = rq->end_io_data;
        struct scsi_request *req = scsi_req(rq);
@@ -1732,8 +1732,6 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
        }
        req = scsi_req(rq);
 
-       scsi_req_init(rq);
-
        if (hp->cmd_len > BLK_MAX_CDB)
                req->cmd = long_cmdp;
        memcpy(req->cmd, cmd, hp->cmd_len);
index 1ea34d6..8e5013d 100644 (file)
@@ -511,7 +511,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
        atomic64_dec(&STp->stats->in_flight);
 }
 
-static void st_scsi_execute_end(struct request *req, int uptodate)
+static void st_scsi_execute_end(struct request *req, blk_status_t status)
 {
        struct st_request *SRpnt = req->end_io_data;
        struct scsi_request *rq = scsi_req(req);
@@ -549,7 +549,6 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
        if (IS_ERR(req))
                return DRIVER_ERROR << 24;
        rq = scsi_req(req);
-       scsi_req_init(req);
        req->rq_flags |= RQF_QUIET;
 
        mdata->null_mapped = 1;
index dc6ecd8..ff10d1f 100644 (file)
@@ -231,16 +231,12 @@ static int ad7152_write_raw_samp_freq(struct device *dev, int val)
        if (i >= ARRAY_SIZE(ad7152_filter_rate_table))
                i = ARRAY_SIZE(ad7152_filter_rate_table) - 1;
 
-       mutex_lock(&chip->state_lock);
        ret = i2c_smbus_write_byte_data(chip->client,
                                        AD7152_REG_CFG2, AD7152_CFG2_OSR(i));
-       if (ret < 0) {
-               mutex_unlock(&chip->state_lock);
+       if (ret < 0)
                return ret;
-       }
 
        chip->filter_rate_setup = i;
-       mutex_unlock(&chip->state_lock);
 
        return ret;
 }
index cfe37eb..859d0d6 100644 (file)
@@ -152,7 +152,7 @@ static const struct net_device_ops mon_netdev_ops = {
 static void mon_setup(struct net_device *dev)
 {
        dev->netdev_ops = &mon_netdev_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        ether_setup(dev);
        dev->priv_flags |= IFF_NO_QUEUE;
        dev->type = ARPHRD_IEEE80211;
index 36c3189..bd4352f 100644 (file)
@@ -2667,7 +2667,8 @@ static int rtw_cfg80211_add_monitor_if (struct adapter *padapter, char *name, st
        mon_ndev->type = ARPHRD_IEEE80211_RADIOTAP;
        strncpy(mon_ndev->name, name, IFNAMSIZ);
        mon_ndev->name[IFNAMSIZ - 1] = 0;
-       mon_ndev->destructor = rtw_ndev_destructor;
+       mon_ndev->needs_free_netdev = true;
+       mon_ndev->priv_destructor = rtw_ndev_destructor;
 
        mon_ndev->netdev_ops = &rtw_cfg80211_monitor_if_ops;
 
index f83cfc7..0215899 100644 (file)
@@ -1207,8 +1207,6 @@ void rtw_ndev_destructor(struct net_device *ndev)
 
        if (ndev->ieee80211_ptr)
                kfree((u8 *)ndev->ieee80211_ptr);
-
-       free_netdev(ndev);
 }
 
 void rtw_dev_unload(struct adapter *padapter)
index 02db59e..aa16d1a 100644 (file)
@@ -160,7 +160,7 @@ static int isFileReadable(char *path)
                oldfs = get_fs(); set_fs(get_ds());
 
                if (1!=readFile(fp, &buf, 1))
-                       ret = PTR_ERR(fp);
+                       ret = -EINVAL;
 
                set_fs(oldfs);
                filp_close(fp, NULL);
index 0d8f815..3fdca2c 100644 (file)
@@ -1279,6 +1279,18 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr,
         */
        if (dump_payload)
                goto after_immediate_data;
+       /*
+        * Check for underflow case where both EDTL and immediate data payload
+        * exceed what is presented by CDB's TRANSFER LENGTH, and what has
+        * already been set in target_cmd_size_check() as se_cmd->data_length.
+        *
+        * For this special case, fail the command and dump the immediate data
+        * payload.
+        */
+       if (cmd->first_burst_len > cmd->se_cmd.data_length) {
+               cmd->sense_reason = TCM_INVALID_CDB_FIELD;
+               goto after_immediate_data;
+       }
 
        immed_ret = iscsit_handle_immediate_data(cmd, hdr,
                                        cmd->first_burst_len);
@@ -4423,8 +4435,11 @@ static void iscsit_logout_post_handler_closesession(
         * always sleep waiting for RX/TX thread shutdown to complete
         * within iscsit_close_connection().
         */
-       if (!conn->conn_transport->rdma_shutdown)
+       if (!conn->conn_transport->rdma_shutdown) {
                sleep = cmpxchg(&conn->tx_thread_active, true, false);
+               if (!sleep)
+                       return;
+       }
 
        atomic_set(&conn->conn_logout_remove, 0);
        complete(&conn->conn_logout_comp);
@@ -4440,8 +4455,11 @@ static void iscsit_logout_post_handler_samecid(
 {
        int sleep = 1;
 
-       if (!conn->conn_transport->rdma_shutdown)
+       if (!conn->conn_transport->rdma_shutdown) {
                sleep = cmpxchg(&conn->tx_thread_active, true, false);
+               if (!sleep)
+                       return;
+       }
 
        atomic_set(&conn->conn_logout_remove, 0);
        complete(&conn->conn_logout_comp);
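
Both logout post-handlers now bail out when the cmpxchg loses, i.e. when some
other path already flipped tx_thread_active from true to false and owns the
shutdown (in the hunks, sleep holds the old value, so !sleep means the claim
failed). A standalone C11 demo of that single-winner hand-off (names
illustrative):

  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  static _Atomic bool tx_thread_active = true;

  static bool try_claim(void)
  {
          bool expected = true;
          /* succeeds for exactly one caller */
          return atomic_compare_exchange_strong(&tx_thread_active,
                                                &expected, false);
  }

  int main(void)
  {
          printf("first:  %d\n", try_claim());   /* 1: wins the hand-off */
          printf("second: %d\n", try_claim());   /* 0: must back off    */
          return 0;
  }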
index bb069eb..c05d380 100644 (file)
@@ -93,7 +93,7 @@ static int iblock_configure_device(struct se_device *dev)
                return -EINVAL;
        }
 
-       ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0);
+       ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
        if (!ib_dev->ibd_bio_set) {
                pr_err("IBLOCK: Unable to create bioset\n");
                goto out;
@@ -296,8 +296,8 @@ static void iblock_bio_done(struct bio *bio)
        struct se_cmd *cmd = bio->bi_private;
        struct iblock_req *ibr = cmd->priv;
 
-       if (bio->bi_error) {
-               pr_err("bio error: %p,  err: %d\n", bio, bio->bi_error);
+       if (bio->bi_status) {
+               pr_err("bio error: %p,  err: %d\n", bio, bio->bi_status);
                /*
                 * Bump the ib_bio_err_cnt and release bio.
                 */
@@ -354,11 +354,11 @@ static void iblock_end_io_flush(struct bio *bio)
 {
        struct se_cmd *cmd = bio->bi_private;
 
-       if (bio->bi_error)
-               pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error);
+       if (bio->bi_status)
+               pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_status);
 
        if (cmd) {
-               if (bio->bi_error)
+               if (bio->bi_status)
                        target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
                else
                        target_complete_cmd(cmd, SAM_STAT_GOOD);
index 9ab7090..0912de7 100644 (file)
@@ -136,7 +136,7 @@ int init_se_kmem_caches(void);
 void   release_se_kmem_caches(void);
 u32    scsi_get_new_index(scsi_index_t);
 void   transport_subsystem_check_init(void);
-void   transport_cmd_finish_abort(struct se_cmd *, int);
+int    transport_cmd_finish_abort(struct se_cmd *, int);
 unsigned char *transport_dump_cmd_direction(struct se_cmd *);
 void   transport_dump_dev_state(struct se_device *, char *, int *);
 void   transport_dump_dev_info(struct se_device *, struct se_lun *,
index 3e4abb1..ceec021 100644 (file)
@@ -55,7 +55,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev)
 }
 
 static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd);
-static void pscsi_req_done(struct request *, int);
+static void pscsi_req_done(struct request *, blk_status_t);
 
 /*     pscsi_attach_hba():
  *
@@ -992,8 +992,6 @@ pscsi_execute_cmd(struct se_cmd *cmd)
                goto fail;
        }
 
-       scsi_req_init(req);
-
        if (sgl) {
                ret = pscsi_map_sg(cmd, sgl, sgl_nents, req);
                if (ret)
@@ -1045,7 +1043,7 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
        return 0;
 }
 
-static void pscsi_req_done(struct request *req, int uptodate)
+static void pscsi_req_done(struct request *req, blk_status_t status)
 {
        struct se_cmd *cmd = req->end_io_data;
        struct pscsi_plugin_task *pt = cmd->priv;
index dce1e1b..13f47bf 100644 (file)
@@ -75,7 +75,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr)
        kfree(tmr);
 }
 
-static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
+static int core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
 {
        unsigned long flags;
        bool remove = true, send_tas;
@@ -91,7 +91,7 @@ static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
                transport_send_task_abort(cmd);
        }
 
-       transport_cmd_finish_abort(cmd, remove);
+       return transport_cmd_finish_abort(cmd, remove);
 }
 
 static int target_check_cdb_and_preempt(struct list_head *list,
@@ -184,8 +184,8 @@ void core_tmr_abort_task(
                cancel_work_sync(&se_cmd->work);
                transport_wait_for_tasks(se_cmd);
 
-               transport_cmd_finish_abort(se_cmd, true);
-               target_put_sess_cmd(se_cmd);
+               if (!transport_cmd_finish_abort(se_cmd, true))
+                       target_put_sess_cmd(se_cmd);
 
                printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
                                " ref_tag: %llu\n", ref_tag);
@@ -281,8 +281,8 @@ static void core_tmr_drain_tmr_list(
                cancel_work_sync(&cmd->work);
                transport_wait_for_tasks(cmd);
 
-               transport_cmd_finish_abort(cmd, 1);
-               target_put_sess_cmd(cmd);
+               if (!transport_cmd_finish_abort(cmd, 1))
+                       target_put_sess_cmd(cmd);
        }
 }
 
@@ -380,8 +380,8 @@ static void core_tmr_drain_state_list(
                cancel_work_sync(&cmd->work);
                transport_wait_for_tasks(cmd);
 
-               core_tmr_handle_tas_abort(cmd, tas);
-               target_put_sess_cmd(cmd);
+               if (!core_tmr_handle_tas_abort(cmd, tas))
+                       target_put_sess_cmd(cmd);
        }
 }
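
These target-core hunks change transport_cmd_finish_abort() and
core_tmr_handle_tas_abort() to report whether they already dropped the final
command reference, so each caller puts the reference at most once. The
convention in sketch form (all my_* names hypothetical):

  /* returns nonzero if the reference was already dropped */
  static int my_finish(struct my_cmd *cmd)
  {
          if (my_check_stop(cmd))
                  return 1;        /* stop path consumed the ref */
          return my_put(cmd);      /* nonzero when this put was the last */
  }

  /* caller */
  if (!my_finish(cmd))
          my_put(cmd);             /* balance only if not yet dropped */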
 
index 6025935..f1b3a46 100644 (file)
@@ -651,9 +651,10 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
                percpu_ref_put(&lun->lun_ref);
 }
 
-void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
+int transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 {
        bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF);
+       int ret = 0;
 
        if (cmd->se_cmd_flags & SCF_SE_LUN_CMD)
                transport_lun_remove_cmd(cmd);
@@ -665,9 +666,11 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
                cmd->se_tfo->aborted_task(cmd);
 
        if (transport_cmd_check_stop_to_fabric(cmd))
-               return;
+               return 1;
        if (remove && ack_kref)
-               transport_put_cmd(cmd);
+               ret = transport_put_cmd(cmd);
+
+       return ret;
 }
 
 static void target_complete_failure_work(struct work_struct *work)
index 9413c4a..a9ec94e 100644 (file)
@@ -23,7 +23,7 @@ enum int3400_thermal_uuid {
        INT3400_THERMAL_MAXIMUM_UUID,
 };
 
-static u8 *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
+static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
        "42A441D6-AE6A-462b-A84B-4A8CE79027D3",
        "3A95C389-E4B8-4629-A526-C52C88626BAE",
        "97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
@@ -141,10 +141,10 @@ static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv)
                }
 
                for (j = 0; j < INT3400_THERMAL_MAXIMUM_UUID; j++) {
-                       u8 uuid[16];
+                       guid_t guid;
 
-                       acpi_str_to_uuid(int3400_thermal_uuids[j], uuid);
-                       if (!strncmp(uuid, objb->buffer.pointer, 16)) {
+                       guid_parse(int3400_thermal_uuids[j], &guid);
+                       if (guid_equal((guid_t *)objb->buffer.pointer, &guid)) {
                                priv->uuid_bitmap |= (1 << j);
                                break;
                        }
index 84a2ceb..fe85154 100644 (file)
@@ -42,7 +42,7 @@
 #define PCI_DEVICE_ID_INTEL_CNPLP              0x9dee
 #define PCI_DEVICE_ID_INTEL_CNPH               0xa36e
 
-#define PCI_INTEL_BXT_DSM_UUID         "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+#define PCI_INTEL_BXT_DSM_GUID         "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
 #define PCI_INTEL_BXT_FUNC_PMU_PWR     4
 #define PCI_INTEL_BXT_STATE_D0         0
 #define PCI_INTEL_BXT_STATE_D3         3
  * struct dwc3_pci - Driver private structure
  * @dwc3: child dwc3 platform_device
  * @pci: our link to PCI bus
- * @uuid: _DSM UUID
+ * @guid: _DSM GUID
  * @has_dsm_for_pm: true for devices which need to run _DSM on runtime PM
  */
 struct dwc3_pci {
        struct platform_device *dwc3;
        struct pci_dev *pci;
 
-       u8 uuid[16];
+       guid_t guid;
 
        unsigned int has_dsm_for_pm:1;
 };
@@ -120,7 +120,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc)
 
                if (pdev->device == PCI_DEVICE_ID_INTEL_BXT ||
                                pdev->device == PCI_DEVICE_ID_INTEL_BXT_M) {
-                       acpi_str_to_uuid(PCI_INTEL_BXT_DSM_UUID, dwc->uuid);
+                       guid_parse(PCI_INTEL_BXT_DSM_GUID, &dwc->guid);
                        dwc->has_dsm_for_pm = true;
                }
 
@@ -292,7 +292,7 @@ static int dwc3_pci_dsm(struct dwc3_pci *dwc, int param)
        tmp.type = ACPI_TYPE_INTEGER;
        tmp.integer.value = param;
 
-       obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), dwc->uuid,
+       obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), &dwc->guid,
                        1, PCI_INTEL_BXT_FUNC_PMU_PWR, &argv4);
        if (!obj) {
                dev_err(&dwc->pci->dev, "failed to evaluate _DSM\n");
index 49d685a..45b5540 100644 (file)
@@ -315,6 +315,9 @@ void usb_remove_function(struct usb_configuration *c, struct usb_function *f)
        list_del(&f->list);
        if (f->unbind)
                f->unbind(c, f);
+
+       if (f->bind_deactivated)
+               usb_function_activate(f);
 }
 EXPORT_SYMBOL_GPL(usb_remove_function);
 
@@ -956,12 +959,8 @@ static void remove_config(struct usb_composite_dev *cdev,
 
                f = list_first_entry(&config->functions,
                                struct usb_function, list);
-               list_del(&f->list);
-               if (f->unbind) {
-                       DBG(cdev, "unbind function '%s'/%p\n", f->name, f);
-                       f->unbind(config, f);
-                       /* may free memory for "f" */
-               }
+
+               usb_remove_function(config, f);
        }
        list_del(&config->list);
        if (config->unbind) {
index b4058f0..6a1ce6a 100644 (file)
@@ -281,7 +281,7 @@ static void pn_net_setup(struct net_device *dev)
        dev->tx_queue_len       = 1;
 
        dev->netdev_ops         = &pn_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
        dev->header_ops         = &phonet_header_ops;
 }
 
index b9ca0a2..684900f 100644 (file)
@@ -1183,8 +1183,10 @@ dev_release (struct inode *inode, struct file *fd)
 
        /* closing ep0 === shutdown all */
 
-       if (dev->gadget_registered)
+       if (dev->gadget_registered) {
                usb_gadget_unregister_driver (&gadgetfs_driver);
+               dev->gadget_registered = false;
+       }
 
        /* at this point "good" hardware has disconnected the
         * device from USB; the host won't see it any more.
@@ -1677,9 +1679,10 @@ static void
 gadgetfs_suspend (struct usb_gadget *gadget)
 {
        struct dev_data         *dev = get_gadget_data (gadget);
+       unsigned long           flags;
 
        INFO (dev, "suspended from state %d\n", dev->state);
-       spin_lock (&dev->lock);
+       spin_lock_irqsave(&dev->lock, flags);
        switch (dev->state) {
        case STATE_DEV_SETUP:           // VERY odd... host died??
        case STATE_DEV_CONNECTED:
@@ -1690,7 +1693,7 @@ gadgetfs_suspend (struct usb_gadget *gadget)
        default:
                break;
        }
-       spin_unlock (&dev->lock);
+       spin_unlock_irqrestore(&dev->lock, flags);
 }
 
 static struct usb_gadget_driver gadgetfs_driver = {
index ccabb51..7635fd7 100644 (file)
@@ -442,23 +442,16 @@ static void set_link_state(struct dummy_hcd *dum_hcd)
                /* Report reset and disconnect events to the driver */
                if (dum->driver && (disconnect || reset)) {
                        stop_activity(dum);
-                       spin_unlock(&dum->lock);
                        if (reset)
                                usb_gadget_udc_reset(&dum->gadget, dum->driver);
                        else
                                dum->driver->disconnect(&dum->gadget);
-                       spin_lock(&dum->lock);
                }
        } else if (dum_hcd->active != dum_hcd->old_active) {
-               if (dum_hcd->old_active && dum->driver->suspend) {
-                       spin_unlock(&dum->lock);
+               if (dum_hcd->old_active && dum->driver->suspend)
                        dum->driver->suspend(&dum->gadget);
-                       spin_lock(&dum->lock);
-               } else if (!dum_hcd->old_active &&  dum->driver->resume) {
-                       spin_unlock(&dum->lock);
+               else if (!dum_hcd->old_active &&  dum->driver->resume)
                        dum->driver->resume(&dum->gadget);
-                       spin_lock(&dum->lock);
-               }
        }
 
        dum_hcd->old_status = dum_hcd->port_status;
@@ -983,7 +976,9 @@ static int dummy_udc_stop(struct usb_gadget *g)
        struct dummy_hcd        *dum_hcd = gadget_to_dummy_hcd(g);
        struct dummy            *dum = dum_hcd->dum;
 
+       spin_lock_irq(&dum->lock);
        dum->driver = NULL;
+       spin_unlock_irq(&dum->lock);
 
        return 0;
 }
index 6cf0785..f2cbd7f 100644 (file)
@@ -2470,11 +2470,8 @@ static void stop_activity(struct net2280 *dev, struct usb_gadget_driver *driver)
                nuke(&dev->ep[i]);
 
        /* report disconnect; the driver is already quiesced */
-       if (driver) {
-               spin_unlock(&dev->lock);
+       if (driver)
                driver->disconnect(&dev->gadget);
-               spin_lock(&dev->lock);
-       }
 
        usb_reinit(dev);
 }
@@ -3348,8 +3345,6 @@ next_endpoints:
                BIT(PCI_RETRY_ABORT_INTERRUPT))
 
 static void handle_stat1_irqs(struct net2280 *dev, u32 stat)
-__releases(dev->lock)
-__acquires(dev->lock)
 {
        struct net2280_ep       *ep;
        u32                     tmp, num, mask, scratch;
@@ -3390,14 +3385,12 @@ __acquires(dev->lock)
                        if (disconnect || reset) {
                                stop_activity(dev, dev->driver);
                                ep0_start(dev);
-                               spin_unlock(&dev->lock);
                                if (reset)
                                        usb_gadget_udc_reset
                                                (&dev->gadget, dev->driver);
                                else
                                        (dev->driver->disconnect)
                                                (&dev->gadget);
-                               spin_lock(&dev->lock);
                                return;
                        }
                }
index 1f1687e..fddf273 100644 (file)
@@ -2119,11 +2119,12 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
 {
        u32 temp, port_offset, port_count;
        int i;
-       u8 major_revision;
+       u8 major_revision, minor_revision;
        struct xhci_hub *rhub;
 
        temp = readl(addr);
        major_revision = XHCI_EXT_PORT_MAJOR(temp);
+       minor_revision = XHCI_EXT_PORT_MINOR(temp);
 
        if (major_revision == 0x03) {
                rhub = &xhci->usb3_rhub;
@@ -2137,7 +2138,9 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports,
                return;
        }
        rhub->maj_rev = XHCI_EXT_PORT_MAJOR(temp);
-       rhub->min_rev = XHCI_EXT_PORT_MINOR(temp);
+
+       if (rhub->min_rev < minor_revision)
+               rhub->min_rev = minor_revision;
 
        /* Port offset and count in the third dword, see section 7.2 */
        temp = readl(addr + 2);
index fcf1f3f..783e668 100644 (file)
@@ -201,6 +201,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
        if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
                        pdev->device == 0x1042)
                xhci->quirks |= XHCI_BROKEN_STREAMS;
+       if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
+                       pdev->device == 0x1142)
+               xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 
        if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241)
                xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7;
@@ -213,13 +216,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 #ifdef CONFIG_ACPI
 static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev)
 {
-       static const u8 intel_dsm_uuid[] = {
-               0xb7, 0x0c, 0x34, 0xac, 0x01, 0xe9, 0xbf, 0x45,
-               0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23,
-       };
+       static const guid_t intel_dsm_guid =
+               GUID_INIT(0xac340cb7, 0xe901, 0x45bf,
+                         0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23);
        union acpi_object *obj;
 
-       obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1,
+       obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), &intel_dsm_guid, 3, 1,
                                NULL);
        ACPI_FREE(obj);
 }
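
The deleted byte array and the GUID_INIT() replacing it encode the same value:
guid_t stores the first three fields little-endian, followed by eight raw
bytes. A standalone check against the exact bytes in this hunk:

  #include <stdint.h>
  #include <string.h>
  #include <stdio.h>

  int main(void)
  {
          const uint8_t raw[16] = {
                  0xb7, 0x0c, 0x34, 0xac, 0x01, 0xe9, 0xbf, 0x45,
                  0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23,
          };
          uint32_t a = 0xac340cb7;
          uint16_t b = 0xe901, c = 0x45bf;
          uint8_t enc[16];

          /* serialize a, b, c little-endian; tail bytes verbatim */
          enc[0] = a;       enc[1] = a >> 8;
          enc[2] = a >> 16; enc[3] = a >> 24;
          enc[4] = b;       enc[5] = b >> 8;
          enc[6] = c;       enc[7] = c >> 8;
          memcpy(enc + 8, raw + 8, 8);

          printf("match: %d\n", !memcmp(enc, raw, 16));  /* match: 1 */
          return 0;
  }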
index 07397bd..81251aa 100644 (file)
@@ -55,13 +55,13 @@ struct ucsi {
 
 static int ucsi_acpi_cmd(struct ucsi *ucsi, struct ucsi_control *ctrl)
 {
-       uuid_le uuid = UUID_LE(0x6f8398c2, 0x7ca4, 0x11e4,
-                              0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f);
+       guid_t guid = GUID_INIT(0x6f8398c2, 0x7ca4, 0x11e4,
+                               0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f);
        union acpi_object *obj;
 
        ucsi->data->ctrl.raw_cmd = ctrl->raw_cmd;
 
-       obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), uuid.b, 1, 1, NULL);
+       obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), &guid, 1, 1, NULL);
        if (!obj) {
                dev_err(ucsi->dev, "%s: failed to evaluate _DSM\n", __func__);
                return -EIO;
index d5a7b21..c2ce252 100644 (file)
@@ -105,8 +105,8 @@ enum wcove_typec_role {
        WCOVE_ROLE_DEVICE,
 };
 
-static uuid_le uuid = UUID_LE(0x482383f0, 0x2876, 0x4e49,
-                             0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37);
+static guid_t guid = GUID_INIT(0x482383f0, 0x2876, 0x4e49,
+                              0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37);
 
 static int wcove_typec_func(struct wcove_typec *wcove,
                            enum wcove_typec_func func, int param)
@@ -118,7 +118,7 @@ static int wcove_typec_func(struct wcove_typec *wcove,
        tmp.type = ACPI_TYPE_INTEGER;
        tmp.integer.value = param;
 
-       obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), uuid.b, 1, func,
+       obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), &guid, 1, func,
                                &argv4);
        if (!obj) {
                dev_err(wcove->dev, "%s: failed to evaluate _DSM\n", __func__);
@@ -314,7 +314,7 @@ static int wcove_typec_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), uuid.b, 0, 0x1f)) {
+       if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), &guid, 0, 0x1f)) {
                dev_err(&pdev->dev, "Missing _DSM functions\n");
                return -ENODEV;
        }
index 687ebb0..41d7979 100644 (file)
@@ -1048,7 +1048,7 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 
        for (i = 0; i < (128 - edid[2]) / DETAILED_TIMING_DESCRIPTION_SIZE;
             i++, block += DETAILED_TIMING_DESCRIPTION_SIZE)
-               if (PIXEL_CLOCK)
+               if (PIXEL_CLOCK != 0)
                        edt[num++] = block - edid;
 
        /* Yikes, EDID data is totally useless */
index ec2e7e3..449fcea 100644 (file)
@@ -1646,8 +1646,9 @@ static int ufx_usb_probe(struct usb_interface *interface,
        dev_dbg(dev->gdev, "%s %s - serial #%s\n",
                usbdev->manufacturer, usbdev->product, usbdev->serial);
        dev_dbg(dev->gdev, "vid_%04x&pid_%04x&rev_%04x driver's ufx_data struct at %p\n",
-               usbdev->descriptor.idVendor, usbdev->descriptor.idProduct,
-               usbdev->descriptor.bcdDevice, dev);
+               le16_to_cpu(usbdev->descriptor.idVendor),
+               le16_to_cpu(usbdev->descriptor.idProduct),
+               le16_to_cpu(usbdev->descriptor.bcdDevice), dev);
        dev_dbg(dev->gdev, "console enable=%d\n", console);
        dev_dbg(dev->gdev, "fb_defio enable=%d\n", fb_defio);
 
index 6a3c353..05ef657 100644 (file)
@@ -1105,8 +1105,8 @@ static int dlfb_ops_blank(int blank_mode, struct fb_info *info)
        char *bufptr;
        struct urb *urb;
 
-       pr_info("/dev/fb%d FB_BLANK mode %d --> %d\n",
-               info->node, dev->blank_mode, blank_mode);
+       pr_debug("/dev/fb%d FB_BLANK mode %d --> %d\n",
+                info->node, dev->blank_mode, blank_mode);
 
        if ((dev->blank_mode == FB_BLANK_POWERDOWN) &&
            (blank_mode != FB_BLANK_POWERDOWN)) {
@@ -1613,8 +1613,9 @@ static int dlfb_usb_probe(struct usb_interface *interface,
        pr_info("%s %s - serial #%s\n",
                usbdev->manufacturer, usbdev->product, usbdev->serial);
        pr_info("vid_%04x&pid_%04x&rev_%04x driver's dlfb_data struct at %p\n",
-               usbdev->descriptor.idVendor, usbdev->descriptor.idProduct,
-               usbdev->descriptor.bcdDevice, dev);
+               le16_to_cpu(usbdev->descriptor.idVendor),
+               le16_to_cpu(usbdev->descriptor.idProduct),
+               le16_to_cpu(usbdev->descriptor.bcdDevice), dev);
        pr_info("console enable=%d\n", console);
        pr_info("fb_defio enable=%d\n", fb_defio);
        pr_info("shadow enable=%d\n", shadow);
index f9718f0..badee04 100644 (file)
@@ -1630,16 +1630,14 @@ static void viafb_init_proc(struct viafb_shared *shared)
 }
 static void viafb_remove_proc(struct viafb_shared *shared)
 {
-       struct proc_dir_entry *viafb_entry = shared->proc_entry,
-               *iga1_entry = shared->iga1_proc_entry,
-               *iga2_entry = shared->iga2_proc_entry;
+       struct proc_dir_entry *viafb_entry = shared->proc_entry;
 
        if (!viafb_entry)
                return;
 
-       remove_proc_entry("output_devices", iga2_entry);
+       remove_proc_entry("output_devices", shared->iga2_proc_entry);
        remove_proc_entry("iga2", viafb_entry);
-       remove_proc_entry("output_devices", iga1_entry);
+       remove_proc_entry("output_devices", shared->iga1_proc_entry);
        remove_proc_entry("iga1", viafb_entry);
        remove_proc_entry("supported_output_devices", viafb_entry);
 
index 408c174..22caf80 100644 (file)
@@ -663,6 +663,12 @@ static int virtballoon_restore(struct virtio_device *vdev)
 }
 #endif
 
+static int virtballoon_validate(struct virtio_device *vdev)
+{
+       __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
+       return 0;
+}
+
 static unsigned int features[] = {
        VIRTIO_BALLOON_F_MUST_TELL_HOST,
        VIRTIO_BALLOON_F_STATS_VQ,
@@ -675,6 +681,7 @@ static struct virtio_driver virtio_balloon_driver = {
        .driver.name =  KBUILD_MODNAME,
        .driver.owner = THIS_MODULE,
        .id_table =     id_table,
+       .validate =     virtballoon_validate,
        .probe =        virtballoon_probe,
        .remove =       virtballoon_remove,
        .config_changed = virtballoon_changed,
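
virtio_balloon is an early user of the new ->validate hook, which runs after
the device's feature bits are read but before the driver accepts them, so a
driver can veto features it cannot support; the balloon clears
VIRTIO_F_IOMMU_PLATFORM because it hands raw page frame numbers to the host.
A sketch of the hook for a hypothetical driver (my_validate is illustrative):

  static int my_validate(struct virtio_device *vdev)
  {
          /* refuse a feature this driver cannot support */
          __virtio_clear_bit(vdev, VIRTIO_F_ANY_LAYOUT);
          return 0;             /* nonzero aborts the probe */
  }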
index 4ac2ca8..bf13d1e 100644 (file)
@@ -233,12 +233,12 @@ static int tmem_cleancache_init_fs(size_t pagesize)
        return xen_tmem_new_pool(uuid_private, 0, pagesize);
 }
 
-static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
+static int tmem_cleancache_init_shared_fs(uuid_t *uuid, size_t pagesize)
 {
        struct tmem_pool_uuid shared_uuid;
 
-       shared_uuid.uuid_lo = *(u64 *)uuid;
-       shared_uuid.uuid_hi = *(u64 *)(&uuid[8]);
+       shared_uuid.uuid_lo = *(u64 *)&uuid->b[0];
+       shared_uuid.uuid_hi = *(u64 *)&uuid->b[8];
        return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
 }
 
index 3062cce..782d4d0 100644 (file)
@@ -350,7 +350,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 {
        struct sockaddr_rxrpc srx;
        struct afs_server *server;
-       struct uuid_v1 *r;
+       struct afs_uuid *r;
        unsigned loop;
        __be32 *b;
        int ret;
@@ -380,7 +380,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
                }
 
                _debug("unmarshall UUID");
-               call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+               call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
                if (!call->request)
                        return -ENOMEM;
 
@@ -453,7 +453,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 {
        struct afs_call *call = container_of(work, struct afs_call, work);
-       struct uuid_v1 *r = call->request;
+       struct afs_uuid *r = call->request;
 
        struct {
                __be32  match;
@@ -476,7 +476,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
  */
 static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 {
-       struct uuid_v1 *r;
+       struct afs_uuid *r;
        unsigned loop;
        __be32 *b;
        int ret;
@@ -502,15 +502,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
                }
 
                _debug("unmarshall UUID");
-               call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+               call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
                if (!call->request)
                        return -ENOMEM;
 
                b = call->buffer;
                r = call->request;
-               r->time_low                     = b[0];
-               r->time_mid                     = htons(ntohl(b[1]));
-               r->time_hi_and_version          = htons(ntohl(b[2]));
+               r->time_low                     = ntohl(b[0]);
+               r->time_mid                     = ntohl(b[1]);
+               r->time_hi_and_version          = ntohl(b[2]);
                r->clock_seq_hi_and_reserved    = ntohl(b[3]);
                r->clock_seq_low                = ntohl(b[4]);
 
index 3936729..4e25566 100644
@@ -410,6 +410,15 @@ struct afs_interface {
        unsigned        mtu;            /* MTU of interface */
 };
 
+struct afs_uuid {
+       __be32          time_low;                       /* low part of timestamp */
+       __be16          time_mid;                       /* mid part of timestamp */
+       __be16          time_hi_and_version;            /* high part of timestamp and version  */
+       __u8            clock_seq_hi_and_reserved;      /* clock seq hi and variant */
+       __u8            clock_seq_low;                  /* clock seq low */
+       __u8            node[6];                        /* spatially unique node ID (MAC addr) */
+};
+
 /*****************************************************************************/
 /*
  * cache.c
@@ -544,7 +553,7 @@ extern int afs_drop_inode(struct inode *);
  * main.c
  */
 extern struct workqueue_struct *afs_wq;
-extern struct uuid_v1 afs_uuid;
+extern struct afs_uuid afs_uuid;
 
 /*
  * misc.c
index 51d7d17..9944770 100644
@@ -31,7 +31,7 @@ static char *rootcell;
 module_param(rootcell, charp, 0);
 MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
 
-struct uuid_v1 afs_uuid;
+struct afs_uuid afs_uuid;
 struct workqueue_struct *afs_wq;
 
 /*
index f52d925..dcad3a6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1541,7 +1541,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        ssize_t ret;
 
        /* enforce forwards compatibility on users */
-       if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
+       if (unlikely(iocb->aio_reserved2)) {
                pr_debug("EINVAL: reserve field set\n");
                return -EINVAL;
        }
@@ -1568,6 +1568,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        req->common.ki_pos = iocb->aio_offset;
        req->common.ki_complete = aio_complete;
        req->common.ki_flags = iocb_flags(req->common.ki_filp);
+       req->common.ki_hint = file_write_hint(file);
 
        if (iocb->aio_flags & IOCB_FLAG_RESFD) {
                /*
@@ -1586,6 +1587,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                req->common.ki_flags |= IOCB_EVENTFD;
        }
 
+       ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
+       if (unlikely(ret)) {
+               pr_debug("EINVAL: aio_rw_flags\n");
+               goto out_put_req;
+       }
+
+       if ((req->common.ki_flags & IOCB_NOWAIT) &&
+                       !(req->common.ki_flags & IOCB_DIRECT)) {
+               ret = -EOPNOTSUPP;
+               goto out_put_req;
+       }
+
        ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
        if (unlikely(ret)) {
                pr_debug("EFAULT: aio_key\n");
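
The fs/aio.c hunks wire up per-request flags: the old aio_reserved1
slot becomes aio_rw_flags in the userspace ABI (hence only
aio_reserved2 is still checked), kiocb_set_rw_flags() translates the
RWF_* bits into IOCB_* flags, and RWF_NOWAIT is rejected with
-EOPNOTSUPP unless the request is also direct I/O. Only files whose
filesystem opts in via FMODE_AIO_NOWAIT accept the flag at all (btrfs
does so in a hunk further down). A userspace sketch of submitting such
a request; the fallback definition assumes this series' uapi value, and
older headers still call the field aio_reserved1:

#include <fcntl.h>
#include <linux/aio_abi.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef RWF_NOWAIT
#define RWF_NOWAIT 0x00000008	/* added to the uapi by this series */
#endif

/* Submit one O_DIRECT write that fails fast with EAGAIN rather than
 * blocking; fd must be open with O_DIRECT, and buf/len/off must meet
 * the usual direct-I/O alignment rules. */
static int submit_nowait_pwrite(aio_context_t ctx, int fd,
				void *buf, size_t len, off_t off)
{
	struct iocb cb;
	struct iocb *cbs[1] = { &cb };

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_lio_opcode = IOCB_CMD_PWRITE;
	cb.aio_buf = (uintptr_t)buf;
	cb.aio_nbytes = len;
	cb.aio_offset = off;
	cb.aio_rw_flags = RWF_NOWAIT;

	return syscall(__NR_io_submit, ctx, 1, cbs);
}
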
index 734cbf8..dd9f1be 100644
@@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
        int status;
 
        token = (autofs_wqt_t) param->fail.token;
-       status = param->fail.status ? param->fail.status : -ENOENT;
+       status = param->fail.status < 0 ? param->fail.status : -ENOENT;
        return autofs4_wait_release(sbi, token, status);
 }
 
index 519599d..a7df151 100644
@@ -225,6 +225,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
        bio_init(&bio, vecs, nr_pages);
        bio.bi_bdev = bdev;
        bio.bi_iter.bi_sector = pos >> 9;
+       bio.bi_write_hint = iocb->ki_hint;
        bio.bi_private = current;
        bio.bi_end_io = blkdev_bio_end_io_simple;
 
@@ -262,8 +263,11 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
        if (vecs != inline_vecs)
                kfree(vecs);
 
-       if (unlikely(bio.bi_error))
-               return bio.bi_error;
+       if (unlikely(bio.bi_status))
+               ret = blk_status_to_errno(bio.bi_status);
+
+       bio_uninit(&bio);
+
        return ret;
 }
 
@@ -288,16 +292,18 @@ static void blkdev_bio_end_io(struct bio *bio)
        bool should_dirty = dio->should_dirty;
 
        if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
-               if (bio->bi_error && !dio->bio.bi_error)
-                       dio->bio.bi_error = bio->bi_error;
+               if (bio->bi_status && !dio->bio.bi_status)
+                       dio->bio.bi_status = bio->bi_status;
        } else {
                if (!dio->is_sync) {
                        struct kiocb *iocb = dio->iocb;
-                       ssize_t ret = dio->bio.bi_error;
+                       ssize_t ret;
 
-                       if (likely(!ret)) {
+                       if (likely(!dio->bio.bi_status)) {
                                ret = dio->size;
                                iocb->ki_pos += ret;
+                       } else {
+                               ret = blk_status_to_errno(dio->bio.bi_status);
                        }
 
                        dio->iocb->ki_complete(iocb, ret, 0);
@@ -334,7 +340,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
        bool is_read = (iov_iter_rw(iter) == READ), is_sync;
        loff_t pos = iocb->ki_pos;
        blk_qc_t qc = BLK_QC_T_NONE;
-       int ret;
+       int ret = 0;
 
        if ((pos | iov_iter_alignment(iter)) &
            (bdev_logical_block_size(bdev) - 1))
@@ -358,12 +364,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
        for (;;) {
                bio->bi_bdev = bdev;
                bio->bi_iter.bi_sector = pos >> 9;
+               bio->bi_write_hint = iocb->ki_hint;
                bio->bi_private = dio;
                bio->bi_end_io = blkdev_bio_end_io;
 
                ret = bio_iov_iter_get_pages(bio, iter);
                if (unlikely(ret)) {
-                       bio->bi_error = ret;
+                       bio->bi_status = BLK_STS_IOERR;
                        bio_endio(bio);
                        break;
                }
@@ -412,7 +419,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
        }
        __set_current_state(TASK_RUNNING);
 
-       ret = dio->bio.bi_error;
+       if (!ret)
+               ret = blk_status_to_errno(dio->bio.bi_status);
        if (likely(!ret))
                ret = dio->size;
 
@@ -436,7 +444,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
 static __init int blkdev_init(void)
 {
-       blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio));
+       blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
        if (!blkdev_dio_pool)
                return -ENOMEM;
        return 0;
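
The hunks above, and nearly all of the btrfs ones below, are the same
mechanical conversion: the int bio->bi_error field is replaced by a
dedicated blk_status_t bio->bi_status, and an errno is produced only at
the boundary to callers that expect one. The new convention in a
completion handler, as a minimal sketch (my_end_io is hypothetical;
blk_status_to_errno() is the real block-layer helper):

#include <linux/bio.h>
#include <linux/blk_types.h>
#include <linux/printk.h>

static void my_end_io(struct bio *bio)
{
	/* Test bi_status directly; translate to an errno only when
	 * reporting outside the block layer. */
	if (bio->bi_status)
		pr_debug("I/O failed: %d\n",
			 blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}
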
index b8622e4..d87ac27 100644
@@ -310,7 +310,8 @@ struct btrfs_dio_private {
         * The original bio may be split to several sub-bios, this is
         * done during endio of sub-bios
         */
-       int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
+       blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
+                       blk_status_t);
 };
 
 /*
index ab14c2e..4ded1c3 100644
@@ -2129,7 +2129,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
        /* mutex is not held! This is not safe if IO is not yet completed
         * on umount */
        iodone_w_error = 0;
-       if (bp->bi_error)
+       if (bp->bi_status)
                iodone_w_error = 1;
 
        BUG_ON(NULL == block);
@@ -2143,7 +2143,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
                if ((dev_state->state->print_mask &
                     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
                        pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
-                              bp->bi_error,
+                              bp->bi_status,
                               btrfsic_get_block_type(dev_state->state, block),
                               block->logical_bytenr, dev_state->name,
                               block->dev_bytenr, block->mirror_num);
index 10e6b28..a2fad39 100644
@@ -155,7 +155,7 @@ static void end_compressed_bio_read(struct bio *bio)
        unsigned long index;
        int ret;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                cb->errors = 1;
 
        /* if there are more bios still pending for this compressed
@@ -268,7 +268,7 @@ static void end_compressed_bio_write(struct bio *bio)
        struct page *page;
        unsigned long index;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                cb->errors = 1;
 
        /* if there are more bios still pending for this compressed
@@ -287,7 +287,7 @@ static void end_compressed_bio_write(struct bio *bio)
                                         cb->start,
                                         cb->start + cb->len - 1,
                                         NULL,
-                                        bio->bi_error ? 0 : 1);
+                                        bio->bi_status ? 0 : 1);
        cb->compressed_pages[0]->mapping = NULL;
 
        end_compressed_writeback(inode, cb);
@@ -320,7 +320,7 @@ out:
  * This also checksums the file bytes and gets things ready for
  * the end io hooks.
  */
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
                                 unsigned long len, u64 disk_start,
                                 unsigned long compressed_len,
                                 struct page **compressed_pages,
@@ -335,13 +335,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
        struct page *page;
        u64 first_byte = disk_start;
        struct block_device *bdev;
-       int ret;
+       blk_status_t ret;
        int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
        WARN_ON(start & ((u64)PAGE_SIZE - 1));
        cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
        if (!cb)
-               return -ENOMEM;
+               return BLK_STS_RESOURCE;
        refcount_set(&cb->pending_bios, 0);
        cb->errors = 0;
        cb->inode = inode;
@@ -358,7 +358,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
        bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
        if (!bio) {
                kfree(cb);
-               return -ENOMEM;
+               return BLK_STS_RESOURCE;
        }
        bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
        bio->bi_private = cb;
@@ -368,17 +368,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
        /* create and submit bios for the compressed pages */
        bytes_left = compressed_len;
        for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+               int submit = 0;
+
                page = compressed_pages[pg_index];
                page->mapping = inode->i_mapping;
                if (bio->bi_iter.bi_size)
-                       ret = io_tree->ops->merge_bio_hook(page, 0,
+                       submit = io_tree->ops->merge_bio_hook(page, 0,
                                                           PAGE_SIZE,
                                                           bio, 0);
-               else
-                       ret = 0;
 
                page->mapping = NULL;
-               if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+               if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
                    PAGE_SIZE) {
                        bio_get(bio);
 
@@ -400,7 +400,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
                        ret = btrfs_map_bio(fs_info, bio, 0, 1);
                        if (ret) {
-                               bio->bi_error = ret;
+                               bio->bi_status = ret;
                                bio_endio(bio);
                        }
 
@@ -434,7 +434,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
        ret = btrfs_map_bio(fs_info, bio, 0, 1);
        if (ret) {
-               bio->bi_error = ret;
+               bio->bi_status = ret;
                bio_endio(bio);
        }
 
@@ -569,7 +569,7 @@ next:
  * After the compressed pages are read, we copy the bytes into the
  * bio we were passed and then call the bio's end_io handlers
  */
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -586,7 +586,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        u64 em_len;
        u64 em_start;
        struct extent_map *em;
-       int ret = -ENOMEM;
+       blk_status_t ret = BLK_STS_RESOURCE;
        int faili = 0;
        u32 *sums;
 
@@ -600,7 +600,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                                   PAGE_SIZE);
        read_unlock(&em_tree->lock);
        if (!em)
-               return -EIO;
+               return BLK_STS_IOERR;
 
        compressed_len = em->block_len;
        cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -638,7 +638,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                                                              __GFP_HIGHMEM);
                if (!cb->compressed_pages[pg_index]) {
                        faili = pg_index - 1;
-                       ret = -ENOMEM;
+                       ret = BLK_STS_RESOURCE;
                        goto fail2;
                }
        }
@@ -659,19 +659,19 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        refcount_set(&cb->pending_bios, 1);
 
        for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+               int submit = 0;
+
                page = cb->compressed_pages[pg_index];
                page->mapping = inode->i_mapping;
                page->index = em_start >> PAGE_SHIFT;
 
                if (comp_bio->bi_iter.bi_size)
-                       ret = tree->ops->merge_bio_hook(page, 0,
+                       submit = tree->ops->merge_bio_hook(page, 0,
                                                        PAGE_SIZE,
                                                        comp_bio, 0);
-               else
-                       ret = 0;
 
                page->mapping = NULL;
-               if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+               if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
                    PAGE_SIZE) {
                        bio_get(comp_bio);
 
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
                        ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
                        if (ret) {
-                               comp_bio->bi_error = ret;
+                               comp_bio->bi_status = ret;
                                bio_endio(comp_bio);
                        }
 
@@ -726,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
        ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
        if (ret) {
-               comp_bio->bi_error = ret;
+               comp_bio->bi_status = ret;
                bio_endio(comp_bio);
        }
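
BLK_STS_RESOURCE and BLK_STS_IOERR above stand in for -ENOMEM and -EIO,
but blk_status_t values are small positive codes rather than negative
errnos, so every conversion has to go through the block layer's mapping
helpers instead of a cast. A round-trip sketch (the helpers are the
real API; the function is illustrative):

#include <linux/blk_types.h>
#include <linux/blkdev.h>
#include <linux/bug.h>
#include <linux/errno.h>

static void blk_status_roundtrip(void)
{
	blk_status_t st = errno_to_blk_status(-ENOMEM);	/* BLK_STS_RESOURCE */
	int err = blk_status_to_errno(st);		/* back to -ENOMEM */

	WARN_ON(err != -ENOMEM);
}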
 
index 39ec43a..680d426 100644
@@ -48,12 +48,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
                              unsigned long total_out, u64 disk_start,
                              struct bio *bio);
 
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
                                  unsigned long len, u64 disk_start,
                                  unsigned long compressed_len,
                                  struct page **compressed_pages,
                                  unsigned long nr_pages);
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags);
 
 enum btrfs_compression_type {
index 4f8f75d..a0d0c79 100644
@@ -3078,8 +3078,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
 struct btrfs_dio_private;
 int btrfs_del_csums(struct btrfs_trans_handle *trans,
                    struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
                              u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
@@ -3094,7 +3094,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
                       u64 file_start, int contig);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
                             struct list_head *list, int search_commit);
index 5f678dc..6036d15 100644
@@ -87,7 +87,7 @@ struct btrfs_end_io_wq {
        bio_end_io_t *end_io;
        void *private;
        struct btrfs_fs_info *info;
-       int error;
+       blk_status_t status;
        enum btrfs_wq_endio_type metadata;
        struct list_head list;
        struct btrfs_work work;
@@ -131,7 +131,7 @@ struct async_submit_bio {
         */
        u64 bio_offset;
        struct btrfs_work work;
-       int error;
+       blk_status_t status;
 };
 
 /*
@@ -799,7 +799,7 @@ static void end_workqueue_bio(struct bio *bio)
        btrfs_work_func_t func;
 
        fs_info = end_io_wq->info;
-       end_io_wq->error = bio->bi_error;
+       end_io_wq->status = bio->bi_status;
 
        if (bio_op(bio) == REQ_OP_WRITE) {
                if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -836,19 +836,19 @@ static void end_workqueue_bio(struct bio *bio)
        btrfs_queue_work(wq, &end_io_wq->work);
 }
 
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
                        enum btrfs_wq_endio_type metadata)
 {
        struct btrfs_end_io_wq *end_io_wq;
 
        end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
        if (!end_io_wq)
-               return -ENOMEM;
+               return BLK_STS_RESOURCE;
 
        end_io_wq->private = bio->bi_private;
        end_io_wq->end_io = bio->bi_end_io;
        end_io_wq->info = info;
-       end_io_wq->error = 0;
+       end_io_wq->status = 0;
        end_io_wq->bio = bio;
        end_io_wq->metadata = metadata;
 
@@ -868,14 +868,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
 static void run_one_async_start(struct btrfs_work *work)
 {
        struct async_submit_bio *async;
-       int ret;
+       blk_status_t ret;
 
        async = container_of(work, struct  async_submit_bio, work);
        ret = async->submit_bio_start(async->inode, async->bio,
                                      async->mirror_num, async->bio_flags,
                                      async->bio_offset);
        if (ret)
-               async->error = ret;
+               async->status = ret;
 }
 
 static void run_one_async_done(struct btrfs_work *work)
@@ -898,8 +898,8 @@ static void run_one_async_done(struct btrfs_work *work)
                wake_up(&fs_info->async_submit_wait);
 
        /* If an error occurred we just want to clean up the bio and move on */
-       if (async->error) {
-               async->bio->bi_error = async->error;
+       if (async->status) {
+               async->bio->bi_status = async->status;
                bio_endio(async->bio);
                return;
        }
@@ -916,18 +916,17 @@ static void run_one_async_free(struct btrfs_work *work)
        kfree(async);
 }
 
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
-                       struct bio *bio, int mirror_num,
-                       unsigned long bio_flags,
-                       u64 bio_offset,
-                       extent_submit_bio_hook_t *submit_bio_start,
-                       extent_submit_bio_hook_t *submit_bio_done)
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+               struct inode *inode, struct bio *bio, int mirror_num,
+               unsigned long bio_flags, u64 bio_offset,
+               extent_submit_bio_hook_t *submit_bio_start,
+               extent_submit_bio_hook_t *submit_bio_done)
 {
        struct async_submit_bio *async;
 
        async = kmalloc(sizeof(*async), GFP_NOFS);
        if (!async)
-               return -ENOMEM;
+               return BLK_STS_RESOURCE;
 
        async->inode = inode;
        async->bio = bio;
@@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
        async->bio_flags = bio_flags;
        async->bio_offset = bio_offset;
 
-       async->error = 0;
+       async->status = 0;
 
        atomic_inc(&fs_info->nr_async_submits);
 
@@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
        return 0;
 }
 
-static int btree_csum_one_bio(struct bio *bio)
+static blk_status_t btree_csum_one_bio(struct bio *bio)
 {
        struct bio_vec *bvec;
        struct btrfs_root *root;
@@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio)
                        break;
        }
 
-       return ret;
+       return errno_to_blk_status(ret);
 }
 
-static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
-                                   int mirror_num, unsigned long bio_flags,
-                                   u64 bio_offset)
+static blk_status_t __btree_submit_bio_start(struct inode *inode,
+               struct bio *bio, int mirror_num, unsigned long bio_flags,
+               u64 bio_offset)
 {
        /*
         * when we're called for a write, we're already in the async
@@ -986,11 +985,11 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
        return btree_csum_one_bio(bio);
 }
 
-static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
-                                int mirror_num, unsigned long bio_flags,
-                                u64 bio_offset)
+static blk_status_t __btree_submit_bio_done(struct inode *inode,
+               struct bio *bio, int mirror_num, unsigned long bio_flags,
+               u64 bio_offset)
 {
-       int ret;
+       blk_status_t ret;
 
        /*
         * when we're called for a write, we're already in the async
@@ -998,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
         */
        ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
        if (ret) {
-               bio->bi_error = ret;
+               bio->bi_status = ret;
                bio_endio(bio);
        }
        return ret;
@@ -1015,13 +1014,13 @@ static int check_async_write(unsigned long bio_flags)
        return 1;
 }
 
-static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
                                 int mirror_num, unsigned long bio_flags,
                                 u64 bio_offset)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        int async = check_async_write(bio_flags);
-       int ret;
+       blk_status_t ret;
 
        if (bio_op(bio) != REQ_OP_WRITE) {
                /*
@@ -1054,7 +1053,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
        return 0;
 
 out_w_error:
-       bio->bi_error = ret;
+       bio->bi_status = ret;
        bio_endio(bio);
        return ret;
 }
@@ -1820,7 +1819,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
        end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
        bio = end_io_wq->bio;
 
-       bio->bi_error = end_io_wq->error;
+       bio->bi_status = end_io_wq->status;
        bio->bi_private = end_io_wq->private;
        bio->bi_end_io = end_io_wq->end_io;
        kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
@@ -3497,11 +3496,11 @@ static void btrfs_end_empty_barrier(struct bio *bio)
  * any device where the flush fails with eopnotsupp is flagged as not-barrier
  * capable
  */
-static int write_dev_flush(struct btrfs_device *device, int wait)
+static blk_status_t write_dev_flush(struct btrfs_device *device, int wait)
 {
        struct request_queue *q = bdev_get_queue(device->bdev);
        struct bio *bio;
-       int ret = 0;
+       blk_status_t ret = 0;
 
        if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
                return 0;
@@ -3513,8 +3512,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
 
                wait_for_completion(&device->flush_wait);
 
-               if (bio->bi_error) {
-                       ret = bio->bi_error;
+               if (bio->bi_status) {
+                       ret = bio->bi_status;
                        btrfs_dev_stat_inc_and_print(device,
                                BTRFS_DEV_STAT_FLUSH_ERRS);
                }
@@ -3533,7 +3532,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
        device->flush_bio = NULL;
        bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
        if (!bio)
-               return -ENOMEM;
+               return BLK_STS_RESOURCE;
 
        bio->bi_end_io = btrfs_end_empty_barrier;
        bio->bi_bdev = device->bdev;
@@ -3558,7 +3557,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
        struct btrfs_device *dev;
        int errors_send = 0;
        int errors_wait = 0;
-       int ret;
+       blk_status_t ret;
 
        /* send down all the barriers */
        head = &info->fs_devices->devices;
index 21f1ceb..c581927 100644
@@ -118,13 +118,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
 u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
 void btrfs_csum_final(u32 crc, u8 *result);
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
                        enum btrfs_wq_endio_type metadata);
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
-                       struct bio *bio, int mirror_num,
-                       unsigned long bio_flags, u64 bio_offset,
-                       extent_submit_bio_hook_t *submit_bio_start,
-                       extent_submit_bio_hook_t *submit_bio_done);
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+               struct inode *inode, struct bio *bio, int mirror_num,
+               unsigned long bio_flags, u64 bio_offset,
+               extent_submit_bio_hook_t *submit_bio_start,
+               extent_submit_bio_hook_t *submit_bio_done);
 unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
 int btrfs_write_tree_block(struct extent_buffer *buf);
 int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
index d3619e0..d1cd601 100644
@@ -174,7 +174,8 @@ int __init extent_io_init(void)
                goto free_state_cache;
 
        btrfs_bioset = bioset_create(BIO_POOL_SIZE,
-                                    offsetof(struct btrfs_io_bio, bio));
+                                    offsetof(struct btrfs_io_bio, bio),
+                                    BIOSET_NEED_BVECS);
        if (!btrfs_bioset)
                goto free_buffer_cache;
 
@@ -2399,6 +2400,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
        struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
        struct bio *bio;
        int read_mode = 0;
+       blk_status_t status;
        int ret;
 
        BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2431,11 +2433,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
                read_mode, failrec->this_mirror, failrec->in_validation);
 
-       ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
+       status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
                                         failrec->bio_flags, 0);
-       if (ret) {
+       if (status) {
                free_io_failure(BTRFS_I(inode), failrec);
                bio_put(bio);
+               ret = blk_status_to_errno(status);
        }
 
        return ret;
@@ -2474,6 +2477,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
  */
 static void end_bio_extent_writepage(struct bio *bio)
 {
+       int error = blk_status_to_errno(bio->bi_status);
        struct bio_vec *bvec;
        u64 start;
        u64 end;
@@ -2503,7 +2507,7 @@ static void end_bio_extent_writepage(struct bio *bio)
                start = page_offset(page);
                end = start + bvec->bv_offset + bvec->bv_len - 1;
 
-               end_extent_writepage(page, bio->bi_error, start, end);
+               end_extent_writepage(page, error, start, end);
                end_page_writeback(page);
        }
 
@@ -2536,7 +2540,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
 static void end_bio_extent_readpage(struct bio *bio)
 {
        struct bio_vec *bvec;
-       int uptodate = !bio->bi_error;
+       int uptodate = !bio->bi_status;
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
        struct extent_io_tree *tree;
        u64 offset = 0;
@@ -2556,7 +2560,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 
                btrfs_debug(fs_info,
                        "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
-                       (u64)bio->bi_iter.bi_sector, bio->bi_error,
+                       (u64)bio->bi_iter.bi_sector, bio->bi_status,
                        io_bio->mirror_num);
                tree = &BTRFS_I(inode)->io_tree;
 
@@ -2615,7 +2619,7 @@ static void end_bio_extent_readpage(struct bio *bio)
                                ret = bio_readpage_error(bio, offset, page,
                                                         start, end, mirror);
                                if (ret == 0) {
-                                       uptodate = !bio->bi_error;
+                                       uptodate = !bio->bi_status;
                                        offset += len;
                                        continue;
                                }
@@ -2673,7 +2677,7 @@ readpage_ok:
                endio_readpage_release_extent(tree, extent_start, extent_len,
                                              uptodate);
        if (io_bio->end_io)
-               io_bio->end_io(io_bio, bio->bi_error);
+               io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
        bio_put(bio);
 }
 
@@ -2743,7 +2747,7 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
                                       unsigned long bio_flags)
 {
-       int ret = 0;
+       blk_status_t ret = 0;
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
        struct page *page = bvec->bv_page;
        struct extent_io_tree *tree = bio->bi_private;
@@ -2761,7 +2765,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
                btrfsic_submit_bio(bio);
 
        bio_put(bio);
-       return ret;
+       return blk_status_to_errno(ret);
 }
 
 static int merge_bio(struct extent_io_tree *tree, struct page *page,
@@ -2826,6 +2830,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
        bio_add_page(bio, page, page_size, offset);
        bio->bi_end_io = end_io_func;
        bio->bi_private = tree;
+       bio->bi_write_hint = page->mapping->host->i_write_hint;
        bio_set_op_attrs(bio, op, op_flags);
        if (wbc) {
                wbc_init_bio(wbc, bio);
@@ -3707,7 +3712,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
                BUG_ON(!eb);
                done = atomic_dec_and_test(&eb->io_pages);
 
-               if (bio->bi_error ||
+               if (bio->bi_status ||
                    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
                        ClearPageUptodate(page);
                        set_btree_ioerr(page);
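
A second thread running through these hunks is the new per-file write
lifetime hint: file_write_hint() picks the hint off the file or inode,
kiocb->ki_hint carries it into the request, and bio->bi_write_hint
(set above for both the buffered and direct paths) hands it to the
device. Userspace sets the hint with fcntl(); a sketch, with fallback
definitions assuming this series' uapi values (NVMe write streams are
the initial consumer):

#include <fcntl.h>
#include <stdint.h>

#ifndef F_SET_RW_HINT
#define F_LINUX_SPECIFIC_BASE	1024
#define F_SET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 12)
#endif
#ifndef RWH_WRITE_LIFE_SHORT
#define RWH_WRITE_LIFE_SHORT	2
#endif

/* Tag an open file so its future writes are treated as short-lived;
 * the kernel forwards this via ki_hint/bi_write_hint to devices that
 * can exploit it. */
static int set_short_write_lifetime(int fd)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;

	return fcntl(fd, F_SET_RW_HINT, &hint);
}
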
index 1eafa2f..487ca02 100644
@@ -92,9 +92,9 @@ struct btrfs_inode;
 struct btrfs_io_bio;
 struct io_failure_record;
 
-typedef        int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
-                                      int mirror_num, unsigned long bio_flags,
-                                      u64 bio_offset);
+typedef        blk_status_t (extent_submit_bio_hook_t)(struct inode *inode,
+               struct bio *bio, int mirror_num, unsigned long bio_flags,
+               u64 bio_offset);
 struct extent_io_ops {
        /*
         * The following callbacks must always be defined, the function
index 64fcb31..5b1c709 100644
@@ -160,7 +160,7 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
        kfree(bio->csum_allocated);
 }
 
-static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
                                   u64 logical_offset, u32 *dst, int dio)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
 
        path = btrfs_alloc_path();
        if (!path)
-               return -ENOMEM;
+               return BLK_STS_RESOURCE;
 
        nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
        if (!dst) {
@@ -191,7 +191,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
                                        csum_size, GFP_NOFS);
                        if (!btrfs_bio->csum_allocated) {
                                btrfs_free_path(path);
-                               return -ENOMEM;
+                               return BLK_STS_RESOURCE;
                        }
                        btrfs_bio->csum = btrfs_bio->csum_allocated;
                        btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
@@ -303,12 +303,12 @@ next:
        return 0;
 }
 
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
 {
        return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
 }
 
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
 {
        return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
 }
@@ -433,7 +433,7 @@ fail:
        return ret;
 }
 
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
                       u64 file_start, int contig)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -452,7 +452,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
        sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
                       GFP_NOFS);
        if (!sums)
-               return -ENOMEM;
+               return BLK_STS_RESOURCE;
 
        sums->len = bio->bi_iter.bi_size;
        INIT_LIST_HEAD(&sums->list);
index da1096e..59e2dcc 100644
@@ -1875,12 +1875,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        ssize_t num_written = 0;
        bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
        ssize_t err;
-       loff_t pos;
-       size_t count;
+       loff_t pos = iocb->ki_pos;
+       size_t count = iov_iter_count(from);
        loff_t oldsize;
        int clean_page = 0;
 
-       inode_lock(inode);
+       if ((iocb->ki_flags & IOCB_NOWAIT) &&
+                       (iocb->ki_flags & IOCB_DIRECT)) {
+               /* Don't sleep on inode rwsem */
+               if (!inode_trylock(inode))
+                       return -EAGAIN;
+               /*
+                * We will allocate space in case nodatacow is not set,
+                * so bail
+                */
+               if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+                                             BTRFS_INODE_PREALLOC)) ||
+                   check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+                       inode_unlock(inode);
+                       return -EAGAIN;
+               }
+       } else
+               inode_lock(inode);
+
        err = generic_write_checks(iocb, from);
        if (err <= 0) {
                inode_unlock(inode);
@@ -1914,8 +1931,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
         */
        update_time_for_write(inode);
 
-       pos = iocb->ki_pos;
-       count = iov_iter_count(from);
        start_pos = round_down(pos, fs_info->sectorsize);
        oldsize = i_size_read(inode);
        if (start_pos > oldsize) {
@@ -3071,13 +3086,19 @@ out:
        return offset;
 }
 
+static int btrfs_file_open(struct inode *inode, struct file *filp)
+{
+       filp->f_mode |= FMODE_AIO_NOWAIT;
+       return generic_file_open(inode, filp);
+}
+
 const struct file_operations btrfs_file_operations = {
        .llseek         = btrfs_file_llseek,
        .read_iter      = generic_file_read_iter,
        .splice_read    = generic_file_splice_read,
        .write_iter     = btrfs_file_write_iter,
        .mmap           = btrfs_file_mmap,
-       .open           = generic_file_open,
+       .open           = btrfs_file_open,
        .release        = btrfs_release_file,
        .fsync          = btrfs_sync_file,
        .fallocate      = btrfs_fallocate,
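
Together with FMODE_AIO_NOWAIT set in btrfs_file_open(), the write_iter
changes above implement the filesystem side of the RWF_NOWAIT contract:
a NOWAIT direct write must not sleep on the inode lock and must not
allocate space, so it bails out with -EAGAIN and leaves the submitter
to retry from a context that may block. The locking half of that
contract, reduced to a sketch (my_write_iter is hypothetical;
inode_trylock() is the real API):

static ssize_t my_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret = 0;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock(inode))
			return -EAGAIN;	/* submitter retries or blocks */
	} else {
		inode_lock(inode);
	}

	/* ... perform the write under the lock ... */

	inode_unlock(inode);
	return ret;
}
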
index a97fdc1..baacc18 100644
@@ -38,6 +38,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
 {
        SHASH_DESC_ON_STACK(shash, tfm);
        u32 *ctx = (u32 *)shash_desc_ctx(shash);
+       u32 retval;
        int err;
 
        shash->tfm = tfm;
@@ -47,5 +48,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
        err = crypto_shash_update(shash, address, length);
        BUG_ON(err);
 
-       return *ctx;
+       retval = *ctx;
+       barrier_data(ctx);
+       return retval;
 }
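
barrier_data() forces the compiler to treat the pointed-to bytes as
consumed, which defeats dead-store and dead-variable elimination on
stack data; the crc is copied into retval first so that the barrier can
then be applied to the on-stack shash context. The same primitive
underpins memzero_explicit(); its canonical use, as a sketch:

#include <linux/compiler.h>
#include <linux/string.h>

/* Without barrier_data(), the compiler may delete a memset() of a
 * buffer that is never read again (dead-store elimination), leaving
 * the data live on the stack. */
static void wipe_buffer(void *buf, size_t len)
{
	memset(buf, 0, len);
	barrier_data(buf);	/* the zeroed bytes now count as used */
}
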
index ef3c98c..556c930 100644
@@ -842,13 +842,12 @@ retry:
                                NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
                                PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
                                PAGE_SET_WRITEBACK);
-               ret = btrfs_submit_compressed_write(inode,
+               if (btrfs_submit_compressed_write(inode,
                                    async_extent->start,
                                    async_extent->ram_size,
                                    ins.objectid,
                                    ins.offset, async_extent->pages,
-                                   async_extent->nr_pages);
-               if (ret) {
+                                   async_extent->nr_pages)) {
                        struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
                        struct page *p = async_extent->pages[0];
                        const u64 start = async_extent->start;
@@ -1901,11 +1900,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
  * At IO completion time the csums attached to the ordered extent record
  * are inserted into the btree
  */
-static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
-                                   int mirror_num, unsigned long bio_flags,
-                                   u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_start(struct inode *inode,
+               struct bio *bio, int mirror_num, unsigned long bio_flags,
+               u64 bio_offset)
 {
-       int ret = 0;
+       blk_status_t ret = 0;
 
        ret = btrfs_csum_one_bio(inode, bio, 0, 0);
        BUG_ON(ret); /* -ENOMEM */
@@ -1920,16 +1919,16 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
  * At IO completion time the csums attached to the ordered extent record
  * are inserted into the btree
  */
-static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
-                         int mirror_num, unsigned long bio_flags,
-                         u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_done(struct inode *inode,
+               struct bio *bio, int mirror_num, unsigned long bio_flags,
+               u64 bio_offset)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-       int ret;
+       blk_status_t ret;
 
        ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
        if (ret) {
-               bio->bi_error = ret;
+               bio->bi_status = ret;
                bio_endio(bio);
        }
        return ret;
@@ -1939,14 +1938,14 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
  * extent_io.c submission hook. This does the right thing for csum calculation
  * on write, or reading the csums from the tree before a read
  */
-static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
                          int mirror_num, unsigned long bio_flags,
                          u64 bio_offset)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
-       int ret = 0;
+       blk_status_t ret = 0;
        int skip_sum;
        int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
@@ -1991,8 +1990,8 @@ mapit:
        ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
 
 out:
-       if (ret < 0) {
-               bio->bi_error = ret;
+       if (ret) {
+               bio->bi_status = ret;
                bio_endio(bio);
        }
        return ret;
@@ -8037,7 +8036,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
        struct bio_vec *bvec;
        int i;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                goto end;
 
        ASSERT(bio->bi_vcnt == 1);
@@ -8116,7 +8115,7 @@ static void btrfs_retry_endio(struct bio *bio)
        int ret;
        int i;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                goto end;
 
        uptodate = 1;
@@ -8141,8 +8140,8 @@ end:
        bio_put(bio);
 }
 
-static int __btrfs_subio_endio_read(struct inode *inode,
-                                   struct btrfs_io_bio *io_bio, int err)
+static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
+               struct btrfs_io_bio *io_bio, blk_status_t err)
 {
        struct btrfs_fs_info *fs_info;
        struct bio_vec *bvec;
@@ -8184,7 +8183,7 @@ try_again:
                                io_bio->mirror_num,
                                btrfs_retry_endio, &done);
                if (ret) {
-                       err = ret;
+                       err = errno_to_blk_status(ret);
                        goto next;
                }
 
@@ -8211,8 +8210,8 @@ next:
        return err;
 }
 
-static int btrfs_subio_endio_read(struct inode *inode,
-                                 struct btrfs_io_bio *io_bio, int err)
+static blk_status_t btrfs_subio_endio_read(struct inode *inode,
+               struct btrfs_io_bio *io_bio, blk_status_t err)
 {
        bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
@@ -8232,7 +8231,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
        struct inode *inode = dip->inode;
        struct bio *dio_bio;
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
-       int err = bio->bi_error;
+       blk_status_t err = bio->bi_status;
 
        if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
                err = btrfs_subio_endio_read(inode, io_bio, err);
@@ -8243,11 +8242,11 @@ static void btrfs_endio_direct_read(struct bio *bio)
 
        kfree(dip);
 
-       dio_bio->bi_error = bio->bi_error;
-       dio_end_io(dio_bio, bio->bi_error);
+       dio_bio->bi_status = bio->bi_status;
+       dio_end_io(dio_bio);
 
        if (io_bio->end_io)
-               io_bio->end_io(io_bio, err);
+               io_bio->end_io(io_bio, blk_status_to_errno(err));
        bio_put(bio);
 }
 
@@ -8299,20 +8298,20 @@ static void btrfs_endio_direct_write(struct bio *bio)
        struct bio *dio_bio = dip->dio_bio;
 
        __endio_write_update_ordered(dip->inode, dip->logical_offset,
-                                    dip->bytes, !bio->bi_error);
+                                    dip->bytes, !bio->bi_status);
 
        kfree(dip);
 
-       dio_bio->bi_error = bio->bi_error;
-       dio_end_io(dio_bio, bio->bi_error);
+       dio_bio->bi_status = bio->bi_status;
+       dio_end_io(dio_bio);
        bio_put(bio);
 }
 
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
+static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode,
                                    struct bio *bio, int mirror_num,
                                    unsigned long bio_flags, u64 offset)
 {
-       int ret;
+       blk_status_t ret;
        ret = btrfs_csum_one_bio(inode, bio, offset, 1);
        BUG_ON(ret); /* -ENOMEM */
        return 0;
@@ -8321,7 +8320,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
 static void btrfs_end_dio_bio(struct bio *bio)
 {
        struct btrfs_dio_private *dip = bio->bi_private;
-       int err = bio->bi_error;
+       blk_status_t err = bio->bi_status;
 
        if (err)
                btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -8351,7 +8350,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
        if (dip->errors) {
                bio_io_error(dip->orig_bio);
        } else {
-               dip->dio_bio->bi_error = 0;
+               dip->dio_bio->bi_status = 0;
                bio_endio(dip->orig_bio);
        }
 out:
@@ -8368,14 +8367,14 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
        return bio;
 }
 
-static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode,
+static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
                                                 struct btrfs_dio_private *dip,
                                                 struct bio *bio,
                                                 u64 file_offset)
 {
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
        struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
-       int ret;
+       blk_status_t ret;
 
        /*
         * We load all the csum data we need when we submit
@@ -8406,7 +8405,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_dio_private *dip = bio->bi_private;
        bool write = bio_op(bio) == REQ_OP_WRITE;
-       int ret;
+       blk_status_t ret;
 
        if (async_submit)
                async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -8649,7 +8648,7 @@ free_ordered:
         * callbacks - they require an allocated dip and a clone of dio_bio.
         */
        if (io_bio && dip) {
-               io_bio->bi_error = -EIO;
+               io_bio->bi_status = BLK_STS_IOERR;
                bio_endio(io_bio);
                /*
                 * The end io callbacks free our dip, do the final put on io_bio
@@ -8668,12 +8667,12 @@ free_ordered:
                        unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
                              file_offset + dio_bio->bi_iter.bi_size - 1);
 
-               dio_bio->bi_error = -EIO;
+               dio_bio->bi_status = BLK_STS_IOERR;
                /*
                 * Releases and cleans up our dio_bio, no need to bio_put()
                 * nor bio_endio()/bio_io_error() against dio_bio.
                 */
-               dio_end_io(dio_bio, ret);
+               dio_end_io(dio_bio);
        }
        if (io_bio)
                bio_put(io_bio);
@@ -8755,6 +8754,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
                        dio_data.overwrite = 1;
                        inode_unlock(inode);
                        relock = true;
+               } else if (iocb->ki_flags & IOCB_NOWAIT) {
+                       ret = -EAGAIN;
+                       goto out;
                }
                ret = btrfs_delalloc_reserve_space(inode, offset, count);
                if (ret)
index d8ea0eb..f3d30d9 100644
@@ -871,7 +871,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
  * this frees the rbio and runs through all the bios in the
  * bio_list and calls end_io on them
  */
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
 {
        struct bio *cur = bio_list_get(&rbio->bio_list);
        struct bio *next;
@@ -884,7 +884,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
        while (cur) {
                next = cur->bi_next;
                cur->bi_next = NULL;
-               cur->bi_error = err;
+               cur->bi_status = err;
                bio_endio(cur);
                cur = next;
        }
@@ -897,7 +897,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
 static void raid_write_end_io(struct bio *bio)
 {
        struct btrfs_raid_bio *rbio = bio->bi_private;
-       int err = bio->bi_error;
+       blk_status_t err = bio->bi_status;
        int max_errors;
 
        if (err)
@@ -914,7 +914,7 @@ static void raid_write_end_io(struct bio *bio)
        max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
                     0 : rbio->bbio->max_errors;
        if (atomic_read(&rbio->error) > max_errors)
-               err = -EIO;
+               err = BLK_STS_IOERR;
 
        rbio_orig_end_io(rbio, err);
 }
@@ -1092,7 +1092,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
                 * devices or if they are not contiguous
                 */
                if (last_end == disk_start && stripe->dev->bdev &&
-                   !last->bi_error &&
+                   !last->bi_status &&
                    last->bi_bdev == stripe->dev->bdev) {
                        ret = bio_add_page(last, page, PAGE_SIZE, 0);
                        if (ret == PAGE_SIZE)
@@ -1448,7 +1448,7 @@ static void raid_rmw_end_io(struct bio *bio)
 {
        struct btrfs_raid_bio *rbio = bio->bi_private;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                fail_bio_stripe(rbio, bio);
        else
                set_bio_pages_uptodate(bio);
@@ -1991,7 +1991,7 @@ static void raid_recover_end_io(struct bio *bio)
         * we only read stripe pages off the disk, set them
         * up to date if there were no errors
         */
-       if (bio->bi_error)
+       if (bio->bi_status)
                fail_bio_stripe(rbio, bio);
        else
                set_bio_pages_uptodate(bio);
@@ -2530,7 +2530,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio)
 {
        struct btrfs_raid_bio *rbio = bio->bi_private;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                fail_bio_stripe(rbio, bio);
        else
                set_bio_pages_uptodate(bio);
index c7b45eb..ba5595d 100644
@@ -95,7 +95,7 @@ struct scrub_bio {
        struct scrub_ctx        *sctx;
        struct btrfs_device     *dev;
        struct bio              *bio;
-       int                     err;
+       blk_status_t            status;
        u64                     logical;
        u64                     physical;
 #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
@@ -1668,14 +1668,14 @@ leave_nomem:
 
 struct scrub_bio_ret {
        struct completion event;
-       int error;
+       blk_status_t status;
 };
 
 static void scrub_bio_wait_endio(struct bio *bio)
 {
        struct scrub_bio_ret *ret = bio->bi_private;
 
-       ret->error = bio->bi_error;
+       ret->status = bio->bi_status;
        complete(&ret->event);
 }
 
@@ -1693,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
        int ret;
 
        init_completion(&done.event);
-       done.error = 0;
+       done.status = 0;
        bio->bi_iter.bi_sector = page->logical >> 9;
        bio->bi_private = &done;
        bio->bi_end_io = scrub_bio_wait_endio;
@@ -1705,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
                return ret;
 
        wait_for_completion(&done.event);
-       if (done.error)
+       if (done.status)
                return -EIO;
 
        return 0;
@@ -1937,7 +1937,7 @@ again:
                bio->bi_bdev = sbio->dev->bdev;
                bio->bi_iter.bi_sector = sbio->physical >> 9;
                bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-               sbio->err = 0;
+               sbio->status = 0;
        } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
                   spage->physical_for_dev_replace ||
                   sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -1992,7 +1992,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
        struct scrub_bio *sbio = bio->bi_private;
        struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
 
-       sbio->err = bio->bi_error;
+       sbio->status = bio->bi_status;
        sbio->bio = bio;
 
        btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -2007,7 +2007,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
        int i;
 
        WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
-       if (sbio->err) {
+       if (sbio->status) {
                struct btrfs_dev_replace *dev_replace =
                        &sbio->sctx->fs_info->dev_replace;
 
@@ -2341,7 +2341,7 @@ again:
                bio->bi_bdev = sbio->dev->bdev;
                bio->bi_iter.bi_sector = sbio->physical >> 9;
                bio_set_op_attrs(bio, REQ_OP_READ, 0);
-               sbio->err = 0;
+               sbio->status = 0;
        } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
                   spage->physical ||
                   sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -2377,7 +2377,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
        struct scrub_block *sblock = bio->bi_private;
        struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                sblock->no_io_error_seen = 0;
 
        bio_put(bio);
@@ -2588,7 +2588,7 @@ static void scrub_bio_end_io(struct bio *bio)
        struct scrub_bio *sbio = bio->bi_private;
        struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
 
-       sbio->err = bio->bi_error;
+       sbio->status = bio->bi_status;
        sbio->bio = bio;
 
        btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2601,7 +2601,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
        int i;
 
        BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
-       if (sbio->err) {
+       if (sbio->status) {
                for (i = 0; i < sbio->page_count; i++) {
                        struct scrub_page *spage = sbio->pagev[i];
 
@@ -3004,7 +3004,7 @@ static void scrub_parity_bio_endio(struct bio *bio)
        struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
        struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
                          sparity->nsectors);
 
index 017b67d..84a4959 100644
@@ -6042,9 +6042,10 @@ static void btrfs_end_bio(struct bio *bio)
        struct btrfs_bio *bbio = bio->bi_private;
        int is_orig_bio = 0;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                atomic_inc(&bbio->error);
-               if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) {
+               if (bio->bi_status == BLK_STS_IOERR ||
+                   bio->bi_status == BLK_STS_TARGET) {
                        unsigned int stripe_index =
                                btrfs_io_bio(bio)->stripe_index;
                        struct btrfs_device *dev;
@@ -6082,13 +6083,13 @@ static void btrfs_end_bio(struct bio *bio)
                 * beyond the tolerance of the btrfs bio
                 */
                if (atomic_read(&bbio->error) > bbio->max_errors) {
-                       bio->bi_error = -EIO;
+                       bio->bi_status = BLK_STS_IOERR;
                } else {
                        /*
                         * this bio is actually up to date, we didn't
                         * go over the max number of errors
                         */
-                       bio->bi_error = 0;
+                       bio->bi_status = 0;
                }
 
                btrfs_end_bbio(bbio, bio);
@@ -6199,7 +6200,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
 
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                bio->bi_iter.bi_sector = logical >> 9;
-               bio->bi_error = -EIO;
+               bio->bi_status = BLK_STS_IOERR;
                btrfs_end_bbio(bbio, bio);
        }
 }
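
The scrub and volumes hunks above are one instance of the tree-wide bi_error -> bi_status conversion: bio completion handlers now read a blk_status_t out of the bio and translate it into a classic negative errno only at the filesystem boundary. A minimal standalone sketch of that convention, with stub types and an illustrative translation (the real blk_status_t and blk_status_to_errno() live in the block layer headers):

#include <stdio.h>

typedef unsigned int blk_status_t;      /* stub; the kernel uses its own type */
#define BLK_STS_OK     0
#define BLK_STS_IOERR  10               /* value illustrative only */

struct bio { blk_status_t bi_status; }; /* stub of the real struct bio */

/* Translate block-layer status to an errno only at the boundary. */
static int blk_status_to_errno_sketch(blk_status_t status)
{
        return status == BLK_STS_OK ? 0 : -5;   /* -EIO for anything fatal */
}

static void scrub_like_end_io(struct bio *bio)
{
        if (bio->bi_status)             /* replaces "if (bio->bi_error)" */
                fprintf(stderr, "I/O failed: errno %d\n",
                        blk_status_to_errno_sketch(bio->bi_status));
}

int main(void)
{
        struct bio b = { .bi_status = BLK_STS_IOERR };
        scrub_like_end_io(&b);
        return 0;
}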
index 161be58..5c2cba8 100644 (file)
@@ -49,7 +49,7 @@
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
-                        struct writeback_control *wbc);
+                        enum rw_hint hint, struct writeback_control *wbc);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -1829,7 +1829,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
        do {
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
-                       submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+                       submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+                                       inode->i_write_hint, wbc);
                        nr_underway++;
                }
                bh = next;
@@ -1883,7 +1884,8 @@ recover:
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
                        clear_buffer_dirty(bh);
-                       submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+                       submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+                                       inode->i_write_hint, wbc);
                        nr_underway++;
                }
                bh = next;
@@ -3038,7 +3040,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
        if (unlikely(bio_flagged(bio, BIO_QUIET)))
                set_bit(BH_Quiet, &bh->b_state);
 
-       bh->b_end_io(bh, !bio->bi_error);
+       bh->b_end_io(bh, !bio->bi_status);
        bio_put(bio);
 }
 
@@ -3091,7 +3093,7 @@ void guard_bio_eod(int op, struct bio *bio)
 }
 
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
-                        struct writeback_control *wbc)
+                        enum rw_hint write_hint, struct writeback_control *wbc)
 {
        struct bio *bio;
 
@@ -3120,6 +3122,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 
        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio->bi_bdev = bh->b_bdev;
+       bio->bi_write_hint = write_hint;
 
        bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
        BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3142,7 +3145,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 
 int submit_bh(int op, int op_flags, struct buffer_head *bh)
 {
-       return submit_bh_wbc(op, op_flags, bh, NULL);
+       return submit_bh_wbc(op, op_flags, bh, 0, NULL);
 }
 EXPORT_SYMBOL(submit_bh);
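
The submit_bh_wbc() change is pure plumbing: buffer-head writeback now threads the owning inode's write-lifetime hint into each bio it builds, so devices that separate streams by expected data lifetime can act on it, while plain submit_bh() passes 0 (no hint). A stub-typed sketch of the idea; the real enum rw_hint and fields come from the fs and block headers:

enum rw_hint { WRITE_LIFE_NOT_SET = 0, WRITE_LIFE_SHORT = 2 }; /* subset */

struct bio   { enum rw_hint bi_write_hint; };  /* stubs of kernel structs */
struct inode { enum rw_hint i_write_hint; };

/* What the new parameter buys: every bio built for this inode's pages
 * carries the inode's lifetime hint down to the device. */
static void init_writeback_bio(struct bio *bio, const struct inode *inode)
{
        bio->bi_write_hint = inode->i_write_hint;
}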
 
index 987044b..59cb307 100644 (file)
@@ -131,6 +131,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
        }
 
        if (new_mode != old_mode) {
+               newattrs.ia_ctime = current_time(inode);
                newattrs.ia_mode = new_mode;
                newattrs.ia_valid = ATTR_MODE;
                ret = __ceph_setattr(inode, &newattrs);
index e8f11fa..7df550c 100644 (file)
@@ -91,6 +91,10 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
                ceph_mdsc_put_request(req);
                if (!inode)
                        return ERR_PTR(-ESTALE);
+               if (inode->i_nlink == 0) {
+                       iput(inode);
+                       return ERR_PTR(-ESTALE);
+               }
        }
 
        return d_obtain_alias(inode);
index dcce79b..4de6cdd 100644 (file)
@@ -2022,7 +2022,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                    attr->ia_size > inode->i_size) {
                        i_size_write(inode, attr->ia_size);
                        inode->i_blocks = calc_inode_blocks(attr->ia_size);
-                       inode->i_ctime = attr->ia_ctime;
                        ci->i_reported_size = attr->ia_size;
                        dirtied |= CEPH_CAP_FILE_EXCL;
                } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
@@ -2044,7 +2043,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
                     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
                     only ? "ctime only" : "ignored");
-               inode->i_ctime = attr->ia_ctime;
                if (only) {
                        /*
                         * if kernel wants to dirty ctime but nothing else,
@@ -2067,7 +2065,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
        if (dirtied) {
                inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
                                                           &prealloc_cf);
-               inode->i_ctime = current_time(inode);
+               inode->i_ctime = attr->ia_ctime;
        }
 
        release &= issued;
@@ -2085,6 +2083,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                req->r_inode_drop = release;
                req->r_args.setattr.mask = cpu_to_le32(mask);
                req->r_num_caps = 1;
+               req->r_stamp = attr->ia_ctime;
                err = ceph_mdsc_do_request(mdsc, NULL, req);
        }
        dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
index f38e56f..0c05df4 100644 (file)
@@ -1687,7 +1687,6 @@ struct ceph_mds_request *
 ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
 {
        struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
-       struct timespec ts;
 
        if (!req)
                return ERR_PTR(-ENOMEM);
@@ -1706,8 +1705,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
        init_completion(&req->r_safe_completion);
        INIT_LIST_HEAD(&req->r_unsafe_item);
 
-       ktime_get_real_ts(&ts);
-       req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
+       req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran);
 
        req->r_op = op;
        req->r_direct_mode = mode;
index 0fd081b..fcef706 100644 (file)
@@ -3271,7 +3271,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
        if (!is_sync_kiocb(iocb))
                ctx->iocb = iocb;
 
-       if (to->type & ITER_IOVEC)
+       if (to->type == ITER_IOVEC)
                ctx->should_dirty = true;
 
        rc = setup_aio_ctx_iter(ctx, to, READ);
index b085319..3b147dc 100644 (file)
@@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
 
        if (!pages) {
                pages = vmalloc(max_pages * sizeof(struct page *));
-               if (!bv) {
+               if (!pages) {
                        kvfree(bv);
                        return -ENOMEM;
                }
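
The setup_aio_ctx_iter() hunk fixes a classic fallback-allocation slip: after the vmalloc() retry, the code tested the stale bv pointer instead of the pages it had just assigned. The corrected shape in a standalone userspace rendition, with calloc()/malloc() standing in for the kmalloc()/vmalloc() pair:

#include <stdlib.h>

static void **alloc_page_array(size_t max_pages, void *earlier_buf)
{
        void **pages = calloc(max_pages, sizeof(*pages));   /* first attempt */

        if (!pages) {
                pages = malloc(max_pages * sizeof(*pages)); /* fallback */
                if (!pages) {                /* test the pointer just set... */
                        free(earlier_buf);   /* ...and unwind earlier work */
                        return NULL;
                }
        }
        return pages;
}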
index 27bc360..a723df3 100644 (file)
@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
                     struct cifs_fid *fid, __u16 search_flags,
                     struct cifs_search_info *srch_inf)
 {
-       return CIFSFindFirst(xid, tcon, path, cifs_sb,
-                            &fid->netfid, search_flags, srch_inf, true);
+       int rc;
+
+       rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
+                          &fid->netfid, search_flags, srch_inf, true);
+       if (rc)
+               cifs_dbg(FYI, "find first failed=%d\n", rc);
+       return rc;
 }
 
 static int
index c586918..7e48561 100644 (file)
@@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
        rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
        kfree(utf16_path);
        if (rc) {
-               cifs_dbg(VFS, "open dir failed\n");
+               cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
                return rc;
        }
 
@@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
        rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
                                  fid->volatile_fid, 0, srch_inf);
        if (rc) {
-               cifs_dbg(VFS, "query directory failed\n");
+               cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
                SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
        }
        return rc;
@@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
 
        sg = init_sg(rqst, sign);
        if (!sg) {
-               cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
+               cifs_dbg(VFS, "%s: Failed to init sg", __func__);
+               rc = -ENOMEM;
                goto free_req;
        }
 
@@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
        iv = kzalloc(iv_len, GFP_KERNEL);
        if (!iv) {
                cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
+               rc = -ENOMEM;
                goto free_sg;
        }
        iv[0] = 3;
index 3cb5c9e..de50e74 100644 (file)
@@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
        pcreatetime = (__u64 *)value;
        *pcreatetime = CIFS_I(inode)->createtime;
        return sizeof(__u64);
-
-       return rc;
 }
 
 
index 8b2a994..a66f662 100644 (file)
@@ -138,6 +138,14 @@ struct config_item *config_item_get(struct config_item *item)
 }
 EXPORT_SYMBOL(config_item_get);
 
+struct config_item *config_item_get_unless_zero(struct config_item *item)
+{
+       if (item && kref_get_unless_zero(&item->ci_kref))
+               return item;
+       return NULL;
+}
+EXPORT_SYMBOL(config_item_get_unless_zero);
+
 static void config_item_cleanup(struct config_item *item)
 {
        struct config_item_type *t = item->ci_type;
index a6ab012..c8aabba 100644 (file)
@@ -83,14 +83,13 @@ static int create_link(struct config_item *parent_item,
        ret = -ENOMEM;
        sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
        if (sl) {
-               sl->sl_target = config_item_get(item);
                spin_lock(&configfs_dirent_lock);
                if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
                        spin_unlock(&configfs_dirent_lock);
-                       config_item_put(item);
                        kfree(sl);
                        return -ENOENT;
                }
+               sl->sl_target = config_item_get(item);
                list_add(&sl->sl_list, &target_sd->s_links);
                spin_unlock(&configfs_dirent_lock);
                ret = configfs_create_link(sl, parent_item->ci_dentry,
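
Together these two configfs hunks close a teardown race: symlink creation must not take a plain reference on an item that may already be draining, so the get is moved under configfs_dirent_lock after the DROPPING check, and a get-unless-zero primitive is exported for takers racing with the final put. A standalone sketch of that primitive using C11 atomics:

#include <stdatomic.h>

struct kref { atomic_int refcount; };   /* stub of the kernel's struct kref */

/* Succeed only while the object is still alive (refcount > 0). */
static int kref_get_unless_zero_sketch(struct kref *k)
{
        int v = atomic_load(&k->refcount);

        while (v > 0)
                if (atomic_compare_exchange_weak(&k->refcount, &v, v + 1))
                        return 1;       /* reference taken */
        return 0;                       /* object already on its way to free */
}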
index a409a84..6181e95 100644 (file)
@@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
                        goto errout;
                }
                err = submit_bio_wait(bio);
-               if ((err == 0) && bio->bi_error)
+               if (err == 0 && bio->bi_status)
                        err = -EIO;
                bio_put(bio);
                if (err)
index 2a6889b..9187f3b 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -859,6 +859,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
                        if (ret < 0)
                                goto out;
                }
+               start_index = indices[pvec.nr - 1] + 1;
        }
 out:
        put_dax(dax_dev);
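
The one-line dax fix matters more than it looks: without advancing start_index past the last entry the lookup returned, a writeback scan that fills a whole batch restarts at the same offset and can spin forever. The idiom in isolation, with a toy fetch_batch() standing in for the pagevec lookup:

#include <stddef.h>
#include <stdio.h>

#define BATCH 2

/* Toy stand-in for a pagevec lookup: copy up to BATCH indices >= start. */
static size_t fetch_batch(const size_t *idx, size_t n, size_t start,
                          size_t *out)
{
        size_t got = 0;

        for (size_t i = 0; i < n && got < BATCH; i++)
                if (idx[i] >= start)
                        out[got++] = idx[i];
        return got;
}

int main(void)
{
        const size_t idx[] = { 1, 4, 9, 10, 23 };
        size_t out[BATCH], start = 0, got;

        while ((got = fetch_batch(idx, 5, start, out)) > 0) {
                for (size_t i = 0; i < got; i++)
                        printf("flush index %zu\n", out[i]);
                start = out[got - 1] + 1;   /* the line the fix adds */
        }
        return 0;
}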
index cddf397..a9f995f 100644 (file)
@@ -1494,7 +1494,7 @@ static void check_and_drop(void *_data)
 {
        struct detach_data *data = _data;
 
-       if (!data->mountpoint && !data->select.found)
+       if (!data->mountpoint && list_empty(&data->select.dispose))
                __d_drop(data->select.start);
 }
 
@@ -1536,17 +1536,15 @@ void d_invalidate(struct dentry *dentry)
 
                d_walk(dentry, &data, detach_and_collect, check_and_drop);
 
-               if (data.select.found)
+               if (!list_empty(&data.select.dispose))
                        shrink_dentry_list(&data.select.dispose);
+               else if (!data.mountpoint)
+                       return;
 
                if (data.mountpoint) {
                        detach_mounts(data.mountpoint);
                        dput(data.mountpoint);
                }
-
-               if (!data.mountpoint && !data.select.found)
-                       break;
-
                cond_resched();
        }
 }
index a04ebea..08cf278 100644 (file)
@@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work)
        dio_complete(dio, 0, true);
 }
 
-static int dio_bio_complete(struct dio *dio, struct bio *bio);
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
 
 /*
  * Asynchronous IO callback. 
@@ -348,13 +348,12 @@ static void dio_bio_end_io(struct bio *bio)
 /**
  * dio_end_io - handle the end io action for the given bio
  * @bio: The direct io bio that's being completed
- * @error: Error if there was one
  *
  * This is meant to be called by any filesystem that uses its own dio_submit_t
  * so that the DIO-specific endio actions are dealt with after the filesystem
  * has done its completion work.
  */
-void dio_end_io(struct bio *bio, int error)
+void dio_end_io(struct bio *bio)
 {
        struct dio *dio = bio->bi_private;
 
@@ -386,6 +385,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
        else
                bio->bi_end_io = dio_bio_end_io;
 
+       bio->bi_write_hint = dio->iocb->ki_hint;
+
        sdio->bio = bio;
        sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
 }
@@ -474,17 +475,20 @@ static struct bio *dio_await_one(struct dio *dio)
 /*
  * Process one completed BIO.  No locks are held.
  */
-static int dio_bio_complete(struct dio *dio, struct bio *bio)
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
 {
        struct bio_vec *bvec;
        unsigned i;
-       int err;
+       blk_status_t err = bio->bi_status;
 
-       if (bio->bi_error)
-               dio->io_error = -EIO;
+       if (err) {
+               if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
+                       dio->io_error = -EAGAIN;
+               else
+                       dio->io_error = -EIO;
+       }
 
        if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
-               err = bio->bi_error;
                bio_check_pages_dirty(bio);     /* transfers ownership */
        } else {
                bio_for_each_segment_all(bvec, bio, i) {
@@ -495,7 +499,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
                                set_page_dirty_lock(page);
                        put_page(page);
                }
-               err = bio->bi_error;
                bio_put(bio);
        }
        return err;
@@ -539,7 +542,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
                        bio = dio->bio_list;
                        dio->bio_list = bio->bi_private;
                        spin_unlock_irqrestore(&dio->bio_lock, flags);
-                       ret2 = dio_bio_complete(dio, bio);
+                       ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
                        if (ret == 0)
                                ret = ret2;
                }
@@ -1197,6 +1200,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
        if (iov_iter_rw(iter) == WRITE) {
                dio->op = REQ_OP_WRITE;
                dio->op_flags = REQ_SYNC | REQ_IDLE;
+               if (iocb->ki_flags & IOCB_NOWAIT)
+                       dio->op_flags |= REQ_NOWAIT;
        } else {
                dio->op = REQ_OP_READ;
        }
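
dio_bio_complete() now keeps the block layer's status type internally and applies a small policy when folding it into dio->io_error: BLK_STS_AGAIN on a bio the caller marked REQ_NOWAIT surfaces as -EAGAIN (the submitter asked not to block and may retry on a blocking path), while every other failure collapses to -EIO. The policy as a standalone function with stubbed constants (values illustrative):

#include <errno.h>

typedef unsigned int blk_status_t;      /* stub */
#define BLK_STS_OK     0
#define BLK_STS_AGAIN  12               /* value illustrative only */

static int dio_fold_status(blk_status_t status, int bio_was_nowait)
{
        if (status == BLK_STS_OK)
                return 0;
        if (status == BLK_STS_AGAIN && bio_was_nowait)
                return -EAGAIN;         /* caller opted in via IOCB_NOWAIT */
        return -EIO;
}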
index 72934df..9041990 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -220,8 +220,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 
        if (write) {
                unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+               unsigned long ptr_size;
                struct rlimit *rlim;
 
+               /*
+                * Since the stack will hold pointers to the strings, we
+                * must account for them as well.
+                *
+                * The size calculation is the entire vma while each arg page is
+                * built, so each time we get here it's calculating how far it
+                * is currently (rather than each call being just the newly
+                * added size from the arg page).  As a result, we need to
+                * always add the entire size of the pointers, so that on the
+                * last call to get_arg_page() we'll actually have the entire
+                * correct size.
+                */
+               ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+               if (ptr_size > ULONG_MAX - size)
+                       goto fail;
+               size += ptr_size;
+
                acct_arg_size(bprm, size / PAGE_SIZE);
 
                /*
@@ -239,13 +257,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                 *    to work from.
                 */
                rlim = current->signal->rlim;
-               if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
-                       put_page(page);
-                       return NULL;
-               }
+               if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+                       goto fail;
        }
 
        return page;
+
+fail:
+       put_page(page);
+       return NULL;
 }
 
 static void put_arg_page(struct page *page)
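
The get_arg_page() hunk is the security fix of this batch: the argv/envp strings were charged against the stack rlimit, but the pointer array execve() later copies onto the stack was not, so a huge argc could blow past the limit. The guard also has to survive arithmetic overflow, which is why it compares against ULONG_MAX before adding. The check in isolation:

#include <limits.h>
#include <stdbool.h>

/* Charge the argv/envp pointer array against the running size total,
 * refusing the addition if it would wrap.  (argc/envc are bounded
 * elsewhere in the kernel, so the multiplication is safe in context.) */
static bool account_arg_pointers(unsigned long *size,
                                 unsigned long argc, unsigned long envc)
{
        unsigned long ptr_size = (argc + envc) * sizeof(void *);

        if (ptr_size > ULONG_MAX - *size)
                return false;           /* would overflow: fail the exec */
        *size += ptr_size;
        return true;
}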
index 02ce7e7..58e2eea 100644 (file)
@@ -37,7 +37,11 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
        struct inode *inode = file_inode(iocb->ki_filp);
        ssize_t ret;
 
-       inode_lock_shared(inode);
+       if (!inode_trylock_shared(inode)) {
+               if (iocb->ki_flags & IOCB_NOWAIT)
+                       return -EAGAIN;
+               inode_lock_shared(inode);
+       }
        /*
         * Recheck under inode lock - at this point we are sure it cannot
         * change anymore
@@ -179,7 +183,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
        struct inode *inode = file_inode(iocb->ki_filp);
        ssize_t ret;
 
-       inode_lock(inode);
+       if (!inode_trylock(inode)) {
+               if (iocb->ki_flags & IOCB_NOWAIT)
+                       return -EAGAIN;
+               inode_lock(inode);
+       }
        ret = ext4_write_checks(iocb, from);
        if (ret <= 0)
                goto out;
@@ -216,7 +224,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                return ext4_dax_write_iter(iocb, from);
 #endif
 
-       inode_lock(inode);
+       if (!inode_trylock(inode)) {
+               if (iocb->ki_flags & IOCB_NOWAIT)
+                       return -EAGAIN;
+               inode_lock(inode);
+       }
+
        ret = ext4_write_checks(iocb, from);
        if (ret <= 0)
                goto out;
@@ -235,9 +248,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
        iocb->private = &overwrite;
        /* Check whether we do a DIO overwrite or not */
-       if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
-           ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
-               overwrite = 1;
+       if (o_direct && !unaligned_aio) {
+               if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+                       if (ext4_should_dioread_nolock(inode))
+                               overwrite = 1;
+               } else if (iocb->ki_flags & IOCB_NOWAIT) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
+       }
 
        ret = __generic_file_write_iter(iocb, from);
        inode_unlock(inode);
@@ -435,6 +454,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
                if (ret < 0)
                        return ret;
        }
+
+       /* Set the flags to support nowait AIO */
+       filp->f_mode |= FMODE_AIO_NOWAIT;
+
        return dquot_file_open(inode, filp);
 }
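
All three ext4 entry points adopt the same nonblocking-submission idiom: try the inode lock, and if it is contended either fail fast with -EAGAIN (when the kiocb carries IOCB_NOWAIT) or fall back to the ordinary sleeping lock. The idiom sketched with a pthread mutex in place of the inode lock:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

static int lock_for_io(pthread_mutex_t *m, bool nowait)
{
        if (pthread_mutex_trylock(m) != 0) {
                if (nowait)
                        return -EAGAIN;    /* IOCB_NOWAIT: never sleep */
                pthread_mutex_lock(m);     /* classic blocking path */
        }
        return 0;
}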
 
index 1a82138..c2fce44 100644 (file)
@@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio)
                }
 #endif
 
-               if (bio->bi_error) {
+               if (bio->bi_status) {
                        SetPageError(page);
                        mapping_set_error(page->mapping, -EIO);
                }
@@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio)
                                continue;
                        }
                        clear_buffer_async_write(bh);
-                       if (bio->bi_error)
+                       if (bio->bi_status)
                                buffer_io_error(bh);
                } while ((bh = bh->b_this_page) != head);
                bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
@@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio)
                      bdevname(bio->bi_bdev, b),
                      (long long) bio->bi_iter.bi_sector,
                      (unsigned) bio_sectors(bio),
-                     bio->bi_error)) {
+                     bio->bi_status)) {
                ext4_finish_bio(bio);
                bio_put(bio);
                return;
        }
        bio->bi_end_io = NULL;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                struct inode *inode = io_end->inode;
 
                ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
                             "(offset %llu size %ld starting block %llu)",
-                            bio->bi_error, inode->i_ino,
+                            bio->bi_status, inode->i_ino,
                             (unsigned long long) io_end->offset,
                             (long) io_end->size,
                             (unsigned long long)
                             bi_sector >> (inode->i_blkbits - 9));
-               mapping_set_error(inode->i_mapping, bio->bi_error);
+               mapping_set_error(inode->i_mapping,
+                               blk_status_to_errno(bio->bi_status));
        }
 
        if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
@@ -349,6 +350,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
        if (bio) {
                int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
                                  REQ_SYNC : 0;
+               io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
                bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
                submit_bio(io->io_bio);
        }
@@ -396,6 +398,7 @@ submit_and_retry:
                ret = io_submit_init_bio(io, bh);
                if (ret)
                        return ret;
+               io->io_bio->bi_write_hint = inode->i_write_hint;
        }
        ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
        if (ret != bh->b_size)
index a81b829..40a5497 100644 (file)
@@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio)
        int i;
 
        if (ext4_bio_encrypted(bio)) {
-               if (bio->bi_error) {
+               if (bio->bi_status) {
                        fscrypt_release_ctx(bio->bi_private);
                } else {
                        fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio)
        bio_for_each_segment_all(bv, bio, i) {
                struct page *page = bv->bv_page;
 
-               if (!bio->bi_error) {
+               if (!bio->bi_status) {
                        SetPageUptodate(page);
                } else {
                        ClearPageUptodate(page);
index d37c81f..9006cb5 100644 (file)
@@ -3950,7 +3950,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                sb->s_qcop = &ext4_qctl_operations;
        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 #endif
-       memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+       memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 
        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
        mutex_init(&sbi->s_orphan_lock);
index 7c0f6bd..36fe820 100644 (file)
@@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio)
 #ifdef CONFIG_F2FS_FAULT_INJECTION
        if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
                f2fs_show_injection_info(FAULT_IO);
-               bio->bi_error = -EIO;
+               bio->bi_status = BLK_STS_IOERR;
        }
 #endif
 
        if (f2fs_bio_encrypted(bio)) {
-               if (bio->bi_error) {
+               if (bio->bi_status) {
                        fscrypt_release_ctx(bio->bi_private);
                } else {
                        fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio)
        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
 
-               if (!bio->bi_error) {
+               if (!bio->bi_status) {
                        if (!PageUptodate(page))
                                SetPageUptodate(page);
                } else {
@@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio)
                        unlock_page(page);
                        mempool_free(page, sbi->write_io_dummy);
 
-                       if (unlikely(bio->bi_error))
+                       if (unlikely(bio->bi_status))
                                f2fs_stop_checkpoint(sbi, true);
                        continue;
                }
 
                fscrypt_pullback_bio_page(&page, true);
 
-               if (unlikely(bio->bi_error)) {
+               if (unlikely(bio->bi_status)) {
                        mapping_set_error(page->mapping, -EIO);
                        f2fs_stop_checkpoint(sbi, true);
                }
index 2185c7a..fd2e651 100644 (file)
@@ -1078,6 +1078,7 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
 {
        SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver);
        u32 *ctx = (u32 *)shash_desc_ctx(shash);
+       u32 retval;
        int err;
 
        shash->tfm = sbi->s_chksum_driver;
@@ -1087,7 +1088,9 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
        err = crypto_shash_update(shash, address, length);
        BUG_ON(err);
 
-       return *ctx;
+       retval = *ctx;
+       barrier_data(ctx);
+       return retval;
 }
 
 static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
index 9684585..ea9f455 100644 (file)
@@ -749,7 +749,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
 {
        struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
 
-       dc->error = bio->bi_error;
+       dc->error = blk_status_to_errno(bio->bi_status);
        dc->state = D_DONE;
        complete(&dc->wait);
        bio_put(bio);
index 83355ec..0b89b0b 100644 (file)
@@ -1937,7 +1937,7 @@ try_onemore:
        sb->s_time_gran = 1;
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
-       memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+       memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
 
        /* init f2fs-specific super block info */
        sbi->valid_super_block = valid_super_block;
index f4e7267..ed051f8 100644 (file)
@@ -243,6 +243,67 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
 }
 #endif
 
+static bool rw_hint_valid(enum rw_hint hint)
+{
+       switch (hint) {
+       case RWF_WRITE_LIFE_NOT_SET:
+       case RWH_WRITE_LIFE_NONE:
+       case RWH_WRITE_LIFE_SHORT:
+       case RWH_WRITE_LIFE_MEDIUM:
+       case RWH_WRITE_LIFE_LONG:
+       case RWH_WRITE_LIFE_EXTREME:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static long fcntl_rw_hint(struct file *file, unsigned int cmd,
+                         unsigned long arg)
+{
+       struct inode *inode = file_inode(file);
+       u64 *argp = (u64 __user *)arg;
+       enum rw_hint hint;
+       u64 h;
+
+       switch (cmd) {
+       case F_GET_FILE_RW_HINT:
+               h = file_write_hint(file);
+               if (copy_to_user(argp, &h, sizeof(*argp)))
+                       return -EFAULT;
+               return 0;
+       case F_SET_FILE_RW_HINT:
+               if (copy_from_user(&h, argp, sizeof(h)))
+                       return -EFAULT;
+               hint = (enum rw_hint) h;
+               if (!rw_hint_valid(hint))
+                       return -EINVAL;
+
+               spin_lock(&file->f_lock);
+               file->f_write_hint = hint;
+               spin_unlock(&file->f_lock);
+               return 0;
+       case F_GET_RW_HINT:
+               h = inode->i_write_hint;
+               if (copy_to_user(argp, &h, sizeof(*argp)))
+                       return -EFAULT;
+               return 0;
+       case F_SET_RW_HINT:
+               if (copy_from_user(&h, argp, sizeof(h)))
+                       return -EFAULT;
+               hint = (enum rw_hint) h;
+               if (!rw_hint_valid(hint))
+                       return -EINVAL;
+
+               inode_lock(inode);
+               inode->i_write_hint = hint;
+               inode_unlock(inode);
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                struct file *filp)
 {
@@ -337,6 +398,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
        case F_GET_SEALS:
                err = shmem_fcntl(filp, cmd, arg);
                break;
+       case F_GET_RW_HINT:
+       case F_SET_RW_HINT:
+       case F_GET_FILE_RW_HINT:
+       case F_SET_FILE_RW_HINT:
+               err = fcntl_rw_hint(filp, cmd, arg);
+               break;
        default:
                break;
        }
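
The new fcntl commands round out the write-hint work: F_GET_RW_HINT/F_SET_RW_HINT operate on the inode, the *_FILE_* variants on the open file description, and all four take a pointer to a u64. A hedged userspace example; the constants mirror include/uapi/linux/fcntl.h from this series and are defined locally in case the installed libc headers predate them:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

#ifndef F_SET_RW_HINT
#define F_LINUX_SPECIFIC_BASE 1024
#define F_SET_RW_HINT         (F_LINUX_SPECIFIC_BASE + 12)
#define RWH_WRITE_LIFE_SHORT  2
#endif

int main(void)
{
        uint64_t hint = RWH_WRITE_LIFE_SHORT;
        int fd = open("scratch.dat", O_CREAT | O_WRONLY, 0600);

        if (fd < 0 || fcntl(fd, F_SET_RW_HINT, &hint) < 0) {
                perror("F_SET_RW_HINT");
                return 1;
        }
        return 0;
}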
index b7cf65d..aa3d445 100644 (file)
@@ -815,7 +815,6 @@ struct gfs2_sbd {
        atomic_t sd_log_in_flight;
        struct bio *sd_log_bio;
        wait_queue_head_t sd_log_flush_wait;
-       int sd_log_error;
 
        atomic_t sd_reserving_log;
        wait_queue_head_t sd_reserving_log_wait;
index b1f9144..885d36e 100644 (file)
@@ -170,7 +170,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
  */
 
 static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
-                                 int error)
+                                 blk_status_t error)
 {
        struct buffer_head *bh, *next;
        struct page *page = bvec->bv_page;
@@ -209,15 +209,13 @@ static void gfs2_end_log_write(struct bio *bio)
        struct page *page;
        int i;
 
-       if (bio->bi_error) {
-               sdp->sd_log_error = bio->bi_error;
-               fs_err(sdp, "Error %d writing to log\n", bio->bi_error);
-       }
+       if (bio->bi_status)
+               fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
 
        bio_for_each_segment_all(bvec, bio, i) {
                page = bvec->bv_page;
                if (page_has_buffers(page))
-                       gfs2_end_log_write_bh(sdp, bvec, bio->bi_error);
+                       gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
                else
                        mempool_free(page, gfs2_page_pool);
        }
index 663ffc1..fabe161 100644 (file)
@@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio)
                do {
                        struct buffer_head *next = bh->b_this_page;
                        len -= bh->b_size;
-                       bh->b_end_io(bh, !bio->bi_error);
+                       bh->b_end_io(bh, !bio->bi_status);
                        bh = next;
                } while (bh && len);
        }
index ed67548..e76058d 100644 (file)
@@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio)
 {
        struct page *page = bio->bi_private;
 
-       if (!bio->bi_error)
+       if (!bio->bi_status)
                SetPageUptodate(page);
        else
-               pr_warn("error %d reading superblock\n", bio->bi_error);
+               pr_warn("error %d reading superblock\n", bio->bi_status);
        unlock_page(page);
 }
 
@@ -203,7 +203,7 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
 
        memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
        memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-       memcpy(s->s_uuid, str->sb_uuid, 16);
+       memcpy(&s->s_uuid, str->sb_uuid, 16);
 }
 
 /**
index 7a51534..e77bc52 100644 (file)
@@ -71,25 +71,14 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
        return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
 }
 
-static int gfs2_uuid_valid(const u8 *uuid)
-{
-       int i;
-
-       for (i = 0; i < 16; i++) {
-               if (uuid[i])
-                       return 1;
-       }
-       return 0;
-}
-
 static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
 {
        struct super_block *s = sdp->sd_vfs;
-       const u8 *uuid = s->s_uuid;
+
        buf[0] = '\0';
-       if (!gfs2_uuid_valid(uuid))
+       if (uuid_is_null(&s->s_uuid))
                return 0;
-       return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid);
+       return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid);
 }
 
 static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
@@ -712,14 +701,13 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
 {
        struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
        struct super_block *s = sdp->sd_vfs;
-       const u8 *uuid = s->s_uuid;
 
        add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
        add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
        if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
                add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid);
-       if (gfs2_uuid_valid(uuid))
-               add_uevent_var(env, "UUID=%pUB", uuid);
+       if (!uuid_is_null(&s->s_uuid))
+               add_uevent_var(env, "UUID=%pUB", &s->s_uuid);
        return 0;
 }
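
With the superblock's s_uuid now a typed uuid_t, gfs2 drops its hand-rolled validity loop in favour of the generic uuid_is_null(). A standalone equivalent of what that helper checks:

#include <stdbool.h>
#include <string.h>

typedef struct { unsigned char b[16]; } uuid_sketch_t;  /* kernel: uuid_t */

static bool uuid_is_null_sketch(const uuid_sketch_t *u)
{
        static const uuid_sketch_t nil;          /* zero-initialised */
        return memcmp(u, &nil, sizeof(*u)) == 0;
}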
 
index dde8613..d44f545 100644 (file)
@@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
index db59147..f0e5fc7 100644 (file)
@@ -146,6 +146,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        i_gid_write(inode, 0);
        atomic_set(&inode->i_writecount, 0);
        inode->i_size = 0;
+       inode->i_write_hint = WRITE_LIFE_NOT_SET;
        inode->i_blocks = 0;
        inode->i_bytes = 0;
        inode->i_generation = 0;
index 4b10892..fa6cd5b 100644 (file)
@@ -672,8 +672,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
        struct iomap_dio *dio = bio->bi_private;
        bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
 
-       if (bio->bi_error)
-               iomap_dio_set_error(dio, bio->bi_error);
+       if (bio->bi_status)
+               iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
 
        if (atomic_dec_and_test(&dio->ref)) {
                if (is_sync_kiocb(dio->iocb)) {
@@ -793,6 +793,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
                bio->bi_bdev = iomap->bdev;
                bio->bi_iter.bi_sector =
                        iomap->blkno + ((pos - iomap->offset) >> 9);
+               bio->bi_write_hint = dio->iocb->ki_hint;
                bio->bi_private = dio;
                bio->bi_end_io = iomap_dio_bio_end_io;
 
@@ -881,6 +882,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                flags |= IOMAP_WRITE;
        }
 
+       if (iocb->ki_flags & IOCB_NOWAIT) {
+               if (filemap_range_has_page(mapping, start, end)) {
+                       ret = -EAGAIN;
+                       goto out_free_dio;
+               }
+               flags |= IOMAP_NOWAIT;
+       }
+
        ret = filemap_write_and_wait_range(mapping, start, end);
        if (ret)
                goto out_free_dio;
index bb1da1f..a21f0e9 100644 (file)
@@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio)
 
        bp->l_flag |= lbmDONE;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                bp->l_flag |= lbmERROR;
 
                jfs_err("lbmIODone: I/O error in JFS log");
index 489aaa1..ce93db3 100644 (file)
@@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio)
 {
        struct page *page = bio->bi_private;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                printk(KERN_ERR "metapage_read_end_io: I/O error\n");
                SetPageError(page);
        }
@@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio)
 
        BUG_ON(!PagePrivate(page));
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                printk(KERN_ERR "metapage_write_end_io: I/O error\n");
                SetPageError(page);
        }
index baff8f8..d6d1486 100644 (file)
@@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio)
 
        bio_for_each_segment_all(bv, bio, i) {
                struct page *page = bv->bv_page;
-               page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
+               page_endio(page, op_is_write(bio_op(bio)),
+                               blk_status_to_errno(bio->bi_status));
        }
 
        bio_put(bio);
@@ -614,6 +615,7 @@ alloc_new:
                        goto confused;
 
                wbc_init_bio(wbc, bio);
+               bio->bi_write_hint = inode->i_write_hint;
        }
 
        /*
index 8bd3e4d..5a44384 100644 (file)
@@ -3488,6 +3488,8 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
                return err;
        }
 
+       put_mnt_ns(old_mnt_ns);
+
        /* Update the pwd and root */
        set_fs_pwd(fs, &root);
        set_fs_root(fs, &root);
index 0ca370d..d8863a8 100644 (file)
@@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio)
 {
        struct parallel_io *par = bio->bi_private;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                struct nfs_pgio_header *header = par->data;
 
                if (!header->pnfs_error)
@@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio)
        struct parallel_io *par = bio->bi_private;
        struct nfs_pgio_header *header = par->data;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                if (!header->pnfs_error)
                        header->pnfs_error = -EIO;
                pnfs_set_lo_fail(header->lseg);
index c14758e..390ac9c 100644 (file)
@@ -753,7 +753,6 @@ static void nfs4_callback_free_slot(struct nfs4_session *session,
         * A single slot, so highest used slotid is either 0 or -1
         */
        nfs4_free_slot(tbl, slot);
-       nfs4_slot_tbl_drain_complete(tbl);
        spin_unlock(&tbl->slot_tbl_lock);
 }
 
index 32ccd77..2ac00bf 100644 (file)
@@ -1946,29 +1946,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(nfs_link);
 
-static void
-nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data)
-{
-       struct dentry *old_dentry = data->old_dentry;
-       struct dentry *new_dentry = data->new_dentry;
-       struct inode *old_inode = d_inode(old_dentry);
-       struct inode *new_inode = d_inode(new_dentry);
-
-       nfs_mark_for_revalidate(old_inode);
-
-       switch (task->tk_status) {
-       case 0:
-               if (new_inode != NULL)
-                       nfs_drop_nlink(new_inode);
-               d_move(old_dentry, new_dentry);
-               nfs_set_verifier(new_dentry,
-                                       nfs_save_change_attribute(data->new_dir));
-               break;
-       case -ENOENT:
-               nfs_dentry_handle_enoent(old_dentry);
-       }
-}
-
 /*
  * RENAME
  * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -1999,7 +1976,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
        struct inode *old_inode = d_inode(old_dentry);
        struct inode *new_inode = d_inode(new_dentry);
-       struct dentry *dentry = NULL;
+       struct dentry *dentry = NULL, *rehash = NULL;
        struct rpc_task *task;
        int error = -EBUSY;
 
@@ -2022,8 +1999,10 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 * To prevent any new references to the target during the
                 * rename, we unhash the dentry in advance.
                 */
-               if (!d_unhashed(new_dentry))
+               if (!d_unhashed(new_dentry)) {
                        d_drop(new_dentry);
+                       rehash = new_dentry;
+               }
 
                if (d_count(new_dentry) > 2) {
                        int err;
@@ -2040,6 +2019,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                goto out;
 
                        new_dentry = dentry;
+                       rehash = NULL;
                        new_inode = NULL;
                }
        }
@@ -2048,8 +2028,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (new_inode != NULL)
                NFS_PROTO(new_inode)->return_delegation(new_inode);
 
-       task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
-                                       nfs_complete_rename);
+       task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
        if (IS_ERR(task)) {
                error = PTR_ERR(task);
                goto out;
@@ -2059,9 +2038,27 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (error == 0)
                error = task->tk_status;
        rpc_put_task(task);
+       nfs_mark_for_revalidate(old_inode);
 out:
+       if (rehash)
+               d_rehash(rehash);
        trace_nfs_rename_exit(old_dir, old_dentry,
                        new_dir, new_dentry, error);
+       if (!error) {
+               if (new_inode != NULL)
+                       nfs_drop_nlink(new_inode);
+               /*
+                * The d_move() should be here instead of in an async RPC completion
+                * handler because we need the proper locks to move the dentry.  If
+                * we're interrupted by a signal, the async RPC completion handler
+                * should mark the directories for revalidation.
+                */
+               d_move(old_dentry, new_dentry);
+               nfs_set_verifier(new_dentry,
+                                       nfs_save_change_attribute(new_dir));
+       } else if (error == -ENOENT)
+               nfs_dentry_handle_enoent(old_dentry);
+
        /* new dentry created? */
        if (dentry)
                dput(dentry);
index c08c46a..dbfa189 100644 (file)
@@ -2589,7 +2589,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
 
        /* Except MODE, it seems harmless of setting twice. */
        if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
-               attrset[1] & FATTR4_WORD1_MODE)
+               (attrset[1] & FATTR4_WORD1_MODE ||
+                attrset[2] & FATTR4_WORD2_MODE_UMASK))
                sattr->ia_valid &= ~ATTR_MODE;
 
        if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -8416,6 +8417,7 @@ static void nfs4_layoutget_release(void *calldata)
        size_t max_pages = max_response_pages(server);
 
        dprintk("--> %s\n", __func__);
+       nfs4_sequence_free_slot(&lgp->res.seq_res);
        nfs4_free_pages(lgp->args.layout.pages, max_pages);
        pnfs_put_layout_hdr(NFS_I(inode)->layout);
        put_nfs_open_context(lgp->args.ctx);
@@ -8490,7 +8492,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
        /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
        if (status == 0 && lgp->res.layoutp->len)
                lseg = pnfs_layout_process(lgp);
-       nfs4_sequence_free_slot(&lgp->res.seq_res);
        rpc_put_task(task);
        dprintk("<-- %s status=%d\n", __func__, status);
        if (status)
index b34de03..cbf82b0 100644 (file)
@@ -2134,6 +2134,8 @@ again:
        put_rpccred(cred);
        switch (status) {
        case 0:
+       case -EINTR:
+       case -ERESTARTSYS:
                break;
        case -ETIMEDOUT:
                if (clnt->cl_softrtry)
index fb5213a..c862c24 100644 (file)
@@ -219,6 +219,9 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
        u8 *buf, *d, type, assoc;
        int error;
 
+       if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
+               return -EINVAL;
+
        buf = kzalloc(bufflen, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
@@ -229,7 +232,6 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
                goto out_free_buf;
        }
        req = scsi_req(rq);
-       scsi_req_init(rq);
 
        error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
        if (error)
index e71f11b..3bc08c3 100644 (file)
@@ -486,7 +486,7 @@ secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
 #endif
 
 static inline int
-uuid_parse(char **mesg, char *buf, unsigned char **puuid)
+nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
 {
        int len;
 
@@ -586,7 +586,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
                        if (strcmp(buf, "fsloc") == 0)
                                err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
                        else if (strcmp(buf, "uuid") == 0)
-                               err = uuid_parse(&mesg, buf, &exp.ex_uuid);
+                               err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
                        else if (strcmp(buf, "secinfo") == 0)
                                err = secinfo_parse(&mesg, buf, &exp);
                        else
index 6f87b2a..e73c86d 100644 (file)
@@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio)
 {
        struct nilfs_segment_buffer *segbuf = bio->bi_private;
 
-       if (bio->bi_error)
+       if (bio->bi_status)
                atomic_inc(&segbuf->sb_err);
 
        bio_put(bio);
index 0da0332..ffe0039 100644 (file)
@@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio)
 {
        struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
 
-       if (bio->bi_error) {
-               mlog(ML_ERROR, "IO Error %d\n", bio->bi_error);
-               wc->wc_error = bio->bi_error;
+       if (bio->bi_status) {
+               mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
+               wc->wc_error = blk_status_to_errno(bio->bi_status);
        }
 
        o2hb_bio_wait_dec(wc, 1);
index 3b7c937..4689940 100644 (file)
@@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
        struct ocfs2_lock_res *lockres;
 
        lockres = &OCFS2_I(inode)->ip_inode_lockres;
+       /* had_lock means that the current process already took the cluster
+        * lock previously. If had_lock is 1, we have nothing to do here, and
+        * it will get unlocked where we got the lock.
+        */
        if (!had_lock) {
                ocfs2_remove_holder(lockres, oh);
                ocfs2_inode_unlock(inode, ex);
index ca1646f..83005f4 100644 (file)
@@ -2062,7 +2062,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
        cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
        bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
        sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
-       memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
+       memcpy(&sb->s_uuid, di->id2.i_super.s_uuid,
               sizeof(di->id2.i_super.s_uuid));
 
        osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
index 3c5384d..f70c377 100644 (file)
@@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode,
                           void *buffer,
                           size_t buffer_size)
 {
-       int ret;
+       int ret, had_lock;
        struct buffer_head *di_bh = NULL;
+       struct ocfs2_lock_holder oh;
 
-       ret = ocfs2_inode_lock(inode, &di_bh, 0);
-       if (ret < 0) {
-               mlog_errno(ret);
-               return ret;
+       had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
+       if (had_lock < 0) {
+               mlog_errno(had_lock);
+               return had_lock;
        }
        down_read(&OCFS2_I(inode)->ip_xattr_sem);
        ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
                                     name, buffer, buffer_size);
        up_read(&OCFS2_I(inode)->ip_xattr_sem);
 
-       ocfs2_inode_unlock(inode, 0);
+       ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
 
        brelse(di_bh);
 
@@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode,
 {
        struct buffer_head *di_bh = NULL;
        struct ocfs2_dinode *di;
-       int ret, credits, ref_meta = 0, ref_credits = 0;
+       int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct inode *tl_inode = osb->osb_tl_inode;
        struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
        struct ocfs2_refcount_tree *ref_tree = NULL;
+       struct ocfs2_lock_holder oh;
 
        struct ocfs2_xattr_info xi = {
                .xi_name_index = name_index,
@@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode,
                return -ENOMEM;
        }
 
-       ret = ocfs2_inode_lock(inode, &di_bh, 1);
-       if (ret < 0) {
+       had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
+       if (had_lock < 0) {
+               ret = had_lock;
                mlog_errno(ret);
                goto cleanup_nolock;
        }
@@ -3670,7 +3673,7 @@ cleanup:
                if (ret)
                        mlog_errno(ret);
        }
-       ocfs2_inode_unlock(inode, 1);
+       ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
 cleanup_nolock:
        brelse(di_bh);
        brelse(xbs.xattr_bh);
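
Both xattr paths switch to the lock-tracker variants so that re-taking a cluster lock the process already holds does not deadlock: the taker reports whether the lock was already held, and the matching unlock releases only when this frame was the acquirer (the dlmglue.c comment above spells out the had_lock case). The shape of the pattern, loosely sketched with a pthread mutex:

#include <pthread.h>

struct lock_holder { int this_frame_locked; };

/* Mirrors ocfs2_inode_lock_tracker()'s contract loosely: returns nonzero
 * if the lock was already held by us, zero if this call acquired it. */
static int lock_tracker(pthread_mutex_t *m, int already_held,
                        struct lock_holder *oh)
{
        oh->this_frame_locked = !already_held;
        if (!already_held)
                pthread_mutex_lock(m);
        return already_held;
}

static void unlock_tracker(pthread_mutex_t *m, const struct lock_holder *oh)
{
        if (oh->this_frame_locked)     /* only the acquiring frame unlocks */
                pthread_mutex_unlock(m);
}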
index cd0c5be..3fe0c4a 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -759,6 +759,7 @@ static int do_dentry_open(struct file *f,
             likely(f->f_op->write || f->f_op->write_iter))
                f->f_mode |= FMODE_CAN_WRITE;
 
+       f->f_write_hint = WRITE_LIFE_NOT_SET;
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
        file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
index 7a44533..e5869f9 100644 (file)
@@ -233,7 +233,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
        return err;
 }
 
-static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_be *uuid)
+static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
 {
        struct ovl_fh *fh;
        int fh_type, fh_len, dwords;
@@ -284,7 +284,6 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
                          struct dentry *upper)
 {
        struct super_block *sb = lower->d_sb;
-       uuid_be *uuid = (uuid_be *) &sb->s_uuid;
        const struct ovl_fh *fh = NULL;
        int err;
 
@@ -294,8 +293,8 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
         * up and a pure upper inode.
         */
        if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
-           uuid_be_cmp(*uuid, NULL_UUID_BE)) {
-               fh = ovl_encode_fh(lower, uuid);
+           !uuid_is_null(&sb->s_uuid)) {
+               fh = ovl_encode_fh(lower, &sb->s_uuid);
                if (IS_ERR(fh))
                        return PTR_ERR(fh);
        }
@@ -330,15 +329,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
                .link = link
        };
 
-       upper = lookup_one_len(dentry->d_name.name, upperdir,
-                              dentry->d_name.len);
-       err = PTR_ERR(upper);
-       if (IS_ERR(upper))
-               goto out;
-
        err = security_inode_copy_up(dentry, &new_creds);
        if (err < 0)
-               goto out1;
+               goto out;
 
        if (new_creds)
                old_creds = override_creds(new_creds);
@@ -362,7 +355,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
        }
 
        if (err)
-               goto out2;
+               goto out;
 
        if (S_ISREG(stat->mode)) {
                struct path upperpath;
@@ -398,10 +391,23 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
        /*
         * Store identifier of lower inode in upper inode xattr to
         * allow lookup of the copy up origin inode.
+        *
+        * Don't set origin when we are breaking the association with a lower
+        * hard link.
         */
-       err = ovl_set_origin(dentry, lowerpath->dentry, temp);
-       if (err)
+       if (S_ISDIR(stat->mode) || stat->nlink == 1) {
+               err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+               if (err)
+                       goto out_cleanup;
+       }
+
+       upper = lookup_one_len(dentry->d_name.name, upperdir,
+                              dentry->d_name.len);
+       if (IS_ERR(upper)) {
+               err = PTR_ERR(upper);
+               upper = NULL;
                goto out_cleanup;
+       }
 
        if (tmpfile)
                err = ovl_do_link(temp, udir, upper, true);
@@ -416,17 +422,15 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 
        /* Restore timestamps on parent (best effort) */
        ovl_set_timestamps(upperdir, pstat);
-out2:
+out:
        dput(temp);
-out1:
        dput(upper);
-out:
        return err;
 
 out_cleanup:
        if (!tmpfile)
                ovl_cleanup(wdir, temp);
-       goto out2;
+       goto out;
 }
 
 /*
index f3136c3..de0d4f7 100644 (file)
@@ -135,7 +135,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
         * Make sure that the stored uuid matches the uuid of the lower
         * layer where file handle will be decoded.
         */
-       if (uuid_be_cmp(fh->uuid, *(uuid_be *) &mnt->mnt_sb->s_uuid))
+       if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
                goto out;
 
        origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
index 0623ceb..10863b4 100644 (file)
@@ -56,7 +56,7 @@ struct ovl_fh {
        u8 len;         /* size of this header + size of fid */
        u8 flags;       /* OVL_FH_FLAG_* */
        u8 type;        /* fid_type of fid */
-       uuid_be uuid;   /* uuid of filesystem */
+       uuid_t uuid;    /* uuid of filesystem */
        u8 fid[0];      /* file identifier */
 } __packed;
 
index f0c8b33..520802d 100644 (file)
@@ -300,11 +300,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 
        /* We don't show the stack guard page in /proc/maps */
        start = vma->vm_start;
-       if (stack_guard_page_start(vma, start))
-               start += PAGE_SIZE;
        end = vma->vm_end;
-       if (stack_guard_page_end(vma, end))
-               end -= PAGE_SIZE;
 
        seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
        seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
index 47c1d44..d591eee 100644 (file)
@@ -678,16 +678,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
        struct kiocb kiocb;
        ssize_t ret;
 
-       if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
-               return -EOPNOTSUPP;
-
        init_sync_kiocb(&kiocb, filp);
-       if (flags & RWF_HIPRI)
-               kiocb.ki_flags |= IOCB_HIPRI;
-       if (flags & RWF_DSYNC)
-               kiocb.ki_flags |= IOCB_DSYNC;
-       if (flags & RWF_SYNC)
-               kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+       ret = kiocb_set_rw_flags(&kiocb, flags);
+       if (ret)
+               return ret;
        kiocb.ki_pos = *ppos;
 
        if (type == READ)
@@ -1285,7 +1279,7 @@ static size_t compat_writev(struct file *file,
        if (!(file->f_mode & FMODE_CAN_WRITE))
                goto out;
 
-       ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0);
+       ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
 
 out:
        if (ret > 0)
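
The open-coded flag translation removed above moves into the
kiocb_set_rw_flags() helper in <linux/fs.h>.  A sketch of its shape,
reconstructed from the deleted lines (the real helper also grows
RWF_NOWAIT/IOCB_NOWAIT handling as part of this merge's no-wait I/O work;
see FMODE_AIO_NOWAIT in the XFS hunks further down):

	static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
	{
		if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
			return -EOPNOTSUPP;
		if (flags & RWF_HIPRI)
			ki->ki_flags |= IOCB_HIPRI;
		if (flags & RWF_DSYNC)
			ki->ki_flags |= IOCB_DSYNC;
		if (flags & RWF_SYNC)
			ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
		return 0;
	}

Note the compat_writev() hunk is a real fix as well: it previously passed
a literal 0 and so dropped the caller's flags.
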
index d642cc0..f80be4c 100644 (file)
@@ -400,10 +400,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
        /*
         * There is not enough space for the user on the device
         */
-       if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
-               mutex_unlock(&UFS_SB(sb)->s_lock);
-               UFSD("EXIT (FAILED)\n");
-               return 0;
+       if (unlikely(ufs_freefrags(uspi) <= uspi->s_root_blocks)) {
+               if (!capable(CAP_SYS_RESOURCE)) {
+                       mutex_unlock(&UFS_SB(sb)->s_lock);
+                       UFSD("EXIT (FAILED)\n");
+                       return 0;
+               }
        }
 
        if (goal >= uspi->s_size) 
@@ -421,12 +423,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
                if (result) {
                        ufs_clear_frags(inode, result + oldcount,
                                        newcount - oldcount, locked_page != NULL);
+                       *err = 0;
                        write_seqlock(&UFS_I(inode)->meta_lock);
                        ufs_cpu_to_data_ptr(sb, p, result);
-                       write_sequnlock(&UFS_I(inode)->meta_lock);
-                       *err = 0;
                        UFS_I(inode)->i_lastfrag =
                                max(UFS_I(inode)->i_lastfrag, fragment + count);
+                       write_sequnlock(&UFS_I(inode)->meta_lock);
                }
                mutex_unlock(&UFS_SB(sb)->s_lock);
                UFSD("EXIT, result %llu\n", (unsigned long long)result);
@@ -439,8 +441,10 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
        result = ufs_add_fragments(inode, tmp, oldcount, newcount);
        if (result) {
                *err = 0;
+               read_seqlock_excl(&UFS_I(inode)->meta_lock);
                UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
                                                fragment + count);
+               read_sequnlock_excl(&UFS_I(inode)->meta_lock);
                ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
                                locked_page != NULL);
                mutex_unlock(&UFS_SB(sb)->s_lock);
@@ -451,39 +455,29 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
        /*
         * allocate new block and move data
         */
-       switch (fs32_to_cpu(sb, usb1->fs_optim)) {
-           case UFS_OPTSPACE:
+       if (fs32_to_cpu(sb, usb1->fs_optim) == UFS_OPTSPACE) {
                request = newcount;
-               if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree
-                   > uspi->s_dsize * uspi->s_minfree / (2 * 100))
-                       break;
-               usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-               break;
-           default:
-               usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-       
-           case UFS_OPTTIME:
+               if (uspi->cs_total.cs_nffree < uspi->s_space_to_time)
+                       usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
+       } else {
                request = uspi->s_fpb;
-               if (uspi->cs_total.cs_nffree < uspi->s_dsize *
-                   (uspi->s_minfree - 2) / 100)
-                       break;
-               usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-               break;
+               if (uspi->cs_total.cs_nffree > uspi->s_time_to_space)
+                       usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
        }
        result = ufs_alloc_fragments (inode, cgno, goal, request, err);
        if (result) {
                ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
                                locked_page != NULL);
+               mutex_unlock(&UFS_SB(sb)->s_lock);
                ufs_change_blocknr(inode, fragment - oldcount, oldcount,
                                   uspi->s_sbbase + tmp,
                                   uspi->s_sbbase + result, locked_page);
+               *err = 0;
                write_seqlock(&UFS_I(inode)->meta_lock);
                ufs_cpu_to_data_ptr(sb, p, result);
-               write_sequnlock(&UFS_I(inode)->meta_lock);
-               *err = 0;
                UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
                                                fragment + count);
-               mutex_unlock(&UFS_SB(sb)->s_lock);
+               write_sequnlock(&UFS_I(inode)->meta_lock);
                if (newcount < request)
                        ufs_free_fragments (inode, result + newcount, request - newcount);
                ufs_free_fragments (inode, tmp, oldcount);
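
For context: meta_lock is a seqlock_t guarding i_lastfrag, and the hunks
above pull the i_lastfrag updates inside the locked region.
read_seqlock_excl()/read_sequnlock_excl() take the seqlock's underlying
spinlock without bumping the sequence counter, so they serialize against
writers without invalidating lockless readers.  Those readers follow the
usual retry loop; an illustrative sketch, not a quote from fs/ufs:

	unsigned seq;
	u64 lastfrag;

	do {
		seq = read_seqbegin(&UFS_I(inode)->meta_lock);
		lastfrag = UFS_I(inode)->i_lastfrag;
	} while (read_seqretry(&UFS_I(inode)->meta_lock, seq));
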
index da553ff..f36d6a5 100644 (file)
@@ -401,13 +401,20 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff
        u64 phys64 = 0;
        unsigned frag = fragment & uspi->s_fpbmask;
 
-       if (!create) {
-               phys64 = ufs_frag_map(inode, offsets, depth);
-               if (phys64)
-                       map_bh(bh_result, sb, phys64 + frag);
-               return 0;
-       }
+       phys64 = ufs_frag_map(inode, offsets, depth);
+       if (!create)
+               goto done;
 
+       if (phys64) {
+               if (fragment >= UFS_NDIR_FRAGMENT)
+                       goto done;
+               read_seqlock_excl(&UFS_I(inode)->meta_lock);
+               if (fragment < UFS_I(inode)->i_lastfrag) {
+                       read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+                       goto done;
+               }
+               read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+       }
         /* This code entered only while writing ....? */
 
        mutex_lock(&UFS_I(inode)->truncate_mutex);
@@ -451,6 +458,11 @@ out:
        }
        mutex_unlock(&UFS_I(inode)->truncate_mutex);
        return err;
+
+done:
+       if (phys64)
+               map_bh(bh_result, sb, phys64 + frag);
+       return 0;
 }
 
 static int ufs_writepage(struct page *page, struct writeback_control *wbc)
@@ -554,10 +566,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
         */
        inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
        set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink));
-       if (inode->i_nlink == 0) {
-               ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
-               return -1;
-       }
+       if (inode->i_nlink == 0)
+               return -ESTALE;
 
        /*
         * Linux now has 32-bit uid and gid, so we can support EFT.
@@ -566,9 +576,9 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
        i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode));
 
        inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
-       inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
-       inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
-       inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
+       inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
+       inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
+       inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
        inode->i_mtime.tv_nsec = 0;
        inode->i_atime.tv_nsec = 0;
        inode->i_ctime.tv_nsec = 0;
@@ -602,10 +612,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
         */
        inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
        set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink));
-       if (inode->i_nlink == 0) {
-               ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
-               return -1;
-       }
+       if (inode->i_nlink == 0)
+               return -ESTALE;
 
         /*
          * Linux now has 32-bit uid and gid, so we can support EFT.
@@ -645,7 +653,7 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
        struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
        struct buffer_head * bh;
        struct inode *inode;
-       int err;
+       int err = -EIO;
 
        UFSD("ENTER, ino %lu\n", ino);
 
@@ -680,9 +688,10 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
                err = ufs1_read_inode(inode,
                                      ufs_inode + ufs_inotofsbo(inode->i_ino));
        }
-
+       brelse(bh);
        if (err)
                goto bad_inode;
+
        inode->i_version++;
        ufsi->i_lastfrag =
                (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -691,15 +700,13 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
 
        ufs_set_inode_ops(inode);
 
-       brelse(bh);
-
        UFSD("EXIT\n");
        unlock_new_inode(inode);
        return inode;
 
 bad_inode:
        iget_failed(inode);
-       return ERR_PTR(-EIO);
+       return ERR_PTR(err);
 }
 
 static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
@@ -874,7 +881,6 @@ static inline void free_data(struct to_free *ctx, u64 from, unsigned count)
        ctx->to = from + count;
 }
 
-#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
 #define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
 
 static void ufs_trunc_direct(struct inode *inode)
@@ -1112,19 +1118,24 @@ static void ufs_truncate_blocks(struct inode *inode)
        struct super_block *sb = inode->i_sb;
        struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
        unsigned offsets[4];
-       int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets);
+       int depth;
        int depth2;
        unsigned i;
        struct ufs_buffer_head *ubh[3];
        void *p;
        u64 block;
 
-       if (!depth)
-               return;
+       if (inode->i_size) {
+               sector_t last = (inode->i_size - 1) >> uspi->s_bshift;
+               depth = ufs_block_to_path(inode, last, offsets);
+               if (!depth)
+                       return;
+       } else {
+               depth = 1;
+       }
 
-       /* find the last non-zero in offsets[] */
        for (depth2 = depth - 1; depth2; depth2--)
-               if (offsets[depth2])
+               if (offsets[depth2] != uspi->s_apb - 1)
                        break;
 
        mutex_lock(&ufsi->truncate_mutex);
@@ -1133,9 +1144,8 @@ static void ufs_truncate_blocks(struct inode *inode)
                offsets[0] = UFS_IND_BLOCK;
        } else {
                /* get the blocks that should be partially emptied */
-               p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]);
+               p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++);
                for (i = 0; i < depth2; i++) {
-                       offsets[i]++;   /* next branch is fully freed */
                        block = ufs_data_ptr_to_cpu(sb, p);
                        if (!block)
                                break;
@@ -1146,7 +1156,7 @@ static void ufs_truncate_blocks(struct inode *inode)
                                write_sequnlock(&ufsi->meta_lock);
                                break;
                        }
-                       p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]);
+                       p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]++);
                }
                while (i--)
                        free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1);
@@ -1161,7 +1171,9 @@ static void ufs_truncate_blocks(struct inode *inode)
                        free_full_branch(inode, block, i - UFS_IND_BLOCK + 1);
                }
        }
+       read_seqlock_excl(&ufsi->meta_lock);
        ufsi->i_lastfrag = DIRECT_FRAGMENT;
+       read_sequnlock_excl(&ufsi->meta_lock);
        mark_inode_dirty(inode);
        mutex_unlock(&ufsi->truncate_mutex);
 }
index 878cc62..0a4f58a 100644 (file)
@@ -480,7 +480,7 @@ static void ufs_setup_cstotal(struct super_block *sb)
        usb3 = ubh_get_usb_third(uspi);
 
        if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
-            (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
+            (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) ||
            mtype == UFS_MOUNT_UFSTYPE_UFS2) {
                /* we have the statistics in a different place than usual */
                uspi->cs_total.cs_ndir = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir);
@@ -596,9 +596,7 @@ static void ufs_put_cstotal(struct super_block *sb)
        usb2 = ubh_get_usb_second(uspi);
        usb3 = ubh_get_usb_third(uspi);
 
-       if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
-            (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
-           mtype == UFS_MOUNT_UFSTYPE_UFS2) {
+       if (mtype == UFS_MOUNT_UFSTYPE_UFS2) {
                /* we have the statistics in a different place than usual */
                usb2->fs_un.fs_u2.cs_ndir =
                        cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
@@ -608,16 +606,26 @@ static void ufs_put_cstotal(struct super_block *sb)
                        cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
                usb3->fs_un1.fs_u2.cs_nffree =
                        cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
-       } else {
-               usb1->fs_cstotal.cs_ndir =
-                       cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
-               usb1->fs_cstotal.cs_nbfree =
-                       cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
-               usb1->fs_cstotal.cs_nifree =
-                       cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
-               usb1->fs_cstotal.cs_nffree =
-                       cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+               goto out;
+       }
+
+       if (mtype == UFS_MOUNT_UFSTYPE_44BSD &&
+            (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) {
+               /* store stats in both old and new places */
+               usb2->fs_un.fs_u2.cs_ndir =
+                       cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
+               usb2->fs_un.fs_u2.cs_nbfree =
+                       cpu_to_fs64(sb, uspi->cs_total.cs_nbfree);
+               usb3->fs_un1.fs_u2.cs_nifree =
+                       cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
+               usb3->fs_un1.fs_u2.cs_nffree =
+                       cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
        }
+       usb1->fs_cstotal.cs_ndir = cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
+       usb1->fs_cstotal.cs_nbfree = cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
+       usb1->fs_cstotal.cs_nifree = cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
+       usb1->fs_cstotal.cs_nffree = cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+out:
        ubh_mark_buffer_dirty(USPI_UBH(uspi));
        ufs_print_super_stuff(sb, usb1, usb2, usb3);
        UFSD("EXIT\n");
@@ -996,6 +1004,13 @@ again:
                flags |=  UFS_ST_SUN;
        }
 
+       if ((flags & UFS_ST_MASK) == UFS_ST_44BSD &&
+           uspi->s_postblformat == UFS_42POSTBLFMT) {
+               if (!silent)
+                       pr_err("this is not a 44bsd filesystem");
+               goto failed;
+       }
+
        /*
         * Check ufs magic number
         */
@@ -1143,8 +1158,8 @@ magic_found:
        uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask);
 
        if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
-               uspi->s_u2_size  = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
-               uspi->s_u2_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
+               uspi->s_size  = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
+               uspi->s_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
        } else {
                uspi->s_size  =  fs32_to_cpu(sb, usb1->fs_size);
                uspi->s_dsize =  fs32_to_cpu(sb, usb1->fs_dsize);
@@ -1193,6 +1208,18 @@ magic_found:
        uspi->s_postbloff = fs32_to_cpu(sb, usb3->fs_postbloff);
        uspi->s_rotbloff = fs32_to_cpu(sb, usb3->fs_rotbloff);
 
+       uspi->s_root_blocks = mul_u64_u32_div(uspi->s_dsize,
+                                             uspi->s_minfree, 100);
+       if (uspi->s_minfree <= 5) {
+               uspi->s_time_to_space = ~0ULL;
+               uspi->s_space_to_time = 0;
+               usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
+       } else {
+               uspi->s_time_to_space = (uspi->s_root_blocks / 2) + 1;
+               uspi->s_space_to_time = mul_u64_u32_div(uspi->s_dsize,
+                                             uspi->s_minfree - 2, 100) - 1;
+       }
+
        /*
         * Compute other frequently used values
         */
@@ -1382,19 +1409,17 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
        mutex_lock(&UFS_SB(sb)->s_lock);
        usb3 = ubh_get_usb_third(uspi);
        
-       if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
+       if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
                buf->f_type = UFS2_MAGIC;
-               buf->f_blocks = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
-       } else {
+       else
                buf->f_type = UFS_MAGIC;
-               buf->f_blocks = uspi->s_dsize;
-       }
-       buf->f_bfree = ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
-               uspi->cs_total.cs_nffree;
+
+       buf->f_blocks = uspi->s_dsize;
+       buf->f_bfree = ufs_freefrags(uspi);
        buf->f_ffree = uspi->cs_total.cs_nifree;
        buf->f_bsize = sb->s_blocksize;
-       buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree))
-               ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0;
+       buf->f_bavail = (buf->f_bfree > uspi->s_root_blocks)
+               ? (buf->f_bfree - uspi->s_root_blocks) : 0;
        buf->f_files = uspi->s_ncg * uspi->s_ipg;
        buf->f_namelen = UFS_MAXNAMLEN;
        buf->f_fsid.val[0] = (u32)id;
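
A worked example of the precomputed thresholds above, with hypothetical
numbers:

	/* Hypothetical fs: s_dsize = 1000000 fragments, s_minfree = 8 (%) */
	u64 root_blocks   = mul_u64_u32_div(1000000, 8, 100);          /* 80000 */
	u64 time_to_space = (root_blocks / 2) + 1;                     /* 40001 */
	u64 space_to_time = mul_u64_u32_div(1000000, 8 - 2, 100) - 1;  /* 59999 */

With these values, ufs_new_fragments() flips fs_optim to UFS_OPTTIME once
free fragments drop below s_space_to_time and back to UFS_OPTSPACE once
they exceed s_time_to_space, while statfs() reports f_bavail as whatever
free space remains above the s_root_blocks reservation.
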
index 0cbd5d3..150eef6 100644 (file)
@@ -733,10 +733,8 @@ struct ufs_sb_private_info {
        __u32   s_dblkno;       /* offset of first data after cg */
        __u32   s_cgoffset;     /* cylinder group offset in cylinder */
        __u32   s_cgmask;       /* used to calc mod fs_ntrak */
-       __u32   s_size;         /* number of blocks (fragments) in fs */
-       __u32   s_dsize;        /* number of data blocks in fs */
-       __u64   s_u2_size;      /* ufs2: number of blocks (fragments) in fs */
-       __u64   s_u2_dsize;     /*ufs2:  number of data blocks in fs */
+       __u64   s_size;         /* number of blocks (fragments) in fs */
+       __u64   s_dsize;        /* number of data blocks in fs */
        __u32   s_ncg;          /* number of cylinder groups */
        __u32   s_bsize;        /* size of basic blocks */
        __u32   s_fsize;        /* size of fragments */
@@ -793,6 +791,9 @@ struct ufs_sb_private_info {
        __u32   s_maxsymlinklen;/* upper limit on fast symlinks' size */
        __s32   fs_magic;       /* filesystem magic */
        unsigned int s_dirblksize;
+       __u64   s_root_blocks;
+       __u64   s_time_to_space;
+       __u64   s_space_to_time;
 };
 
 /*
index f41ad0a..02497a4 100644 (file)
@@ -243,9 +243,8 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev
 struct page *ufs_get_locked_page(struct address_space *mapping,
                                 pgoff_t index)
 {
-       struct page *page;
-
-       page = find_lock_page(mapping, index);
+       struct inode *inode = mapping->host;
+       struct page *page = find_lock_page(mapping, index);
        if (!page) {
                page = read_mapping_page(mapping, index, NULL);
 
@@ -253,7 +252,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
                        printk(KERN_ERR "ufs_change_blocknr: "
                               "read_mapping_page error: ino %lu, index: %lu\n",
                               mapping->host->i_ino, index);
-                       goto out;
+                       return page;
                }
 
                lock_page(page);
@@ -262,8 +261,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
                        /* Truncate got there first */
                        unlock_page(page);
                        put_page(page);
-                       page = NULL;
-                       goto out;
+                       return NULL;
                }
 
                if (!PageUptodate(page) || PageError(page)) {
@@ -272,11 +270,12 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
 
                        printk(KERN_ERR "ufs_change_blocknr: "
                               "can not read page: ino %lu, index: %lu\n",
-                              mapping->host->i_ino, index);
+                              inode->i_ino, index);
 
-                       page = ERR_PTR(-EIO);
+                       return ERR_PTR(-EIO);
                }
        }
-out:
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, 1 << inode->i_blkbits, 0);
        return page;
 }
index 398019f..9fc7119 100644 (file)
@@ -350,16 +350,11 @@ static inline void *ubh_get_data_ptr(struct ufs_sb_private_info *uspi,
 #define ubh_blkmap(ubh,begin,bit) \
        ((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb)))
 
-/*
- * Determine the number of available frags given a
- * percentage to hold in reserve.
- */
 static inline u64
-ufs_freespace(struct ufs_sb_private_info *uspi, int percentreserved)
+ufs_freefrags(struct ufs_sb_private_info *uspi)
 {
        return ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
-               uspi->cs_total.cs_nffree -
-               (uspi->s_dsize * (percentreserved) / 100);
+               uspi->cs_total.cs_nffree;
 }
 
 /*
index f7555fc..1d622f2 100644 (file)
@@ -340,9 +340,28 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
        bool must_wait, return_to_userland;
        long blocking_state;
 
-       BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
-
        ret = VM_FAULT_SIGBUS;
+
+       /*
+        * We don't do userfault handling for the final child pid update.
+        *
+        * We also don't do userfault handling during
+        * coredumping. hugetlbfs has the special
+        * follow_hugetlb_page() to skip missing pages in the
+        * FOLL_DUMP case, anon memory also checks for FOLL_DUMP with
+        * the no_page_table() helper in follow_page_mask(), but the
+        * shmem_vm_ops->fault method is invoked even during
+        * coredumping without mmap_sem and it ends up here.
+        */
+       if (current->flags & (PF_EXITING|PF_DUMPCORE))
+               goto out;
+
+       /*
+        * Coredumping runs without mmap_sem so we can only check that
+        * the mmap_sem is held, if PF_DUMPCORE was not set.
+        */
+       WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
        ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
        if (!ctx)
                goto out;
@@ -361,12 +380,6 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
                goto out;
 
        /*
-        * We don't do userfault handling for the final child pid update.
-        */
-       if (current->flags & PF_EXITING)
-               goto out;
-
-       /*
         * Check that we can return VM_FAULT_RETRY.
         *
         * NOTE: it should become possible to return VM_FAULT_RETRY
index 5c90f82..a6e955b 100644 (file)
@@ -98,8 +98,7 @@ xfs-y                         += xfs_aops.o \
                                   xfs_sysfs.o \
                                   xfs_trans.o \
                                   xfs_xattr.o \
-                                  kmem.o \
-                                  uuid.o
+                                  kmem.o
 
 # low-level transaction/log code
 xfs-y                          += xfs_log.o \
diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c
deleted file mode 100644 (file)
index b83f76b..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of an uuid_t */
-typedef struct {
-       __be32  uu_timelow;
-       __be16  uu_timemid;
-       __be16  uu_timehi;
-       __be16  uu_clockseq;
-       __be16  uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
-       xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
-       fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
-                  be16_to_cpu(uup->uu_timemid);
-       fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
-       int     i;
-       char    *cp = (char *)uuid;
-
-       if (uuid == NULL)
-               return 0;
-       /* implied check of version number here... */
-       for (i = 0; i < sizeof *uuid; i++)
-               if (*cp++) return 0;    /* not nil */
-       return 1;       /* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
-       return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h
deleted file mode 100644 (file)
index 104db0f..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
-       unsigned char   __u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-static inline void
-uuid_copy(uuid_t *dst, uuid_t *src)
-{
-       memcpy(dst, src, sizeof(uuid_t));
-}
-
-#endif /* __XFS_SUPPORT_UUID_H__ */
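
The deleted XFS-private helpers have direct generic equivalents; roughly
what <linux/uuid.h> provides in their place (a sketch, not the verbatim
header):

	typedef struct {
		__u8 b[16];
	} uuid_t;

	extern const uuid_t uuid_null;

	static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2)
	{
		return memcmp(u1, u2, sizeof(uuid_t)) == 0;
	}

	static inline bool uuid_is_null(const uuid_t *uuid)
	{
		return uuid_equal(uuid, &uuid_null);
	}

	static inline void uuid_copy(uuid_t *dst, const uuid_t *src)
	{
		memcpy(dst, src, sizeof(*dst));
	}

The one XFS-specific helper, uuid_getnodeuniq(), is open-coded with
get_unaligned_be*() at its sole caller in the xfs_mountfs() hunk below.
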
index 09af0f7..d20c29b 100644 (file)
@@ -276,7 +276,7 @@ xfs_end_io(
        struct xfs_inode        *ip = XFS_I(ioend->io_inode);
        xfs_off_t               offset = ioend->io_offset;
        size_t                  size = ioend->io_size;
-       int                     error = ioend->io_bio->bi_error;
+       int                     error;
 
        /*
         * Just clean up the in-memory structures if the fs has been shut down.
@@ -289,6 +289,7 @@ xfs_end_io(
        /*
         * Clean up any COW blocks on an I/O error.
         */
+       error = blk_status_to_errno(ioend->io_bio->bi_status);
        if (unlikely(error)) {
                switch (ioend->io_type) {
                case XFS_IO_COW:
@@ -332,7 +333,7 @@ xfs_end_bio(
        else if (ioend->io_append_trans)
                queue_work(mp->m_data_workqueue, &ioend->io_work);
        else
-               xfs_destroy_ioend(ioend, bio->bi_error);
+               xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
 }
 
 STATIC int
@@ -500,11 +501,12 @@ xfs_submit_ioend(
         * time.
         */
        if (status) {
-               ioend->io_bio->bi_error = status;
+               ioend->io_bio->bi_status = errno_to_blk_status(status);
                bio_endio(ioend->io_bio);
                return status;
        }
 
+       ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
        submit_bio(ioend->io_bio);
        return 0;
 }
@@ -564,6 +566,7 @@ xfs_chain_bio(
        bio_chain(ioend->io_bio, new);
        bio_get(ioend->io_bio);         /* for xfs_destroy_ioend */
        ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+       ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
        submit_bio(ioend->io_bio);
        ioend->io_bio = new;
 }
@@ -1316,9 +1319,12 @@ xfs_vm_bmap(
         * The swap code (ab-)uses ->bmap to get a block mapping and then
         * bypasses the file system for actual I/O.  We really can't allow
         * that on reflink inodes, so we have to skip out here.  And yes,
-        * 0 is the magic code for a bmap error..
+        * 0 is the magic code for a bmap error.
+        *
+        * Since we don't pass back blockdev info, we can't return bmap
+        * information for rt files either.
         */
-       if (xfs_is_reflink_inode(ip))
+       if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
                return 0;
 
        filemap_write_and_wait(mapping);
index 07b77b7..438505f 100644 (file)
@@ -117,7 +117,7 @@ static inline void
 __xfs_buf_ioacct_dec(
        struct xfs_buf  *bp)
 {
-       ASSERT(spin_is_locked(&bp->b_lock));
+       lockdep_assert_held(&bp->b_lock);
 
        if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
                bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
@@ -1227,8 +1227,11 @@ xfs_buf_bio_end_io(
         * don't overwrite existing errors - otherwise we can lose errors on
         * buffers that require multiple bios to complete.
         */
-       if (bio->bi_error)
-               cmpxchg(&bp->b_io_error, 0, bio->bi_error);
+       if (bio->bi_status) {
+               int error = blk_status_to_errno(bio->bi_status);
+
+               cmpxchg(&bp->b_io_error, 0, error);
+       }
 
        if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
                invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
index 5fb5a09..17f27a2 100644 (file)
@@ -237,7 +237,11 @@ xfs_file_dax_read(
        if (!count)
                return 0; /* skip atime */
 
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+               if (iocb->ki_flags & IOCB_NOWAIT)
+                       return -EAGAIN;
+               xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       }
        ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
@@ -541,7 +545,11 @@ xfs_file_dio_aio_write(
                iolock = XFS_IOLOCK_SHARED;
        }
 
-       xfs_ilock(ip, iolock);
+       if (!xfs_ilock_nowait(ip, iolock)) {
+               if (iocb->ki_flags & IOCB_NOWAIT)
+                       return -EAGAIN;
+               xfs_ilock(ip, iolock);
+       }
 
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
@@ -553,9 +561,15 @@ xfs_file_dio_aio_write(
         * otherwise demote the lock if we had to take the exclusive lock
         * for other reasons in xfs_file_aio_write_checks.
         */
-       if (unaligned_io)
-               inode_dio_wait(inode);
-       else if (iolock == XFS_IOLOCK_EXCL) {
+       if (unaligned_io) {
+               /* If we are going to wait for other DIO to finish, bail */
+               if (iocb->ki_flags & IOCB_NOWAIT) {
+                       if (atomic_read(&inode->i_dio_count))
+                               return -EAGAIN;
+               } else {
+                       inode_dio_wait(inode);
+               }
+       } else if (iolock == XFS_IOLOCK_EXCL) {
                xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
                iolock = XFS_IOLOCK_SHARED;
        }
@@ -585,7 +599,12 @@ xfs_file_dax_write(
        size_t                  count;
        loff_t                  pos;
 
-       xfs_ilock(ip, iolock);
+       if (!xfs_ilock_nowait(ip, iolock)) {
+               if (iocb->ki_flags & IOCB_NOWAIT)
+                       return -EAGAIN;
+               xfs_ilock(ip, iolock);
+       }
+
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
                goto out;
@@ -892,6 +911,7 @@ xfs_file_open(
                return -EFBIG;
        if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
                return -EIO;
+       file->f_mode |= FMODE_AIO_NOWAIT;
        return 0;
 }
 
index f61c84f..990210f 100644 (file)
@@ -66,7 +66,6 @@ xfs_inode_alloc(
 
        XFS_STATS_INC(mp, vn_active);
        ASSERT(atomic_read(&ip->i_pincount) == 0);
-       ASSERT(!spin_is_locked(&ip->i_flags_lock));
        ASSERT(!xfs_isiflocked(ip));
        ASSERT(ip->i_ino == 0);
 
@@ -190,7 +189,7 @@ xfs_perag_set_reclaim_tag(
 {
        struct xfs_mount        *mp = pag->pag_mount;
 
-       ASSERT(spin_is_locked(&pag->pag_ici_lock));
+       lockdep_assert_held(&pag->pag_ici_lock);
        if (pag->pag_ici_reclaimable++)
                return;
 
@@ -212,7 +211,7 @@ xfs_perag_clear_reclaim_tag(
 {
        struct xfs_mount        *mp = pag->pag_mount;
 
-       ASSERT(spin_is_locked(&pag->pag_ici_lock));
+       lockdep_assert_held(&pag->pag_ici_lock);
        if (--pag->pag_ici_reclaimable)
                return;
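
The assertion change is not cosmetic: spin_is_locked() only reports that
*somebody* holds the lock, and on !CONFIG_SMP builds it is always false,
so ASSERT(spin_is_locked(...)) fires spuriously there.
lockdep_assert_held() instead checks that the current context holds the
lock, and compiles away entirely without CONFIG_LOCKDEP.  The pattern, as
a hypothetical helper around the real fields:

	static void xfs_perag_reclaim_inc(struct xfs_perag *pag)
	{
		lockdep_assert_held(&pag->pag_ici_lock);
		pag->pag_ici_reclaimable++;
	}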
 
index 08cb7d1..013cc78 100644 (file)
@@ -834,9 +834,7 @@ xfs_inode_item_format_convert(
                in_f->ilf_dsize = in_f32->ilf_dsize;
                in_f->ilf_ino = in_f32->ilf_ino;
                /* copy biggest field of ilf_u */
-               memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
-                      in_f32->ilf_u.ilfu_uuid.__u_bits,
-                      sizeof(uuid_t));
+               uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
                in_f->ilf_blkno = in_f32->ilf_blkno;
                in_f->ilf_len = in_f32->ilf_len;
                in_f->ilf_boffset = in_f32->ilf_boffset;
@@ -851,9 +849,7 @@ xfs_inode_item_format_convert(
                in_f->ilf_dsize = in_f64->ilf_dsize;
                in_f->ilf_ino = in_f64->ilf_ino;
                /* copy biggest field of ilf_u */
-               memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
-                      in_f64->ilf_u.ilfu_uuid.__u_bits,
-                      sizeof(uuid_t));
+               uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid);
                in_f->ilf_blkno = in_f64->ilf_blkno;
                in_f->ilf_len = in_f64->ilf_len;
                in_f->ilf_boffset = in_f64->ilf_boffset;
index 94e5bdf..05dc87e 100644 (file)
@@ -995,6 +995,11 @@ xfs_file_iomap_begin(
                lockmode = xfs_ilock_data_map_shared(ip);
        }
 
+       if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+               error = -EAGAIN;
+               goto out_unlock;
+       }
+
        ASSERT(offset <= mp->m_super->s_maxbytes);
        if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
                length = mp->m_super->s_maxbytes - offset;
@@ -1016,6 +1021,15 @@ xfs_file_iomap_begin(
 
        if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
                if (flags & IOMAP_DIRECT) {
+                       /*
+                        * A reflinked inode will result in CoW alloc.
+                        * FIXME: It could still overwrite on unshared extents
+                        * and not need allocation.
+                        */
+                       if (flags & IOMAP_NOWAIT) {
+                               error = -EAGAIN;
+                               goto out_unlock;
+                       }
                        /* may drop and re-acquire the ilock */
                        error = xfs_reflink_allocate_cow(ip, &imap, &shared,
                                        &lockmode);
@@ -1033,6 +1047,14 @@ xfs_file_iomap_begin(
 
        if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
                /*
+                * If nowait is set bail since we are going to make
+                * allocations.
+                */
+               if (flags & IOMAP_NOWAIT) {
+                       error = -EAGAIN;
+                       goto out_unlock;
+               }
+               /*
                 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
                 * pages to keep the chunks of work done somewhat symmetric
                 * with the work writeback does. This is a completely arbitrary
index 044fb0e..2d167fe 100644 (file)
@@ -19,6 +19,7 @@
 #define __XFS_LINUX__
 
 #include <linux/types.h>
+#include <linux/uuid.h>
 
 /*
  * Kernel specific type declarations for XFS
@@ -42,7 +43,6 @@ typedef __u32                 xfs_nlink_t;
 
 #include "kmem.h"
 #include "mrlock.h"
-#include "uuid.h"
 
 #include <linux/semaphore.h>
 #include <linux/mm.h>
index cd0b077..8cec1e5 100644 (file)
@@ -352,13 +352,13 @@ xlog_header_check_mount(
 {
        ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
 
-       if (uuid_is_nil(&head->h_fs_uuid)) {
+       if (uuid_is_null(&head->h_fs_uuid)) {
                /*
                 * IRIX doesn't write the h_fs_uuid or h_fmt fields. If
-                * h_fs_uuid is nil, we assume this log was last mounted
+                * h_fs_uuid is null, we assume this log was last mounted
                 * by IRIX and continue.
                 */
-               xfs_warn(mp, "nil uuid in log - IRIX style log");
+               xfs_warn(mp, "null uuid in log - IRIX style log");
        } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
                xfs_warn(mp, "log has mismatched uuid - can't recover");
                xlog_header_check_dump(mp, head);
index 2eaf818..d249546 100644 (file)
@@ -74,20 +74,19 @@ xfs_uuid_mount(
        int                     hole, i;
 
        /* Publish UUID in struct super_block */
-       BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t));
-       memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t));
+       uuid_copy(&mp->m_super->s_uuid, uuid);
 
        if (mp->m_flags & XFS_MOUNT_NOUUID)
                return 0;
 
-       if (uuid_is_nil(uuid)) {
-               xfs_warn(mp, "Filesystem has nil UUID - can't mount");
+       if (uuid_is_null(uuid)) {
+               xfs_warn(mp, "Filesystem has null UUID - can't mount");
                return -EINVAL;
        }
 
        mutex_lock(&xfs_uuid_table_mutex);
        for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
-               if (uuid_is_nil(&xfs_uuid_table[i])) {
+               if (uuid_is_null(&xfs_uuid_table[i])) {
                        hole = i;
                        continue;
                }
@@ -124,7 +123,7 @@ xfs_uuid_unmount(
 
        mutex_lock(&xfs_uuid_table_mutex);
        for (i = 0; i < xfs_uuid_table_size; i++) {
-               if (uuid_is_nil(&xfs_uuid_table[i]))
+               if (uuid_is_null(&xfs_uuid_table[i]))
                        continue;
                if (!uuid_equal(uuid, &xfs_uuid_table[i]))
                        continue;
@@ -793,7 +792,10 @@ xfs_mountfs(
         *  Copies the low order bits of the timestamp and the randomly
         *  set "sequence" number out of a UUID.
         */
-       uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
+       mp->m_fixedfsid[0] =
+               (get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
+                get_unaligned_be16(&sbp->sb_uuid.b[4]);
+       mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
 
        mp->m_dmevmask = 0;     /* not persistent; set after each mount */
 
index 455a575..97df4db 100644 (file)
@@ -1766,7 +1766,8 @@ STATIC int __init
 xfs_init_zones(void)
 {
        xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
-                       offsetof(struct xfs_ioend, io_inline_bio));
+                       offsetof(struct xfs_ioend, io_inline_bio),
+                       BIOSET_NEED_BVECS);
        if (!xfs_ioend_bioset)
                goto out;
 
index 197f3ff..c1b163c 100644 (file)
@@ -61,17 +61,18 @@ bool acpi_ata_match(acpi_handle handle);
 bool acpi_bay_match(acpi_handle handle);
 bool acpi_dock_match(acpi_handle handle);
 
-bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs);
-union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
+bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs);
+union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
                        u64 rev, u64 func, union acpi_object *argv4);
 
 static inline union acpi_object *
-acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, u64 rev, u64 func,
-                       union acpi_object *argv4, acpi_object_type type)
+acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev,
+                       u64 func, union acpi_object *argv4,
+                       acpi_object_type type)
 {
        union acpi_object *obj;
 
-       obj = acpi_evaluate_dsm(handle, uuid, rev, func, argv4);
+       obj = acpi_evaluate_dsm(handle, guid, rev, func, argv4);
        if (obj && obj->type != type) {
                ACPI_FREE(obj);
                obj = NULL;
@@ -210,7 +211,8 @@ struct acpi_device_flags {
        u32 of_compatible_ok:1;
        u32 coherent_dma:1;
        u32 cca_seen:1;
-       u32 reserved:20;
+       u32 spi_i2c_slave:1;
+       u32 reserved:19;
 };
 
 /* File System */
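
With the _DSM interfaces taking a guid_t instead of a raw byte array,
callers now define the GUID via GUID_INIT() from <linux/uuid.h>.  An
illustrative call sequence (the GUID value and the revision/function
numbers here are hypothetical):

	static const guid_t my_dsm_guid =
		GUID_INIT(0xd03dd17e, 0x39e8, 0x4d93,
			  0x98, 0x9f, 0xa2, 0xfa, 0x5c, 0xa5, 0x9c, 0x6b);

	union acpi_object *obj;

	if (acpi_check_dsm(handle, &my_dsm_guid, 1, BIT(1))) {
		obj = acpi_evaluate_dsm(handle, &my_dsm_guid, 1, 1, NULL);
		if (obj)
			ACPI_FREE(obj);
	}
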
index d92543f..bdc55c0 100644 (file)
@@ -374,6 +374,20 @@ struct acpi_table_desc {
        u16 validation_count;
 };
 
+/*
+ * Maximum value of the validation_count field in struct acpi_table_desc.
+ * When reached, validation_count cannot be changed any more and the table will
+ * be permanently regarded as validated.
+ *
+ * This is to prevent situations in which unbalanced table get/put operations
+ * may cause premature table unmapping in the OS to happen.
+ *
+ * The maximum validation count can be defined to any value, but should be
+ * greater than the maximum number of OS early stage mapping slots to avoid
+ * leaking early stage table mappings to the late stage.
+ */
+#define ACPI_MAX_TABLE_VALIDATIONS          ACPI_UINT16_MAX
+
 /* Masks for Flags field above */
 
 #define ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL  (0)        /* Virtual address, external maintained */
index 370c0a0..d66432c 100644 (file)
@@ -43,6 +43,8 @@
 #ifndef _DT_BINDINGS_CLK_SUN50I_A64_H_
 #define _DT_BINDINGS_CLK_SUN50I_A64_H_
 
+#define CLK_PLL_PERIPH0                11
+
 #define CLK_BUS_MIPI_DSI       28
 #define CLK_BUS_CE             29
 #define CLK_BUS_DMA            30
index c2afc41..e139fe5 100644 (file)
@@ -43,6 +43,8 @@
 #ifndef _DT_BINDINGS_CLK_SUN8I_H3_H_
 #define _DT_BINDINGS_CLK_SUN8I_H3_H_
 
+#define CLK_PLL_PERIPH0                9
+
 #define CLK_CPUX               14
 
 #define CLK_BUS_CE             20
index 137e4a3..cafdfb8 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/resource_ext.h>
 #include <linux/device.h>
 #include <linux/property.h>
+#include <linux/uuid.h>
 
 #ifndef _LINUX
 #define _LINUX
@@ -457,7 +458,6 @@ struct acpi_osc_context {
        struct acpi_buffer ret;         /* free by caller if success */
 };
 
-acpi_status acpi_str_to_uuid(char *str, u8 *uuid);
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
 /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */
@@ -741,7 +741,7 @@ static inline bool acpi_driver_match_device(struct device *dev,
 }
 
 static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle,
-                                                  const u8 *uuid,
+                                                  const guid_t *guid,
                                                   int rev, int func,
                                                   union acpi_object *argv4)
 {
index d1b04b0..664a27d 100644 (file)
@@ -118,7 +118,6 @@ static inline void *bio_data(struct bio *bio)
 /*
  * will die
  */
-#define bio_to_phys(bio)       (page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
 #define bvec_to_phys(bv)       (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
 
 /*
@@ -373,8 +372,11 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
        return bio_split(bio, sectors, gfp, bs);
 }
 
-extern struct bio_set *bioset_create(unsigned int, unsigned int);
-extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
+extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
+enum {
+       BIOSET_NEED_BVECS = BIT(0),
+       BIOSET_NEED_RESCUER = BIT(1),
+};
 extern void bioset_free(struct bio_set *);
 extern mempool_t *biovec_create_pool(int pool_entries);
 
@@ -392,11 +394,6 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
        return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
 }
 
-static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
-{
-       return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
-}
-
 static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
        return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
@@ -414,7 +411,13 @@ extern void bio_endio(struct bio *);
 
 static inline void bio_io_error(struct bio *bio)
 {
-       bio->bi_error = -EIO;
+       bio->bi_status = BLK_STS_IOERR;
+       bio_endio(bio);
+}
+
+static inline void bio_wouldblock_error(struct bio *bio)
+{
+       bio->bi_status = BLK_STS_AGAIN;
        bio_endio(bio);
 }
 
@@ -426,6 +429,7 @@ extern void bio_advance(struct bio *, unsigned);
 
 extern void bio_init(struct bio *bio, struct bio_vec *table,
                     unsigned short max_vecs);
+extern void bio_uninit(struct bio *);
 extern void bio_reset(struct bio *);
 void bio_chain(struct bio *, struct bio *);
 
index fcd6410..23d32ff 100644 (file)
@@ -39,8 +39,6 @@ struct blk_mq_hw_ctx {
        struct blk_mq_tags      *tags;
        struct blk_mq_tags      *sched_tags;
 
-       struct srcu_struct      queue_rq_srcu;
-
        unsigned long           queued;
        unsigned long           run;
 #define BLK_MQ_MAX_DISPATCH_ORDER      7
@@ -62,6 +60,9 @@ struct blk_mq_hw_ctx {
        struct dentry           *debugfs_dir;
        struct dentry           *sched_debugfs_dir;
 #endif
+
+       /* Must be the last member - see also blk_mq_hw_ctx_size(). */
+       struct srcu_struct      queue_rq_srcu[0];
 };
 
 struct blk_mq_tag_set {
@@ -87,7 +88,8 @@ struct blk_mq_queue_data {
        bool last;
 };
 
-typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
+typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
+               const struct blk_mq_queue_data *);
 typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -142,6 +144,8 @@ struct blk_mq_ops {
        init_request_fn         *init_request;
        exit_request_fn         *exit_request;
        reinit_request_fn       *reinit_request;
+       /* Called from inside blk_get_request() */
+       void (*initialize_rq_fn)(struct request *rq);
 
        map_queues_fn           *map_queues;
 
@@ -155,10 +159,6 @@ struct blk_mq_ops {
 };
 
 enum {
-       BLK_MQ_RQ_QUEUE_OK      = 0,    /* queued fine */
-       BLK_MQ_RQ_QUEUE_BUSY    = 1,    /* requeue IO for later */
-       BLK_MQ_RQ_QUEUE_ERROR   = 2,    /* end IO with error */
-
        BLK_MQ_F_SHOULD_MERGE   = 1 << 0,
        BLK_MQ_F_TAG_SHARED     = 1 << 1,
        BLK_MQ_F_SG_MERGE       = 1 << 2,
@@ -204,10 +204,10 @@ enum {
        BLK_MQ_REQ_INTERNAL     = (1 << 2), /* allocate internal/sched tag */
 };
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
                unsigned int flags);
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op,
-               unsigned int flags, unsigned int hctx_idx);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+               unsigned int op, unsigned int flags, unsigned int hctx_idx);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 enum {
@@ -230,8 +230,8 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
 
 int blk_mq_request_started(struct request *rq);
 void blk_mq_start_request(struct request *rq);
-void blk_mq_end_request(struct request *rq, int error);
-void __blk_mq_end_request(struct request *rq, int error);
+void blk_mq_end_request(struct request *rq, blk_status_t error);
+void __blk_mq_end_request(struct request *rq, blk_status_t error);
 
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
@@ -247,6 +247,8 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_unquiesce_queue(struct request_queue *q);
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
@@ -264,6 +266,8 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
 int blk_mq_map_queues(struct blk_mq_tag_set *set);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
+void blk_mq_quiesce_queue_nowait(struct request_queue *q);
+
 /*
  * Driver command data is immediately after the request. So subtract request
  * size to get back to the original request, add request size to get the PDU.
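
Drivers' ->queue_rq() now returns a blk_status_t directly instead of the
BLK_MQ_RQ_QUEUE_* codes removed above.  A sketch of the conversion in a
hypothetical driver (my_hw_queue_full() and my_hw_submit() are
stand-ins):

	static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
					const struct blk_mq_queue_data *bd)
	{
		struct request *rq = bd->rq;

		if (my_hw_queue_full(hctx))
			return BLK_STS_RESOURCE;  /* was BLK_MQ_RQ_QUEUE_BUSY  */

		blk_mq_start_request(rq);
		if (my_hw_submit(rq))
			return BLK_STS_IOERR;     /* was BLK_MQ_RQ_QUEUE_ERROR */
		return BLK_STS_OK;                /* was BLK_MQ_RQ_QUEUE_OK    */
	}
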
index 61339bc..d2eb87c 100644 (file)
@@ -17,6 +17,27 @@ struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 
+/*
+ * Block error status values.  See block/blk-core:blk_errors for the details.
+ */
+typedef u8 __bitwise blk_status_t;
+#define        BLK_STS_OK 0
+#define BLK_STS_NOTSUPP                ((__force blk_status_t)1)
+#define BLK_STS_TIMEOUT                ((__force blk_status_t)2)
+#define BLK_STS_NOSPC          ((__force blk_status_t)3)
+#define BLK_STS_TRANSPORT      ((__force blk_status_t)4)
+#define BLK_STS_TARGET         ((__force blk_status_t)5)
+#define BLK_STS_NEXUS          ((__force blk_status_t)6)
+#define BLK_STS_MEDIUM         ((__force blk_status_t)7)
+#define BLK_STS_PROTECTION     ((__force blk_status_t)8)
+#define BLK_STS_RESOURCE       ((__force blk_status_t)9)
+#define BLK_STS_IOERR          ((__force blk_status_t)10)
+
+/* hack for device mapper, don't use elsewhere: */
+#define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)
+
+#define BLK_STS_AGAIN          ((__force blk_status_t)12)
+
 struct blk_issue_stat {
        u64 stat;
 };
@@ -28,13 +49,14 @@ struct blk_issue_stat {
 struct bio {
        struct bio              *bi_next;       /* request queue link */
        struct block_device     *bi_bdev;
-       int                     bi_error;
+       blk_status_t            bi_status;
        unsigned int            bi_opf;         /* bottom bits req flags,
                                                 * top bits REQ_OP. Use
                                                 * accessors.
                                                 */
        unsigned short          bi_flags;       /* status, etc and bvec pool number */
        unsigned short          bi_ioprio;
+       unsigned short          bi_write_hint;
 
        struct bvec_iter        bi_iter;
 
@@ -205,6 +227,7 @@ enum req_flag_bits {
        /* command specific flags for REQ_OP_WRITE_ZEROES: */
        __REQ_NOUNMAP,          /* do not free blocks when zeroing */
 
+       __REQ_NOWAIT,           /* Don't wait if request will block */
        __REQ_NR_BITS,          /* stops here */
 };
 
@@ -223,6 +246,7 @@ enum req_flag_bits {
 #define REQ_BACKGROUND         (1ULL << __REQ_BACKGROUND)
 
 #define REQ_NOUNMAP            (1ULL << __REQ_NOUNMAP)
+#define REQ_NOWAIT             (1ULL << __REQ_NOWAIT)
 
 #define REQ_FAILFAST_MASK \
        (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
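
bi_error (a negative errno) becomes bi_status (an opaque blk_status_t);
blk_status_to_errno() and errno_to_blk_status() convert at the
boundaries, as the XFS hunks earlier show.  A completion-side sketch with
hypothetical callbacks:

	static void my_end_io(struct bio *bio)
	{
		int error = blk_status_to_errno(bio->bi_status);

		if (error)
			pr_err("I/O failed: %d\n", error);
		bio_put(bio);
	}

	static void my_fail_bio(struct bio *bio, int error)
	{
		bio->bi_status = errno_to_blk_status(error); /* e.g. -EIO */
		bio_endio(bio);
	}
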
index ab92c4e..25f6a0c 100644 (file)
@@ -55,7 +55,7 @@ struct blk_stat_callback;
  */
 #define BLKCG_MAX_POLS         3
 
-typedef void (rq_end_io_fn)(struct request *, int);
+typedef void (rq_end_io_fn)(struct request *, blk_status_t);
 
 #define BLK_RL_SYNCFULL                (1U << 0)
 #define BLK_RL_ASYNCFULL       (1U << 1)
@@ -225,6 +225,8 @@ struct request {
 
        unsigned int extra_len; /* length of alignment and padding */
 
+       unsigned short write_hint;
+
        unsigned long deadline;
        struct list_head timeout_list;
 
@@ -391,6 +393,8 @@ struct request_queue {
        int                     nr_rqs[2];      /* # allocated [a]sync rqs */
        int                     nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
 
+       atomic_t                shared_hctx_restart;
+
        struct blk_queue_stats  *stats;
        struct rq_wb            *rq_wb;
 
@@ -410,8 +414,12 @@ struct request_queue {
        rq_timed_out_fn         *rq_timed_out_fn;
        dma_drain_needed_fn     *dma_drain_needed;
        lld_busy_fn             *lld_busy_fn;
+       /* Called just after a request is allocated */
        init_rq_fn              *init_rq_fn;
+       /* Called just before a request is freed */
        exit_rq_fn              *exit_rq_fn;
+       /* Called from inside blk_get_request() */
+       void (*initialize_rq_fn)(struct request *rq);
 
        const struct blk_mq_ops *mq_ops;
 
@@ -586,6 +594,11 @@ struct request_queue {
 
        size_t                  cmd_size;
        void                    *rq_alloc_data;
+
+       struct work_struct      release_work;
+
+#define BLK_MAX_WRITE_HINTS    5
+       u64                     write_hints[BLK_MAX_WRITE_HINTS];
 };
 
 #define QUEUE_FLAG_QUEUED      1       /* uses generic tag queueing */
@@ -618,6 +631,8 @@ struct request_queue {
 #define QUEUE_FLAG_STATS       27      /* track rq completion times */
 #define QUEUE_FLAG_POLL_STATS  28      /* collecting stats for hybrid polling */
 #define QUEUE_FLAG_REGISTERED  29      /* queue has been registered to a disk */
+#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */
+#define QUEUE_FLAG_QUIESCED    31      /* queue has been quiesced */
 
 #define QUEUE_FLAG_DEFAULT     ((1 << QUEUE_FLAG_IO_STAT) |            \
                                 (1 << QUEUE_FLAG_STACKABLE)    |       \
@@ -629,6 +644,13 @@ struct request_queue {
                                 (1 << QUEUE_FLAG_SAME_COMP)    |       \
                                 (1 << QUEUE_FLAG_POLL))
 
+/*
+ * @q->queue_lock is set while a queue is being initialized. Since we know
+ * that no other threads access the queue object before @q->queue_lock has
+ * been set, it is safe to manipulate queue flags without holding the
+ * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
+ * blk_init_allocated_queue().
+ */
 static inline void queue_lockdep_assert_held(struct request_queue *q)
 {
        if (q->queue_lock)
@@ -708,10 +730,13 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_secure_erase(q) \
        (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
 #define blk_queue_dax(q)       test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
+#define blk_queue_scsi_passthrough(q)  \
+       test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
 
 #define blk_noretry_request(rq) \
        ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
                             REQ_FAILFAST_DRIVER))
+#define blk_queue_quiesced(q)  test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
 
 static inline bool blk_account_rq(struct request *rq)
 {
@@ -810,7 +835,8 @@ static inline bool rq_mergeable(struct request *rq)
 
 static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
 {
-       if (bio_data(a) == bio_data(b))
+       if (bio_page(a) == bio_page(b) &&
+           bio_offset(a) == bio_offset(b))
                return true;
 
        return false;
@@ -858,19 +884,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn;
 #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
 #define BLK_MIN_SG_TIMEOUT     (7 * HZ)
 
-#ifdef CONFIG_BOUNCE
-extern int init_emergency_isa_pool(void);
-extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
-#else
-static inline int init_emergency_isa_pool(void)
-{
-       return 0;
-}
-static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
-{
-}
-#endif /* CONFIG_MMU */
-
 struct rq_map_data {
        struct page **pages;
        int page_order;
@@ -929,7 +942,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
-extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
+extern struct request *blk_get_request(struct request_queue *, unsigned int op,
+                                      gfp_t gfp_mask);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -937,12 +951,11 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
                             int (*bio_ctr)(struct bio *, struct bio *, void *),
                             void *data);
 extern void blk_rq_unprep_clone(struct request *rq);
-extern int blk_insert_cloned_request(struct request_queue *q,
+extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
                                     struct request *rq);
 extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
-extern void blk_queue_split(struct request_queue *, struct bio **,
-                           struct bio_set *);
+extern void blk_queue_split(struct request_queue *, struct bio **);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
 extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
 extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
@@ -963,7 +976,6 @@ extern void __blk_run_queue(struct request_queue *q);
 extern void __blk_run_queue_uncond(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_run_queue_async(struct request_queue *q);
-extern void blk_mq_quiesce_queue(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
                           struct rq_map_data *, void __user *, unsigned long,
                           gfp_t);
@@ -977,6 +989,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *,
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
                                  struct request *, int, rq_end_io_fn *);
 
+int blk_status_to_errno(blk_status_t status);
+blk_status_t errno_to_blk_status(int errno);
+
 bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -1109,16 +1124,16 @@ extern struct request *blk_fetch_request(struct request_queue *q);
  * blk_end_request() for parts of the original function.
  * This prevents code duplication in drivers.
  */
-extern bool blk_update_request(struct request *rq, int error,
+extern bool blk_update_request(struct request *rq, blk_status_t error,
                               unsigned int nr_bytes);
-extern void blk_finish_request(struct request *rq, int error);
-extern bool blk_end_request(struct request *rq, int error,
+extern void blk_finish_request(struct request *rq, blk_status_t error);
+extern bool blk_end_request(struct request *rq, blk_status_t error,
                            unsigned int nr_bytes);
-extern void blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request(struct request *rq, int error,
+extern void blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request(struct request *rq, blk_status_t error,
                              unsigned int nr_bytes);
-extern void __blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request_cur(struct request *rq, int error);
+extern void __blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
 
 extern void blk_complete_request(struct request *);
 extern void __blk_complete_request(struct request *);
@@ -1370,11 +1385,6 @@ enum blk_default_limits {
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-static inline unsigned long queue_bounce_pfn(struct request_queue *q)
-{
-       return q->limits.bounce_pfn;
-}
-
 static inline unsigned long queue_segment_boundary(struct request_queue *q)
 {
        return q->limits.seg_boundary_mask;
@@ -1776,7 +1786,7 @@ struct blk_integrity_iter {
        const char              *disk_name;
 };
 
-typedef int (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
 
 struct blk_integrity_profile {
        integrity_processing_fn         *generate_fn;
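
The blk_status_t conversion above changes rq_end_io_fn and the blk_update_request()/blk_end_request() family to carry block-layer status codes instead of raw errnos, with blk_status_to_errno() and errno_to_blk_status() as the explicit bridge. A minimal sketch of a completion callback under the new signature follows; the context struct and handler name are illustrative, not part of this patch:

#include <linux/blkdev.h>
#include <linux/blk_types.h>
#include <linux/completion.h>

struct my_ctx {
	struct completion done;
	int result;			/* errno for the waiter */
};

/* Illustrative rq_end_io_fn using the new blk_status_t argument. */
static void my_end_io(struct request *rq, blk_status_t status)
{
	struct my_ctx *ctx = rq->end_io_data;

	/* Translate block-layer status into a conventional errno. */
	ctx->result = blk_status_to_errno(status);
	complete(&ctx->done);
	__blk_put_request(rq->q, rq);
}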
index fccf7f4..bbb3712 100644 (file)
@@ -27,7 +27,7 @@ struct cleancache_filekey {
 
 struct cleancache_ops {
        int (*init_fs)(size_t);
-       int (*init_shared_fs)(char *uuid, size_t);
+       int (*init_shared_fs)(uuid_t *uuid, size_t);
        int (*get_page)(int, struct cleancache_filekey,
                        pgoff_t, struct page *);
        void (*put_page)(int, struct cleancache_filekey,
index 2319b8c..c967090 100644 (file)
@@ -74,7 +74,8 @@ extern void config_item_init_type_name(struct config_item *item,
                                       const char *name,
                                       struct config_item_type *type);
 
-extern struct config_item * config_item_get(struct config_item *);
+extern struct config_item *config_item_get(struct config_item *);
+extern struct config_item *config_item_get_unless_zero(struct config_item *);
 extern void config_item_put(struct config_item *);
 
 struct config_item_type {
index f4c639c..456da50 100644 (file)
@@ -72,9 +72,9 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone);
  * 2   : The target wants to push back the io
  */
 typedef int (*dm_endio_fn) (struct dm_target *ti,
-                           struct bio *bio, int error);
+                           struct bio *bio, blk_status_t *error);
 typedef int (*dm_request_endio_fn) (struct dm_target *ti,
-                                   struct request *clone, int error,
+                                   struct request *clone, blk_status_t error,
                                    union map_info *map_context);
 
 typedef void (*dm_presuspend_fn) (struct dm_target *ti);
index 5e9c74c..9bbf21a 100644 (file)
@@ -136,7 +136,7 @@ static inline int dmi_name_in_vendors(const char *s) { return 0; }
 static inline int dmi_name_in_serial(const char *s) { return 0; }
 #define dmi_available 0
 static inline int dmi_walk(void (*decode)(const struct dmi_header *, void *),
-       void *private_data) { return -1; }
+       void *private_data) { return -ENXIO; }
 static inline bool dmi_match(enum dmi_field f, const char *str)
        { return false; }
 static inline void dmi_memdev_name(u16 handle, const char **bank,
index 0e306c5..5bc8f86 100644 (file)
@@ -104,8 +104,9 @@ struct elevator_mq_ops {
        int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
        void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
        void (*requests_merged)(struct request_queue *, struct request *, struct request *);
-       struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
-       void (*put_request)(struct request *);
+       void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
+       void (*prepare_request)(struct request *, struct bio *bio);
+       void (*finish_request)(struct request *);
        void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
        struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
        bool (*has_work)(struct blk_mq_hw_ctx *);
@@ -114,8 +115,6 @@ struct elevator_mq_ops {
        void (*requeue_request)(struct request *);
        struct request *(*former_request)(struct request_queue *, struct request *);
        struct request *(*next_request)(struct request_queue *, struct request *);
-       int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
-       void (*put_rq_priv)(struct request_queue *, struct request *);
        void (*init_icq)(struct io_cq *);
        void (*exit_icq)(struct io_cq *);
 };
index 803e5a9..65adbdd 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/rwsem.h>
 #include <linux/capability.h>
 #include <linux/semaphore.h>
+#include <linux/fcntl.h>
 #include <linux/fiemap.h>
 #include <linux/rculist_bl.h>
 #include <linux/atomic.h>
@@ -30,6 +31,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/workqueue.h>
 #include <linux/delayed_call.h>
+#include <linux/uuid.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -142,6 +144,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY         ((__force fmode_t)0x4000000)
 
+/* File is capable of returning -EAGAIN if AIO will block */
+#define FMODE_AIO_NOWAIT       ((__force fmode_t)0x8000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
@@ -261,6 +266,18 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+/*
+ * Write lifetime hint values.
+ */
+enum rw_hint {
+       WRITE_LIFE_NOT_SET      = 0,
+       WRITE_LIFE_NONE         = RWH_WRITE_LIFE_NONE,
+       WRITE_LIFE_SHORT        = RWH_WRITE_LIFE_SHORT,
+       WRITE_LIFE_MEDIUM       = RWH_WRITE_LIFE_MEDIUM,
+       WRITE_LIFE_LONG         = RWH_WRITE_LIFE_LONG,
+       WRITE_LIFE_EXTREME      = RWH_WRITE_LIFE_EXTREME,
+};
+
 #define IOCB_EVENTFD           (1 << 0)
 #define IOCB_APPEND            (1 << 1)
 #define IOCB_DIRECT            (1 << 2)
@@ -268,6 +285,7 @@ struct writeback_control;
 #define IOCB_DSYNC             (1 << 4)
 #define IOCB_SYNC              (1 << 5)
 #define IOCB_WRITE             (1 << 6)
+#define IOCB_NOWAIT            (1 << 7)
 
 struct kiocb {
        struct file             *ki_filp;
@@ -275,6 +293,7 @@ struct kiocb {
        void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
        void                    *private;
        int                     ki_flags;
+       enum rw_hint            ki_hint;
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -282,16 +301,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
        return kiocb->ki_complete == NULL;
 }
 
-static inline int iocb_flags(struct file *file);
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
-       *kiocb = (struct kiocb) {
-               .ki_filp = filp,
-               .ki_flags = iocb_flags(filp),
-       };
-}
-
 /*
  * "descriptor" for what we're up to with a read.
  * This allows us to use the same read code yet
@@ -592,6 +601,7 @@ struct inode {
        spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
        unsigned short          i_bytes;
        unsigned int            i_blkbits;
+       enum rw_hint            i_write_hint;
        blkcnt_t                i_blocks;
 
 #ifdef __NEED_I_SIZE_ORDERED
@@ -846,6 +856,7 @@ struct file {
         * Must not be taken from IRQ context.
         */
        spinlock_t              f_lock;
+       enum rw_hint            f_write_hint;
        atomic_long_t           f_count;
        unsigned int            f_flags;
        fmode_t                 f_mode;
@@ -1021,8 +1032,6 @@ struct file_lock_context {
 #define OFFT_OFFSET_MAX        INT_LIMIT(off_t)
 #endif
 
-#include <linux/fcntl.h>
-
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
 /*
@@ -1328,8 +1337,8 @@ struct super_block {
 
        struct sb_writers       s_writers;
 
-       char s_id[32];                          /* Informational name */
-       u8 s_uuid[16];                          /* UUID */
+       char                    s_id[32];       /* Informational name */
+       uuid_t                  s_uuid;         /* UUID */
 
        void                    *s_fs_info;     /* Filesystem private info */
        unsigned int            s_max_links;
@@ -1873,6 +1882,25 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
        return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
 }
 
+static inline enum rw_hint file_write_hint(struct file *file)
+{
+       if (file->f_write_hint != WRITE_LIFE_NOT_SET)
+               return file->f_write_hint;
+
+       return file_inode(file)->i_write_hint;
+}
+
+static inline int iocb_flags(struct file *file);
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+       *kiocb = (struct kiocb) {
+               .ki_filp = filp,
+               .ki_flags = iocb_flags(filp),
+               .ki_hint = file_write_hint(filp),
+       };
+}
+
 /*
  * Inode state bits.  Protected by inode->i_lock
  *
@@ -2517,6 +2545,8 @@ extern int filemap_fdatawait(struct address_space *);
 extern void filemap_fdatawait_keep_errors(struct address_space *);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
                                   loff_t lend);
+extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
+                                 loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
                                        loff_t lstart, loff_t lend);
@@ -2843,7 +2873,7 @@ enum {
        DIO_SKIP_DIO_COUNT = 0x08,
 };
 
-void dio_end_io(struct bio *bio, int error);
+void dio_end_io(struct bio *bio);
 
 ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
                             struct block_device *bdev, struct iov_iter *iter,
@@ -3056,6 +3086,25 @@ static inline int iocb_flags(struct file *file)
        return res;
 }
 
+static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
+{
+       if (unlikely(flags & ~RWF_SUPPORTED))
+               return -EOPNOTSUPP;
+
+       if (flags & RWF_NOWAIT) {
+               if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT))
+                       return -EOPNOTSUPP;
+               ki->ki_flags |= IOCB_NOWAIT;
+       }
+       if (flags & RWF_HIPRI)
+               ki->ki_flags |= IOCB_HIPRI;
+       if (flags & RWF_DSYNC)
+               ki->ki_flags |= IOCB_DSYNC;
+       if (flags & RWF_SYNC)
+               ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+       return 0;
+}
+
 static inline ino_t parent_ino(struct dentry *dentry)
 {
        ino_t res;
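
Taken together, the kiocb changes thread two pieces of per-I/O state through the VFS: ki_hint, seeded by init_sync_kiocb() from file_write_hint() (per-file hint first, inode hint as fallback), and ki_flags, extended from caller-supplied RWF_* bits by kiocb_set_rw_flags(). A hedged sketch of how a synchronous write path might wire this up; the wrapper itself is hypothetical:

#include <linux/fs.h>
#include <linux/uio.h>

/* Hypothetical wrapper showing the intended kiocb setup sequence. */
static ssize_t my_write(struct file *filp, struct iov_iter *iter,
			loff_t pos, int rw_flags)
{
	struct kiocb kiocb;
	int ret;

	init_sync_kiocb(&kiocb, filp);	/* copies iocb_flags() and write hint */
	ret = kiocb_set_rw_flags(&kiocb, rw_flags);
	if (ret)
		return ret;	/* e.g. -EOPNOTSUPP for unsupported RWF_* bits */
	kiocb.ki_pos = pos;

	return call_write_iter(filp, &kiocb, iter);
}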
index acff943..e619fae 100644 (file)
@@ -219,12 +219,6 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part)
        return NULL;
 }
 
-static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
-{
-       uuid_be_to_bin(uuid_str, (uuid_be *)to);
-       return 0;
-}
-
 static inline int disk_max_parts(struct gendisk *disk)
 {
        if (disk->flags & GENHD_FL_EXT_DEVT)
@@ -736,11 +730,6 @@ static inline dev_t blk_lookup_devt(const char *name, int partno)
        dev_t devt = MKDEV(0, 0);
        return devt;
 }
-
-static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
-{
-       return -EINVAL;
-}
 #endif /* CONFIG_BLOCK */
 
 #endif /* _LINUX_GENHD_H */
index 661e5c2..082dc1b 100644 (file)
@@ -167,7 +167,6 @@ static inline void hash_del_rcu(struct hlist_node *node)
 /**
  * hash_for_each_possible_rcu - iterate over all possible objects hashing to the
  * same bucket in an rcu enabled hashtable
- * in a rcu enabled hashtable
  * @name: hashtable to iterate
  * @obj: the type * to use as a loop cursor for each entry
  * @member: the name of the hlist_node within the struct
index 6980ca3..dc152e4 100644 (file)
@@ -671,7 +671,7 @@ struct ide_port_ops {
        void    (*init_dev)(ide_drive_t *);
        void    (*set_pio_mode)(struct hwif_s *, ide_drive_t *);
        void    (*set_dma_mode)(struct hwif_s *, ide_drive_t *);
-       int     (*reset_poll)(ide_drive_t *);
+       blk_status_t (*reset_poll)(ide_drive_t *);
        void    (*pre_reset)(ide_drive_t *);
        void    (*resetproc)(ide_drive_t *);
        void    (*maskproc)(ide_drive_t *, int);
@@ -1092,7 +1092,7 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l
 extern int ide_vlb_clk;
 extern int ide_pci_clk;
 
-int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int);
+int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
 void ide_kill_rq(ide_drive_t *, struct request *);
 
 void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
@@ -1123,7 +1123,7 @@ extern int ide_devset_execute(ide_drive_t *drive,
                              const struct ide_devset *setting, int arg);
 
 void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8);
-int ide_complete_rq(ide_drive_t *, int, unsigned int);
+int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int);
 
 void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd);
 void ide_tf_dump(const char *, struct ide_cmd *);
index f753e78..69f4e94 100644 (file)
@@ -52,6 +52,7 @@ struct iomap {
 #define IOMAP_REPORT           (1 << 2) /* report extent status, e.g. FIEMAP */
 #define IOMAP_FAULT            (1 << 3) /* mapping for page fault */
 #define IOMAP_DIRECT           (1 << 4) /* direct I/O */
+#define IOMAP_NOWAIT           (1 << 5) /* Don't wait for writeback */
 
 struct iomap_ops {
        /*
index b892e95..6f543a4 100644 (file)
@@ -1393,12 +1393,6 @@ int clear_page_dirty_for_io(struct page *page);
 
 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
 
-/* Is the vma a continuation of the stack vma above it? */
-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
-{
-       return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
-}
-
 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
 {
        return !vma->vm_ops;
@@ -1414,28 +1408,6 @@ bool vma_is_shmem(struct vm_area_struct *vma);
 static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
 #endif
 
-static inline int stack_guard_page_start(struct vm_area_struct *vma,
-                                            unsigned long addr)
-{
-       return (vma->vm_flags & VM_GROWSDOWN) &&
-               (vma->vm_start == addr) &&
-               !vma_growsdown(vma->vm_prev, addr);
-}
-
-/* Is the vma a continuation of the stack vma below it? */
-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
-{
-       return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
-}
-
-static inline int stack_guard_page_end(struct vm_area_struct *vma,
-                                          unsigned long addr)
-{
-       return (vma->vm_flags & VM_GROWSUP) &&
-               (vma->vm_end == addr) &&
-               !vma_growsup(vma->vm_next, addr);
-}
-
 int vma_is_stack_for_current(struct vm_area_struct *vma);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
@@ -2222,6 +2194,7 @@ void page_cache_async_readahead(struct address_space *mapping,
                                pgoff_t offset,
                                unsigned long size);
 
+extern unsigned long stack_guard_gap;
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
 
@@ -2250,6 +2223,30 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m
        return vma;
 }
 
+static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
+{
+       unsigned long vm_start = vma->vm_start;
+
+       if (vma->vm_flags & VM_GROWSDOWN) {
+               vm_start -= stack_guard_gap;
+               if (vm_start > vma->vm_start)
+                       vm_start = 0;
+       }
+       return vm_start;
+}
+
+static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
+{
+       unsigned long vm_end = vma->vm_end;
+
+       if (vma->vm_flags & VM_GROWSUP) {
+               vm_end += stack_guard_gap;
+               if (vm_end < vma->vm_end)
+                       vm_end = -PAGE_SIZE;
+       }
+       return vm_end;
+}
+
 static inline unsigned long vma_pages(struct vm_area_struct *vma)
 {
        return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
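
vm_start_gap() and vm_end_gap() report a VMA's bounds widened by stack_guard_gap on its growable side, with the arithmetic clamped so that an underflow saturates to 0 and an overflow to -PAGE_SIZE. Free-range searches are expected to test against these widened bounds rather than raw vm_start/vm_end, so the guard gap is never handed out. A sketch of such a check; the helper is hypothetical:

/* Hypothetical check: does [addr, addr + len) keep clear of the
 * guard gaps of its prospective neighbours? */
static bool range_clears_guard_gaps(struct vm_area_struct *prev,
				    struct vm_area_struct *next,
				    unsigned long addr, unsigned long len)
{
	if (prev && addr < vm_end_gap(prev))
		return false;	/* overlaps a VM_GROWSUP gap above prev */
	if (next && addr + len > vm_start_gap(next))
		return false;	/* overlaps a VM_GROWSDOWN gap below next */
	return true;
}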
index 6be1949..1ee7b30 100644 (file)
@@ -457,7 +457,7 @@ enum hwparam_type {
        hwparam_ioport,         /* Module parameter configures an I/O port */
        hwparam_iomem,          /* Module parameter configures an I/O mem address */
        hwparam_ioport_or_iomem, /* Module parameter could be either, depending on other option */
-       hwparam_irq,            /* Module parameter configures an I/O port */
+       hwparam_irq,            /* Module parameter configures an IRQ */
        hwparam_dma,            /* Module parameter configures a DMA channel */
        hwparam_dma_addr,       /* Module parameter configures a DMA buffer address */
        hwparam_other,          /* Module parameter configures some other value */
index 3f39d27..4ed952c 100644 (file)
@@ -914,8 +914,7 @@ struct xfrmdev_ops {
  *
  * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu);
  *     Called when a user wants to change the Maximum Transfer Unit
- *     of a device. If not defined, any request to change MTU will
- *     will return an error.
+ *     of a device.
  *
  * void (*ndo_tx_timeout)(struct net_device *dev);
  *     Callback used when the transmitter has not made any progress
@@ -1596,8 +1595,8 @@ enum netdev_priv_flags {
  *     @rtnl_link_state:       This enum represents the phases of creating
  *                             a new link
  *
- *     @destructor:            Called from unregister,
- *                             can be used to call free_netdev
+ *     @needs_free_netdev:     Should unregister perform free_netdev?
+ *     @priv_destructor:       Called from unregister
  *     @npinfo:                XXX: need comments on this one
  *     @nd_net:                Network namespace this network device is inside
  *
@@ -1858,7 +1857,8 @@ struct net_device {
                RTNL_LINK_INITIALIZING,
        } rtnl_link_state:16;
 
-       void (*destructor)(struct net_device *dev);
+       bool needs_free_netdev;
+       void (*priv_destructor)(struct net_device *dev);
 
 #ifdef CONFIG_NETPOLL
        struct netpoll_info __rcu       *npinfo;
@@ -4261,6 +4261,11 @@ static inline const char *netdev_name(const struct net_device *dev)
        return dev->name;
 }
 
+static inline bool netdev_unregistering(const struct net_device *dev)
+{
+       return dev->reg_state == NETREG_UNREGISTERING;
+}
+
 static inline const char *netdev_reg_state(const struct net_device *dev)
 {
        switch (dev->reg_state) {
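
The destructor split separates two things the old hook conflated: whether the core should call free_netdev() after unregister (needs_free_netdev) and driver-private teardown (priv_destructor). A sketch of a driver setup function adopting the new convention; the driver names are illustrative:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

/* Illustrative private teardown: runs during unregister, before the
 * core frees the net_device itself. */
static void my_destructor(struct net_device *dev)
{
	/* release driver-private state here; do not call free_netdev() */
}

static void my_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->needs_free_netdev = true;	/* core will call free_netdev() */
	dev->priv_destructor = my_destructor;
}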
index e997c4a..bc711a1 100644 (file)
@@ -177,7 +177,6 @@ struct fcnvme_lsdesc_rjt {
 };
 
 
-#define FCNVME_ASSOC_HOSTID_LEN                16
 #define FCNVME_ASSOC_HOSTNQN_LEN       256
 #define FCNVME_ASSOC_SUBNQN_LEN                256
 
@@ -191,7 +190,7 @@ struct fcnvme_lsdesc_cr_assoc_cmd {
        __be16  cntlid;
        __be16  sqsize;
        __be32  rsvd52;
-       u8      hostid[FCNVME_ASSOC_HOSTID_LEN];
+       uuid_t  hostid;
        u8      hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
        u8      subnqn[FCNVME_ASSOC_SUBNQN_LEN];
        u8      rsvd632[384];
index b625bac..6b8ee9e 100644 (file)
@@ -16,6 +16,7 @@
 #define _LINUX_NVME_H
 
 #include <linux/types.h>
+#include <linux/uuid.h>
 
 /* NQN names in commands fields specified one size */
 #define NVMF_NQN_FIELD_LEN     256
@@ -86,7 +87,7 @@ enum {
        NVMF_RDMA_CMS_RDMA_CM   = 1, /* Sockets based endpoint addressing */
 };
 
-#define NVMF_AQ_DEPTH          32
+#define NVME_AQ_DEPTH          32
 
 enum {
        NVME_REG_CAP    = 0x0000,       /* Controller Capabilities */
@@ -101,6 +102,7 @@ enum {
        NVME_REG_ACQ    = 0x0030,       /* Admin CQ Base Address */
        NVME_REG_CMBLOC = 0x0038,       /* Controller Memory Buffer Location */
        NVME_REG_CMBSZ  = 0x003c,       /* Controller Memory Buffer Size */
+       NVME_REG_DBS    = 0x1000,       /* SQ 0 Tail Doorbell */
 };
 
 #define NVME_CAP_MQES(cap)     ((cap) & 0xffff)
@@ -207,9 +209,15 @@ struct nvme_id_ctrl {
        __u8                    tnvmcap[16];
        __u8                    unvmcap[16];
        __le32                  rpmbs;
-       __u8                    rsvd316[4];
+       __le16                  edstt;
+       __u8                    dsto;
+       __u8                    fwug;
        __le16                  kas;
-       __u8                    rsvd322[190];
+       __le16                  hctma;
+       __le16                  mntmt;
+       __le16                  mxtmt;
+       __le32                  sanicap;
+       __u8                    rsvd332[180];
        __u8                    sqes;
        __u8                    cqes;
        __le16                  maxcmd;
@@ -245,6 +253,7 @@ enum {
        NVME_CTRL_ONCS_WRITE_ZEROES             = 1 << 3,
        NVME_CTRL_VWC_PRESENT                   = 1 << 0,
        NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
+       NVME_CTRL_OACS_DIRECTIVES               = 1 << 5,
        NVME_CTRL_OACS_DBBUF_SUPP               = 1 << 7,
 };
 
@@ -274,7 +283,7 @@ struct nvme_id_ns {
        __le16                  nabsn;
        __le16                  nabo;
        __le16                  nabspf;
-       __u16                   rsvd46;
+       __le16                  noiob;
        __u8                    nvmcap[16];
        __u8                    rsvd64[40];
        __u8                    nguid[16];
@@ -288,6 +297,7 @@ enum {
        NVME_ID_CNS_NS                  = 0x00,
        NVME_ID_CNS_CTRL                = 0x01,
        NVME_ID_CNS_NS_ACTIVE_LIST      = 0x02,
+       NVME_ID_CNS_NS_DESC_LIST        = 0x03,
        NVME_ID_CNS_NS_PRESENT_LIST     = 0x10,
        NVME_ID_CNS_NS_PRESENT          = 0x11,
        NVME_ID_CNS_CTRL_NS_LIST        = 0x12,
@@ -295,6 +305,19 @@ enum {
 };
 
 enum {
+       NVME_DIR_IDENTIFY               = 0x00,
+       NVME_DIR_STREAMS                = 0x01,
+       NVME_DIR_SND_ID_OP_ENABLE       = 0x01,
+       NVME_DIR_SND_ST_OP_REL_ID       = 0x01,
+       NVME_DIR_SND_ST_OP_REL_RSC      = 0x02,
+       NVME_DIR_RCV_ID_OP_PARAM        = 0x01,
+       NVME_DIR_RCV_ST_OP_PARAM        = 0x01,
+       NVME_DIR_RCV_ST_OP_STATUS       = 0x02,
+       NVME_DIR_RCV_ST_OP_RESOURCE     = 0x03,
+       NVME_DIR_ENDIR                  = 0x01,
+};
+
+enum {
        NVME_NS_FEAT_THIN       = 1 << 0,
        NVME_NS_FLBAS_LBA_MASK  = 0xf,
        NVME_NS_FLBAS_META_EXT  = 0x10,
@@ -314,6 +337,22 @@ enum {
        NVME_NS_DPS_PI_TYPE3    = 3,
 };
 
+struct nvme_ns_id_desc {
+       __u8 nidt;
+       __u8 nidl;
+       __le16 reserved;
+};
+
+#define NVME_NIDT_EUI64_LEN    8
+#define NVME_NIDT_NGUID_LEN    16
+#define NVME_NIDT_UUID_LEN     16
+
+enum {
+       NVME_NIDT_EUI64         = 0x01,
+       NVME_NIDT_NGUID         = 0x02,
+       NVME_NIDT_UUID          = 0x03,
+};
+
 struct nvme_smart_log {
        __u8                    critical_warning;
        __u8                    temperature[2];
@@ -535,6 +574,7 @@ enum {
        NVME_RW_PRINFO_PRCHK_APP        = 1 << 11,
        NVME_RW_PRINFO_PRCHK_GUARD      = 1 << 12,
        NVME_RW_PRINFO_PRACT            = 1 << 13,
+       NVME_RW_DTYPE_STREAMS           = 1 << 4,
 };
 
 struct nvme_dsm_cmd {
@@ -586,6 +626,11 @@ struct nvme_feat_auto_pst {
        __le64 entries[32];
 };
 
+enum {
+       NVME_HOST_MEM_ENABLE    = (1 << 0),
+       NVME_HOST_MEM_RETURN    = (1 << 1),
+};
+
 /* Admin commands */
 
 enum nvme_admin_opcode {
@@ -604,6 +649,8 @@ enum nvme_admin_opcode {
        nvme_admin_download_fw          = 0x11,
        nvme_admin_ns_attach            = 0x15,
        nvme_admin_keep_alive           = 0x18,
+       nvme_admin_directive_send       = 0x19,
+       nvme_admin_directive_recv       = 0x1a,
        nvme_admin_dbbuf                = 0x7C,
        nvme_admin_format_nvm           = 0x80,
        nvme_admin_security_send        = 0x81,
@@ -658,6 +705,8 @@ struct nvme_identify {
        __u32                   rsvd11[5];
 };
 
+#define NVME_IDENTIFY_DATA_SIZE 4096
+
 struct nvme_features {
        __u8                    opcode;
        __u8                    flags;
@@ -667,7 +716,16 @@ struct nvme_features {
        union nvme_data_ptr     dptr;
        __le32                  fid;
        __le32                  dword11;
-       __u32                   rsvd12[4];
+       __le32                  dword12;
+       __le32                  dword13;
+       __le32                  dword14;
+       __le32                  dword15;
+};
+
+struct nvme_host_mem_buf_desc {
+       __le64                  addr;
+       __le32                  size;
+       __u32                   rsvd;
 };
 
 struct nvme_create_cq {
@@ -756,6 +814,24 @@ struct nvme_get_log_page_command {
        __u32                   rsvd14[2];
 };
 
+struct nvme_directive_cmd {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __le32                  nsid;
+       __u64                   rsvd2[2];
+       union nvme_data_ptr     dptr;
+       __le32                  numd;
+       __u8                    doper;
+       __u8                    dtype;
+       __le16                  dspec;
+       __u8                    endir;
+       __u8                    tdtype;
+       __u16                   rsvd15;
+
+       __u32                   rsvd16[3];
+};
+
 /*
  * Fabrics subcommands.
  */
@@ -843,7 +919,7 @@ struct nvmf_connect_command {
 };
 
 struct nvmf_connect_data {
-       __u8            hostid[16];
+       uuid_t          hostid;
        __le16          cntlid;
        char            resv4[238];
        char            subsysnqn[NVMF_NQN_FIELD_LEN];
@@ -886,6 +962,18 @@ struct nvme_dbbuf {
        __u32                   rsvd12[6];
 };
 
+struct streams_directive_params {
+       __u16   msl;
+       __u16   nssa;
+       __u16   nsso;
+       __u8    rsvd[10];
+       __u32   sws;
+       __u16   sgs;
+       __u16   nsa;
+       __u16   nso;
+       __u8    rsvd2[6];
+};
+
 struct nvme_command {
        union {
                struct nvme_common_command common;
@@ -906,6 +994,7 @@ struct nvme_command {
                struct nvmf_property_set_command prop_set;
                struct nvmf_property_get_command prop_get;
                struct nvme_dbbuf dbbuf;
+               struct nvme_directive_cmd directive;
        };
 };
 
@@ -1050,4 +1139,8 @@ struct nvme_completion {
 #define NVME_VS(major, minor, tertiary) \
        (((major) << 16) | ((minor) << 8) | (tertiary))
 
+#define NVME_MAJOR(ver)                ((ver) >> 16)
+#define NVME_MINOR(ver)                (((ver) >> 8) & 0xff)
+#define NVME_TERTIARY(ver)     ((ver) & 0xff)
+
 #endif /* _LINUX_NVME_H */
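
The new NVME_MAJOR/NVME_MINOR/NVME_TERTIARY accessors simply unpack what NVME_VS() packs: major version in bits 31:16, minor in 15:8, tertiary in 7:0. A one-liner for illustration:

#include <linux/kernel.h>
#include <linux/nvme.h>

static void nvme_version_example(void)
{
	u32 vs = NVME_VS(1, 3, 0);	/* packs to 0x00010300 */

	pr_info("NVMe %u.%u.%u\n",
		NVME_MAJOR(vs), NVME_MINOR(vs), NVME_TERTIARY(vs));
}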
index 7a4e83a..dd86c97 100644 (file)
@@ -105,7 +105,7 @@ static inline void acpiphp_remove_slots(struct pci_bus *bus) { }
 static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { }
 #endif
 
-extern const u8 pci_acpi_dsm_uuid[];
+extern const guid_t pci_acpi_dsm_guid;
 #define DEVICE_LABEL_DSM       0x07
 #define RESET_DELAY_DSM                0x08
 #define FUNCTION_DELAY_DSM     0x09
index cb3c8fe..4b3286a 100644 (file)
@@ -278,6 +278,8 @@ size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
                            const void *buf, size_t buflen, off_t skip);
 size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
                          void *buf, size_t buflen, off_t skip);
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+                      size_t buflen, off_t skip);
 
 /*
  * Maximum number of entries that will be allocated in one piece, if
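
sg_zero_buffer() rounds out the sg_pcopy_* family: it walks the scatterlist, skips `skip` bytes of payload, zeroes up to `buflen` bytes, and returns how many bytes were actually zeroed. A small usage sketch (the 64-byte header and 512-byte payload split is arbitrary):

#include <linux/scatterlist.h>

/* Zero 512 bytes of payload that follow a 64-byte header at the
 * front of the scatterlist. */
static size_t zero_payload(struct scatterlist *sgl, unsigned int nents)
{
	return sg_zero_buffer(sgl, nents, 512, 64);
}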
index 07ef550..93315d6 100644 (file)
@@ -84,6 +84,7 @@ struct kmem_cache {
        int red_left_pad;       /* Left redzone padding size */
 #ifdef CONFIG_SYSFS
        struct kobject kobj;    /* For sysfs */
+       struct work_struct kobj_remove_work;
 #endif
 #ifdef CONFIG_MEMCG
        struct memcg_cache_params memcg_params;
index 110f453..f7043cc 100644 (file)
@@ -29,7 +29,6 @@
  */
 struct tk_read_base {
        struct clocksource      *clock;
-       u64                     (*read)(struct clocksource *cs);
        u64                     mask;
        u64                     cycle_last;
        u32                     mult;
@@ -58,7 +57,7 @@ struct tk_read_base {
  *                     interval.
  * @xtime_remainder:   Shifted nano seconds left over when rounding
  *                     @cycle_interval
- * @raw_interval:      Raw nano seconds accumulated per NTP interval.
+ * @raw_interval:      Shifted raw nano seconds accumulated per NTP interval.
  * @ntp_error:         Difference between accumulated time and NTP time in ntp
  *                     shifted nano seconds.
  * @ntp_error_shift:   Shift conversion between clock shifted nano seconds and
@@ -100,7 +99,7 @@ struct timekeeper {
        u64                     cycle_interval;
        u64                     xtime_interval;
        s64                     xtime_remainder;
-       u32                     raw_interval;
+       u64                     raw_interval;
        /* The ntp_tick_length() value currently being used.
         * This cached copy ensures we consistently apply the tick
         * length for an entire tick, as ntp_tick_length may change
index 4dff73a..d1defe4 100644 (file)
 
 #include <uapi/linux/uuid.h>
 
-/*
- * V1 (time-based) UUID definition [RFC 4122].
- * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns
- *   increments since midnight 15th October 1582
- *   - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID
- *     time
- * - the clock sequence is a 14-bit counter to avoid duplicate times
- */
-struct uuid_v1 {
-       __be32          time_low;                       /* low part of timestamp */
-       __be16          time_mid;                       /* mid part of timestamp */
-       __be16          time_hi_and_version;            /* high part of timestamp and version  */
-#define UUID_TO_UNIX_TIME      0x01b21dd213814000ULL
-#define UUID_TIMEHI_MASK       0x0fff
-#define UUID_VERSION_TIME      0x1000  /* time-based UUID */
-#define UUID_VERSION_NAME      0x3000  /* name-based UUID */
-#define UUID_VERSION_RANDOM    0x4000  /* (pseudo-)random generated UUID */
-       u8              clock_seq_hi_and_reserved;      /* clock seq hi and variant */
-#define UUID_CLOCKHI_MASK      0x3f
-#define UUID_VARIANT_STD       0x80
-       u8              clock_seq_low;                  /* clock seq low */
-       u8              node[6];                        /* spatially unique node ID (MAC addr) */
-};
+typedef struct {
+       __u8 b[16];
+} uuid_t;
+
+#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)                     \
+((uuid_t)                                                              \
+{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
+   ((b) >> 8) & 0xff, (b) & 0xff,                                      \
+   ((c) >> 8) & 0xff, (c) & 0xff,                                      \
+   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
 
 /*
  * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
@@ -48,27 +35,73 @@ struct uuid_v1 {
  */
 #define        UUID_STRING_LEN         36
 
-static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2)
+extern const guid_t guid_null;
+extern const uuid_t uuid_null;
+
+static inline bool guid_equal(const guid_t *u1, const guid_t *u2)
+{
+       return memcmp(u1, u2, sizeof(guid_t)) == 0;
+}
+
+static inline void guid_copy(guid_t *dst, const guid_t *src)
+{
+       memcpy(dst, src, sizeof(guid_t));
+}
+
+static inline bool guid_is_null(const guid_t *guid)
+{
+       return guid_equal(guid, &guid_null);
+}
+
+static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2)
+{
+       return memcmp(u1, u2, sizeof(uuid_t)) == 0;
+}
+
+static inline void uuid_copy(uuid_t *dst, const uuid_t *src)
 {
-       return memcmp(&u1, &u2, sizeof(uuid_le));
+       memcpy(dst, src, sizeof(uuid_t));
 }
 
-static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2)
+static inline bool uuid_is_null(const uuid_t *uuid)
 {
-       return memcmp(&u1, &u2, sizeof(uuid_be));
+       return uuid_equal(uuid, &uuid_null);
 }
 
 void generate_random_uuid(unsigned char uuid[16]);
 
-extern void uuid_le_gen(uuid_le *u);
-extern void uuid_be_gen(uuid_be *u);
+extern void guid_gen(guid_t *u);
+extern void uuid_gen(uuid_t *u);
 
 bool __must_check uuid_is_valid(const char *uuid);
 
-extern const u8 uuid_le_index[16];
-extern const u8 uuid_be_index[16];
+extern const u8 guid_index[16];
+extern const u8 uuid_index[16];
+
+int guid_parse(const char *uuid, guid_t *u);
+int uuid_parse(const char *uuid, uuid_t *u);
+
+/* backwards compatibility, don't use in new code */
+typedef uuid_t uuid_be;
+#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
+       UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7)
+#define NULL_UUID_BE                                                   \
+       UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,     \
+            0x00, 0x00, 0x00, 0x00)
 
-int uuid_le_to_bin(const char *uuid, uuid_le *u);
-int uuid_be_to_bin(const char *uuid, uuid_be *u);
+#define uuid_le_gen(u)         guid_gen(u)
+#define uuid_be_gen(u)         uuid_gen(u)
+#define uuid_le_to_bin(guid, u)        guid_parse(guid, u)
+#define uuid_be_to_bin(uuid, u)        uuid_parse(uuid, u)
+
+static inline int uuid_le_cmp(const guid_t u1, const guid_t u2)
+{
+       return memcmp(&u1, &u2, sizeof(guid_t));
+}
+
+static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2)
+{
+       return memcmp(&u1, &u2, sizeof(uuid_t));
+}
 
 #endif
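
A short sketch of the new value-oriented uuid_t API, combining UUID_INIT, uuid_parse(), uuid_equal() and uuid_is_null(); the example UUID itself is arbitrary:

#include <linux/uuid.h>

static bool uuid_api_demo(void)
{
	uuid_t a = UUID_INIT(0x12345678, 0x9abc, 0xdef0,
			     0x01, 0x02, 0x03, 0x04,
			     0x05, 0x06, 0x07, 0x08);
	uuid_t b;

	if (uuid_parse("12345678-9abc-def0-0102-030405060708", &b))
		return false;		/* malformed string */

	return uuid_equal(&a, &b) && !uuid_is_null(&a);
}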
index 413335c..298f996 100644 (file)
@@ -106,6 +106,16 @@ static inline void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n,
 {
 }
 
+static inline void cec_notifier_register(struct cec_notifier *n,
+                        struct cec_adapter *adap,
+                        void (*callback)(struct cec_adapter *adap, u16 pa))
+{
+}
+
+static inline void cec_notifier_unregister(struct cec_notifier *n)
+{
+}
+
 #endif
 
 #endif
index bfa88d4..201f060 100644 (file)
@@ -206,7 +206,7 @@ static inline bool cec_is_sink(const struct cec_adapter *adap)
 #define cec_phys_addr_exp(pa) \
        ((pa) >> 12), ((pa) >> 8) & 0xf, ((pa) >> 4) & 0xf, (pa) & 0xf
 
-#if IS_ENABLED(CONFIG_CEC_CORE)
+#if IS_REACHABLE(CONFIG_CEC_CORE)
 struct cec_adapter *cec_allocate_adapter(const struct cec_adap_ops *ops,
                void *priv, const char *name, u32 caps, u8 available_las);
 int cec_register_adapter(struct cec_adapter *adap, struct device *parent);
index 3459119..454ff76 100644 (file)
@@ -6,7 +6,7 @@
 struct net;
 
 #ifdef CONFIG_WEXT_CORE
-int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
                      void __user *arg);
 int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
                             unsigned long arg);
@@ -14,7 +14,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
 struct iw_statistics *get_wireless_stats(struct net_device *dev);
 int call_commit_handler(struct net_device *dev);
 #else
-static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
                                    void __user *arg)
 {
        return -EINVAL;
index 7e7e2b0..62f5a25 100644 (file)
@@ -1850,8 +1850,9 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
 }
 #endif
 
-#ifdef CONFIG_XFRM_OFFLOAD
 void __net_init xfrm_dev_init(void);
+
+#ifdef CONFIG_XFRM_OFFLOAD
 int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
 int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
                       struct xfrm_user_offload *xuo);
@@ -1877,10 +1878,6 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
        }
 }
 #else
-static inline void __net_init xfrm_dev_init(void)
-{
-}
-
 static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
 {
        return 0;
index a09cca8..a29d308 100644 (file)
@@ -157,7 +157,7 @@ struct osd_request {
 
        osd_req_done_fn *async_done;
        void *async_private;
-       int async_error;
+       blk_status_t async_error;
        int req_errors;
 };
 
index b379f93..da9bf2b 100644 (file)
@@ -166,6 +166,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
 extern void scsi_kunmap_atomic_sg(void *virt);
 
 extern int scsi_init_io(struct scsi_cmnd *cmd);
+extern void scsi_initialize_rq(struct request *rq);
 
 extern int scsi_dma_map(struct scsi_cmnd *cmd);
 extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
index f0c76f9..e0afa44 100644 (file)
@@ -27,6 +27,6 @@ static inline void scsi_req_free_cmd(struct scsi_request *req)
                kfree(req->cmd);
 }
 
-void scsi_req_init(struct request *);
+void scsi_req_init(struct scsi_request *req);
 
 #endif /* _SCSI_SCSI_REQUEST_H */
index 7caf44c..295cd3e 100644 (file)
@@ -112,24 +112,7 @@ enum machine_type {
 #define N_TXTADDR(x) (N_MAGIC(x) == QMAGIC ? PAGE_SIZE : 0)
 #endif
 
-/* Address of data segment in memory after it is loaded.
-   Note that it is up to you to define SEGMENT_SIZE
-   on machines not listed here.  */
-#if defined(vax) || defined(hp300) || defined(pyr)
-#define SEGMENT_SIZE page_size
-#endif
-#ifdef sony
-#define        SEGMENT_SIZE    0x2000
-#endif /* Sony.  */
-#ifdef is68k
-#define SEGMENT_SIZE 0x20000
-#endif
-#if defined(m68k) && defined(PORTAR)
-#define PAGE_SIZE 0x400
-#define SEGMENT_SIZE PAGE_SIZE
-#endif
-
-#ifdef linux
+/* Address of data segment in memory after it is loaded. */
 #ifndef __KERNEL__
 #include <unistd.h>
 #endif
@@ -142,7 +125,6 @@ enum machine_type {
 #endif
 #endif
 #endif
-#endif
 
 #define _N_SEGMENT_ROUND(x) ALIGN(x, SEGMENT_SIZE)
 
@@ -260,13 +242,7 @@ struct relocation_info
   unsigned int r_extern:1;
   /* Four bits that aren't used, but when writing an object file
      it is desirable to clear them.  */
-#ifdef NS32K
-  unsigned r_bsr:1;
-  unsigned r_disp:1;
-  unsigned r_pad:2;
-#else
   unsigned int r_pad:4;
-#endif
 };
 #endif /* no N_RELOCATION_INFO_DECLARED.  */
 
index bb2554f..a2d4a8a 100644 (file)
@@ -79,7 +79,7 @@ struct io_event {
 struct iocb {
        /* these are internal to the kernel/libc. */
        __u64   aio_data;       /* data to be returned in event's data */
-       __u32   PADDED(aio_key, aio_reserved1);
+       __u32   PADDED(aio_key, aio_rw_flags);
                                /* the kernel sets aio_key to the req # */
 
        /* common fields */
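
Repurposing the padding word as aio_rw_flags is what lets userspace pass per-I/O RWF_* flags through io_submit(). A userspace sketch using the raw syscall interface (no libaio); it assumes a kernel with this support and that RWF_NOWAIT is visible via the uapi <linux/fs.h>:

#include <linux/aio_abi.h>
#include <linux/fs.h>		/* RWF_NOWAIT */
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Submit one write that completes with -EAGAIN rather than blocking.
 * The caller is assumed to have set up `ctx` with io_setup(). */
static int submit_nowait_write(aio_context_t ctx, int fd,
			       const void *buf, size_t len, long long off)
{
	struct iocb cb;
	struct iocb *cbs[1] = { &cb };

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_lio_opcode = IOCB_CMD_PWRITE;
	cb.aio_buf = (unsigned long)buf;
	cb.aio_nbytes = len;
	cb.aio_offset = off;
	cb.aio_rw_flags = RWF_NOWAIT;

	return syscall(SYS_io_submit, ctx, 1, cbs);
}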
index 4bf9f1e..2f6c77a 100644 (file)
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY    _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR       4
-#define DM_VERSION_MINOR       35
+#define DM_VERSION_MINOR       36
 #define DM_VERSION_PATCHLEVEL  0
-#define DM_VERSION_EXTRA       "-ioctl (2016-06-23)"
+#define DM_VERSION_EXTRA       "-ioctl (2017-06-09)"
 
 /* Status bits */
 #define DM_READONLY_FLAG       (1 << 0) /* In/Out */
index d179d77..7d4a594 100644 (file)
@@ -1486,8 +1486,10 @@ enum ethtool_link_mode_bit_indices {
  * it was forced up into this mode or autonegotiated.
  */
 
-/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. */
-/* Update drivers/net/phy/phy.c:phy_speed_to_str() when adding new values */
+/* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal.
+ * Update drivers/net/phy/phy.c:phy_speed_to_str() and
+ * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values.
+ */
 #define SPEED_10               10
 #define SPEED_100              100
 #define SPEED_1000             1000
index 813afd6..ec69d55 100644 (file)
 /* (1U << 31) is reserved for signed error codes */
 
 /*
+ * Set/Get write lifetime hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file.
+ */
+#define F_GET_RW_HINT          (F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT          (F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT     (F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT     (F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 means "not set" and can
+ * also be used to clear any previously set hint.
+ */
+#define RWF_WRITE_LIFE_NOT_SET 0
+#define RWH_WRITE_LIFE_NONE    1
+#define RWH_WRITE_LIFE_SHORT   2
+#define RWH_WRITE_LIFE_MEDIUM  3
+#define RWH_WRITE_LIFE_LONG    4
+#define RWH_WRITE_LIFE_EXTREME 5
+
+/*
  * Types of directory notifications that may be requested.
  */
 #define DN_ACCESS      0x00000001      /* File accessed */
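
Note that F_SET_RW_HINT takes a pointer to a 64-bit hint, not an immediate value. A userspace sketch; the fallback defines cover toolchains whose headers do not yet carry the new constants:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

#ifndef F_SET_RW_HINT
#define F_LINUX_SPECIFIC_BASE	1024
#define F_SET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 12)
#define RWH_WRITE_LIFE_SHORT	2
#endif

/* Tag the underlying inode as holding short-lived data. */
static int set_short_lifetime(int fd)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;

	if (fcntl(fd, F_SET_RW_HINT, &hint) < 0) {
		perror("F_SET_RW_HINT");
		return -1;
	}
	return 0;
}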
index 24e61a5..27d8c36 100644 (file)
@@ -360,5 +360,9 @@ struct fscrypt_key {
 #define RWF_HIPRI                      0x00000001 /* high priority request, poll if possible */
 #define RWF_DSYNC                      0x00000002 /* per-IO O_DSYNC */
 #define RWF_SYNC                       0x00000004 /* per-IO O_SYNC */
+#define RWF_NOWAIT                     0x00000008 /* per-IO, return -EAGAIN if operation would block */
+
+#define RWF_SUPPORTED                  (RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\
+                                        RWF_NOWAIT)
 
 #endif /* _UAPI_LINUX_FS_H */
index c8125ec..a3960f9 100644 (file)
@@ -22,6 +22,7 @@ enum {
        LO_FLAGS_AUTOCLEAR      = 4,
        LO_FLAGS_PARTSCAN       = 8,
        LO_FLAGS_DIRECT_IO      = 16,
+       LO_FLAGS_BLOCKSIZE      = 32,
 };
 
 #include <asm/posix_types.h>   /* for __kernel_old_dev_t */
@@ -59,6 +60,8 @@ struct loop_info64 {
        __u64              lo_init[2];
 };
 
+#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0]
+
 /*
  * Loop filter types
  */
index 155e33f..a50527e 100644 (file)
@@ -41,10 +41,14 @@ enum {
 #define NBD_FLAG_HAS_FLAGS     (1 << 0) /* nbd-server supports flags */
 #define NBD_FLAG_READ_ONLY     (1 << 1) /* device is read-only */
 #define NBD_FLAG_SEND_FLUSH    (1 << 2) /* can flush writeback cache */
+#define NBD_FLAG_SEND_FUA      (1 << 3) /* send FUA (forced unit access) */
 /* there is a gap here to match userspace */
 #define NBD_FLAG_SEND_TRIM     (1 << 5) /* send trim/discard */
 #define NBD_FLAG_CAN_MULTI_CONN        (1 << 8)        /* Server supports multiple connections per export. */
 
+/* values for cmd flags in the upper 16 bits of request type */
+#define NBD_CMD_FLAG_FUA       (1 << 16) /* FUA (forced unit access) op */
+
 /* These are client behavior specific flags. */
 #define NBD_CFLAG_DESTROY_ON_DISCONNECT        (1 << 0) /* delete the nbd device on
                                                    disconnect. */
index 61b7d36..156ee4c 100644 (file)
@@ -343,6 +343,7 @@ enum ovs_key_attr {
 #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
 
 enum ovs_tunnel_key_attr {
+       /* OVS_TUNNEL_KEY_ATTR_NONE, standard nl API requires this attribute! */
        OVS_TUNNEL_KEY_ATTR_ID,                 /* be64 Tunnel ID */
        OVS_TUNNEL_KEY_ATTR_IPV4_SRC,           /* be32 src IP address. */
        OVS_TUNNEL_KEY_ATTR_IPV4_DST,           /* be32 dst IP address. */
index 3738e5f..8ef82f4 100644 (file)
 
 typedef struct {
        __u8 b[16];
-} uuid_le;
+} guid_t;
 
-typedef struct {
-       __u8 b[16];
-} uuid_be;
-
-#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)               \
-((uuid_le)                                                             \
+#define GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)                     \
+((guid_t)                                                              \
 {{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \
    (b) & 0xff, ((b) >> 8) & 0xff,                                      \
    (c) & 0xff, ((c) >> 8) & 0xff,                                      \
    (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
 
-#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)               \
-((uuid_be)                                                             \
-{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \
-   ((b) >> 8) & 0xff, (b) & 0xff,                                      \
-   ((c) >> 8) & 0xff, (c) & 0xff,                                      \
-   (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }})
-
+/* backwards compatibility, don't use in new code */
+typedef guid_t uuid_le;
+#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)               \
+       GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)
 #define NULL_UUID_LE                                                   \
        UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,     \
-               0x00, 0x00, 0x00, 0x00)
-
-#define NULL_UUID_BE                                                   \
-       UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,     \
-               0x00, 0x00, 0x00, 0x00)
-
+            0x00, 0x00, 0x00, 0x00)
 
 #endif /* _UAPI_LINUX_UUID_H_ */
index 339c8a1..a8a7256 100644 (file)
@@ -989,6 +989,11 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
        if (err)
                return err;
 
+       if (is_pointer_value(env, insn->src_reg)) {
+               verbose("R%d leaks addr into mem\n", insn->src_reg);
+               return -EACCES;
+       }
+
        /* check whether atomic_add can read the memory */
        err = check_mem_access(env, insn->dst_reg, insn->off,
                               BPF_SIZE(insn->code), BPF_READ, -1);
index 2831480..ee97196 100644 (file)
@@ -580,7 +580,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
        int ret = -ENOMEM, max_order = 0;
 
        if (!has_aux(event))
-               return -ENOTSUPP;
+               return -EOPNOTSUPP;
 
        if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
                /*
index 070be98..425170d 100644 (file)
@@ -1312,8 +1312,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
                        ret = __irq_set_trigger(desc,
                                                new->flags & IRQF_TRIGGER_MASK);
 
-                       if (ret)
+                       if (ret) {
+                               irq_release_resources(desc);
                                goto out_mask;
+                       }
                }
 
                desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
index ae1a3ba..154ffb4 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/compiler.h>
 #include <linux/hugetlb.h>
+#include <linux/frame.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
@@ -874,7 +875,7 @@ int kexec_load_disabled;
  * only when panic_cpu holds the current CPU number; this is the only CPU
  * which processes crash_kexec routines.
  */
-void __crash_kexec(struct pt_regs *regs)
+void __noclone __crash_kexec(struct pt_regs *regs)
 {
        /* Take the kexec_mutex here to prevent sys_kexec_load
         * running on one cpu from replacing the crash kernel
@@ -896,6 +897,7 @@ void __crash_kexec(struct pt_regs *regs)
                mutex_unlock(&kexec_mutex);
        }
 }
+STACK_FRAME_NON_STANDARD(__crash_kexec);
 
 void crash_kexec(struct pt_regs *regs)
 {
index f826903..52c4e90 100644 (file)
@@ -59,7 +59,11 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 
        ops = container_of(fops, struct klp_ops, fops);
 
-       rcu_read_lock();
+       /*
+        * A variant of synchronize_sched() is used to allow patching functions
+        * where RCU is not watching, see klp_synchronize_transition().
+        */
+       preempt_disable_notrace();
 
        func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
                                      stack_node);
@@ -115,7 +119,7 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 
        klp_arch_set_pc(regs, (unsigned long)func->new_func);
 unlock:
-       rcu_read_unlock();
+       preempt_enable_notrace();
 }
 
 /*
index adc0cc6..b004a1f 100644 (file)
@@ -49,6 +49,28 @@ static void klp_transition_work_fn(struct work_struct *work)
 static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn);
 
 /*
+ * This function is just a stub to implement a hard force of
+ * synchronize_sched(): scheduling it on every CPU synchronizes
+ * tasks even in userspace and idle.
+ */
+static void klp_sync(struct work_struct *work)
+{
+}
+
+/*
+ * We also allow patching functions where RCU is not watching,
+ * e.g. before user_exit(). We cannot rely on the RCU infrastructure
+ * to do the synchronization there, so we hard-force the sched
+ * synchronization instead.
+ *
+ * This approach still lets RCU functions manipulate func_stack safely.
+ */
+static void klp_synchronize_transition(void)
+{
+       schedule_on_each_cpu(klp_sync);
+}
+
+/*
  * The transition to the target patch state is complete.  Clean up the data
  * structures.
  */
@@ -73,7 +95,7 @@ static void klp_complete_transition(void)
                 * func->transition gets cleared, the handler may choose a
                 * removed function.
                 */
-               synchronize_rcu();
+               klp_synchronize_transition();
        }
 
        if (klp_transition_patch->immediate)
@@ -92,7 +114,7 @@ static void klp_complete_transition(void)
 
        /* Prevent klp_ftrace_handler() from seeing KLP_UNDEFINED state */
        if (klp_target_state == KLP_PATCHED)
-               synchronize_rcu();
+               klp_synchronize_transition();
 
        read_lock(&tasklist_lock);
        for_each_process_thread(g, task) {
@@ -136,7 +158,11 @@ void klp_cancel_transition(void)
  */
 void klp_update_patch_state(struct task_struct *task)
 {
-       rcu_read_lock();
+       /*
+        * A variant of synchronize_sched() is used to allow patching functions
+        * where RCU is not watching, see klp_synchronize_transition().
+        */
+       preempt_disable_notrace();
 
        /*
         * This test_and_clear_tsk_thread_flag() call also serves as a read
@@ -153,7 +179,7 @@ void klp_update_patch_state(struct task_struct *task)
        if (test_and_clear_tsk_thread_flag(task, TIF_PATCH_PENDING))
                task->patch_state = READ_ONCE(klp_target_state);
 
-       rcu_read_unlock();
+       preempt_enable_notrace();
 }
 
 /*
@@ -539,7 +565,7 @@ void klp_reverse_transition(void)
                clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING);
 
        /* Let any remaining calls to klp_update_patch_state() complete */
-       synchronize_rcu();
+       klp_synchronize_transition();
 
        klp_start_transition();
 }
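
The livepatch hunks above replace RCU read-side sections with preempt_disable_notrace() and replace synchronize_rcu() with a schedule_on_each_cpu()-based barrier. A rough userspace sketch of that barrier idea, with pthreads standing in for per-CPU work items (all names illustrative, not kernel API):

#include <pthread.h>
#include <stdio.h>

#define NWORKERS 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static int pending = NWORKERS;

/* The no-op "job": a worker running it proves it has passed a
 * scheduling point, i.e. it is no longer inside any critical
 * section that began before the barrier was requested. */
static void *noop_job(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        if (--pending == 0)
                pthread_cond_signal(&done);
        pthread_mutex_unlock(&lock);
        return NULL;
}

static void synchronize_workers(void)
{
        pthread_t tid[NWORKERS];
        int i;

        for (i = 0; i < NWORKERS; i++)
                pthread_create(&tid[i], NULL, noop_job, NULL);

        /* wait until every worker has run the no-op job */
        pthread_mutex_lock(&lock);
        while (pending)
                pthread_cond_wait(&done, &lock);
        pthread_mutex_unlock(&lock);

        for (i = 0; i < NWORKERS; i++)
                pthread_join(tid[i], NULL);
}

int main(void)
{
        synchronize_workers();
        puts("all workers passed the barrier");
        return 0;
}
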
index f80fd33..57d2257 100644 (file)
@@ -225,14 +225,14 @@ static struct block_device *hib_resume_bdev;
 struct hib_bio_batch {
        atomic_t                count;
        wait_queue_head_t       wait;
-       int                     error;
+       blk_status_t            error;
 };
 
 static void hib_init_batch(struct hib_bio_batch *hb)
 {
        atomic_set(&hb->count, 0);
        init_waitqueue_head(&hb->wait);
-       hb->error = 0;
+       hb->error = BLK_STS_OK;
 }
 
 static void hib_end_io(struct bio *bio)
@@ -240,7 +240,7 @@ static void hib_end_io(struct bio *bio)
        struct hib_bio_batch *hb = bio->bi_private;
        struct page *page = bio->bi_io_vec[0].bv_page;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
                                imajor(bio->bi_bdev->bd_inode),
                                iminor(bio->bi_bdev->bd_inode),
@@ -253,8 +253,8 @@ static void hib_end_io(struct bio *bio)
                flush_icache_range((unsigned long)page_address(page),
                                   (unsigned long)page_address(page) + PAGE_SIZE);
 
-       if (bio->bi_error && !hb->error)
-               hb->error = bio->bi_error;
+       if (bio->bi_status && !hb->error)
+               hb->error = bio->bi_status;
        if (atomic_dec_and_test(&hb->count))
                wake_up(&hb->wait);
 
@@ -293,10 +293,10 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
        return error;
 }
 
-static int hib_wait_io(struct hib_bio_batch *hb)
+static blk_status_t hib_wait_io(struct hib_bio_batch *hb)
 {
        wait_event(hb->wait, atomic_read(&hb->count) == 0);
-       return hb->error;
+       return blk_status_to_errno(hb->error);
 }
 
 /*
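
The hib_bio_batch conversion above keeps errors in the dedicated blk_status_t type and translates to -errno only at the wait boundary via blk_status_to_errno(). A minimal sketch of that boundary-conversion pattern (enum values and names are illustrative, not the kernel's):

#include <errno.h>
#include <stdio.h>

typedef enum { STS_OK = 0, STS_IOERR, STS_NOSPC } blk_status_like_t;

/* convert to -errno only where the typed status leaves the subsystem */
static int status_to_errno(blk_status_like_t s)
{
        switch (s) {
        case STS_OK:    return 0;
        case STS_NOSPC: return -ENOSPC;
        default:        return -EIO;
        }
}

int main(void)
{
        blk_status_like_t first_error = STS_OK;
        blk_status_like_t bio_status = STS_IOERR;

        /* completion path: record only the first error, like hib_end_io() */
        if (bio_status && !first_error)
                first_error = bio_status;

        printf("errno = %d\n", status_to_errno(first_error));
        return 0;
}
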
index e91138f..5b60f3a 100644 (file)
@@ -5605,7 +5605,7 @@ void idle_task_exit(void)
        BUG_ON(cpu_online(smp_processor_id()));
 
        if (mm != &init_mm) {
-               switch_mm_irqs_off(mm, &init_mm, current);
+               switch_mm(mm, &init_mm, current);
                finish_arch_post_lock_switch();
        }
        mmdrop(mm);
index 622eed1..076a2e3 100644 (file)
@@ -101,9 +101,6 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
        if (sg_policy->next_freq == next_freq)
                return;
 
-       if (sg_policy->next_freq > next_freq)
-               next_freq = (sg_policy->next_freq + next_freq) >> 1;
-
        sg_policy->next_freq = next_freq;
        sg_policy->last_freq_update_time = time;
 
index d711093..c77e4b1 100644 (file)
@@ -3563,7 +3563,7 @@ static inline void check_schedstat_required(void)
                        trace_sched_stat_runtime_enabled())  {
                printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
                             "stat_blocked and stat_runtime require the "
-                            "kernel parameter schedstats=enabled or "
+                            "kernel parameter schedstats=enable or "
                             "kernel.sched_schedstats=1\n");
        }
 #endif
index ca92bcf..45b4c1f 100644 (file)
@@ -510,7 +510,8 @@ int unhandled_signal(struct task_struct *tsk, int sig)
        return !tsk->ptrace;
 }
 
-static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
+                          bool *resched_timer)
 {
        struct sigqueue *q, *first = NULL;
 
@@ -532,6 +533,12 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
 still_pending:
                list_del_init(&first->list);
                copy_siginfo(info, &first->info);
+
+               *resched_timer =
+                       (first->flags & SIGQUEUE_PREALLOC) &&
+                       (info->si_code == SI_TIMER) &&
+                       (info->si_sys_private);
+
                __sigqueue_free(first);
        } else {
                /*
@@ -548,12 +555,12 @@ still_pending:
 }
 
 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
-                       siginfo_t *info)
+                       siginfo_t *info, bool *resched_timer)
 {
        int sig = next_signal(pending, mask);
 
        if (sig)
-               collect_signal(sig, pending, info);
+               collect_signal(sig, pending, info, resched_timer);
        return sig;
 }
 
@@ -565,15 +572,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
  */
 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
 {
+       bool resched_timer = false;
        int signr;
 
        /* We only dequeue private signals from ourselves, we don't let
         * signalfd steal them
         */
-       signr = __dequeue_signal(&tsk->pending, mask, info);
+       signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
        if (!signr) {
                signr = __dequeue_signal(&tsk->signal->shared_pending,
-                                        mask, info);
+                                        mask, info, &resched_timer);
 #ifdef CONFIG_POSIX_TIMERS
                /*
                 * itimer signal ?
@@ -621,7 +629,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
                current->jobctl |= JOBCTL_STOP_DEQUEUED;
        }
 #ifdef CONFIG_POSIX_TIMERS
-       if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
+       if (resched_timer) {
                /*
                 * Release the siglock to ensure proper locking order
                 * of timer locks outside of siglocks.  Note, we leave
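
The collect_signal() change above evaluates everything the caller will later need (the resched_timer predicate) while the sigqueue entry is still valid, then frees it and hands the result back through an out-parameter. A compact sketch of that compute-before-free pattern (types and names are illustrative):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct sigq_like {
        int  flags;             /* e.g. a PREALLOC bit */
        int  si_code;
        long si_sys_private;
};

#define Q_PREALLOC 0x1
#define CODE_TIMER 42

static void collect(struct sigq_like *q, bool *resched_timer)
{
        /* evaluate while q is still valid ... */
        *resched_timer = (q->flags & Q_PREALLOC) &&
                         q->si_code == CODE_TIMER &&
                         q->si_sys_private;
        free(q);                /* ... then release it */
}

int main(void)
{
        struct sigq_like *q = malloc(sizeof(*q));
        bool resched;

        *q = (struct sigq_like){ .flags = Q_PREALLOC,
                                 .si_code = CODE_TIMER,
                                 .si_sys_private = 1 };
        collect(q, &resched);
        printf("resched timer: %s\n", resched ? "yes" : "no");
        return 0;
}
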
index ece4b17..939a158 100644 (file)
@@ -1119,7 +1119,7 @@ static ssize_t bin_uuid(struct file *file,
        /* Only supports reads */
        if (oldval && oldlen) {
                char buf[UUID_STRING_LEN + 1];
-               uuid_be uuid;
+               uuid_t uuid;
 
                result = kernel_read(file, 0, buf, sizeof(buf) - 1);
                if (result < 0)
@@ -1128,7 +1128,7 @@ static ssize_t bin_uuid(struct file *file,
                buf[result] = '\0';
 
                result = -EIO;
-               if (uuid_be_to_bin(buf, &uuid))
+               if (uuid_parse(buf, &uuid))
                        goto out;
 
                if (oldlen > 16)
index 5cb5b00..ee2f420 100644 (file)
@@ -387,7 +387,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
 {
        struct alarm_base *base = &alarm_bases[alarm->type];
 
-       start = ktime_add(start, base->gettime());
+       start = ktime_add_safe(start, base->gettime());
        alarm_start(alarm, start);
 }
 EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -475,7 +475,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
                overrun++;
        }
 
-       alarm->node.expires = ktime_add(alarm->node.expires, interval);
+       alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
        return overrun;
 }
 EXPORT_SYMBOL_GPL(alarm_forward);
@@ -660,13 +660,21 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 
        /* start the timer */
        timr->it.alarm.interval = timespec64_to_ktime(new_setting->it_interval);
+
+       /*
+        * Rate limit to the tick as a hot fix to prevent a DoS. Will be
+        * mopped up later.
+        */
+       if (timr->it.alarm.interval < TICK_NSEC)
+               timr->it.alarm.interval = TICK_NSEC;
+
        exp = timespec64_to_ktime(new_setting->it_value);
        /* Convert (if necessary) to absolute time */
        if (flags != TIMER_ABSTIME) {
                ktime_t now;
 
                now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
-               exp = ktime_add(now, exp);
+               exp = ktime_add_safe(now, exp);
        }
 
        alarm_start(&timr->it.alarm.alarmtimer, exp);
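
ktime_add_safe() differs from ktime_add() by saturating at KTIME_MAX instead of wrapping, so a huge relative expiry clamps to the far future rather than overflowing into the past. A small sketch of saturating signed addition (using the GCC/Clang __builtin_add_overflow; the constant name is an illustrative stand-in for KTIME_MAX):

#include <stdint.h>
#include <stdio.h>

#define KTIME_MAX_LIKE INT64_MAX

static int64_t add_safe(int64_t a, int64_t b)
{
        int64_t res;

        /* detect signed overflow without undefined behaviour */
        if (__builtin_add_overflow(a, b, &res))
                return KTIME_MAX_LIKE;
        return res;
}

int main(void)
{
        int64_t now = INT64_MAX - 10;

        /* plain '+' would wrap negative; this clamps */
        printf("%lld\n", (long long)add_safe(now, 1000));
        return 0;
}
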
index 987e496..b398c2e 100644 (file)
@@ -37,9 +37,11 @@ static int tick_broadcast_forced;
 static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 
 #ifdef CONFIG_TICK_ONESHOT
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 static void tick_broadcast_clear_oneshot(int cpu);
 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 #else
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
 static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
 #endif
@@ -867,7 +869,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask,
 /**
  * tick_broadcast_setup_oneshot - setup the broadcast device
  */
-void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
+static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
        int cpu = smp_processor_id();
 
index f738251..be0ac01 100644 (file)
@@ -126,7 +126,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 
 /* Functions related to oneshot broadcasting */
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
-extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
 extern int tick_broadcast_oneshot_active(void);
@@ -134,7 +133,6 @@ extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
 extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 #else /* !(BROADCAST && ONESHOT): */
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
index 9652bc5..b602c48 100644 (file)
@@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
        tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is needed in the read paths because, while the seqlock
+ * ensures we don't return a bad value while structures are updated, it
+ * doesn't protect against potential crashes: the tkr's clocksource may
+ * change between taking the read reference and taking the clock
+ * reference passed to the read function, and passing the wrong
+ * clocksource to the wrong read function can cause crashes.
+ * This helper is not needed when holding the timekeeper_lock, or when
+ * reading the fast-timekeeper tkrs (which are protected by their own
+ * locking and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+       struct clocksource *clock = READ_ONCE(tkr->clock);
+
+       return clock->read(clock);
+}
+
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
 
@@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
         */
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               now = tkr->read(tkr->clock);
+               now = tk_clock_read(tkr);
                last = tkr->cycle_last;
                mask = tkr->mask;
                max = tkr->clock->max_cycles;
@@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
        u64 cycle_now, delta;
 
        /* read clocksource */
-       cycle_now = tkr->read(tkr->clock);
+       cycle_now = tk_clock_read(tkr);
 
        /* calculate the delta since the last update_wall_time */
        delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
        ++tk->cs_was_changed_seq;
        old_clock = tk->tkr_mono.clock;
        tk->tkr_mono.clock = clock;
-       tk->tkr_mono.read = clock->read;
        tk->tkr_mono.mask = clock->mask;
-       tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+       tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
 
        tk->tkr_raw.clock = clock;
-       tk->tkr_raw.read = clock->read;
        tk->tkr_raw.mask = clock->mask;
        tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
@@ -262,7 +280,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
        /* Go back from cycles -> shifted ns */
        tk->xtime_interval = interval * clock->mult;
        tk->xtime_remainder = ntpinterval - tk->xtime_interval;
-       tk->raw_interval = (interval * clock->mult) >> clock->shift;
+       tk->raw_interval = interval * clock->mult;
 
         /* if changing clocks, convert xtime_nsec shift units */
        if (old_clock) {
@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 
                now += timekeeping_delta_to_ns(tkr,
                                clocksource_delta(
-                                       tkr->read(tkr->clock),
+                                       tk_clock_read(tkr),
                                        tkr->cycle_last,
                                        tkr->mask));
        } while (read_seqcount_retry(&tkf->seq, seq));
@@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs)
        return cycles_at_suspend;
 }
 
+static struct clocksource dummy_clock = {
+       .read = dummy_clock_read,
+};
+
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  * @tk: Timekeeper to snapshot.
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
        struct tk_read_base *tkr = &tk->tkr_mono;
 
        memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-       cycles_at_suspend = tkr->read(tkr->clock);
-       tkr_dummy.read = dummy_clock_read;
+       cycles_at_suspend = tk_clock_read(tkr);
+       tkr_dummy.clock = &dummy_clock;
        update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 
        tkr = &tk->tkr_raw;
        memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-       tkr_dummy.read = dummy_clock_read;
+       tkr_dummy.clock = &dummy_clock;
        update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
@@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-       struct clocksource *clock = tk->tkr_mono.clock;
        u64 cycle_now, delta;
        u64 nsec;
 
-       cycle_now = tk->tkr_mono.read(clock);
+       cycle_now = tk_clock_read(&tk->tkr_mono);
        delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
        tk->tkr_mono.cycle_last = cycle_now;
        tk->tkr_raw.cycle_last  = cycle_now;
@@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-
-               now = tk->tkr_mono.read(tk->tkr_mono.clock);
+               now = tk_clock_read(&tk->tkr_mono);
                systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
                systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
                base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
                 * Check whether the system counter value provided by the
                 * device driver is on the current timekeeping interval.
                 */
-               now = tk->tkr_mono.read(tk->tkr_mono.clock);
+               now = tk_clock_read(&tk->tkr_mono);
                interval_start = tk->tkr_mono.cycle_last;
                if (!cycle_between(interval_start, cycles, now)) {
                        clock_was_set_seq = tk->clock_was_set_seq;
@@ -1629,7 +1649,7 @@ void timekeeping_resume(void)
         * The less preferred source will only be tried if there is no better
         * usable source. The rtc part is handled separately in rtc core code.
         */
-       cycle_now = tk->tkr_mono.read(clock);
+       cycle_now = tk_clock_read(&tk->tkr_mono);
        if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
                cycle_now > tk->tkr_mono.cycle_last) {
                u64 nsec, cyc_delta;
@@ -1976,7 +1996,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
                                    u32 shift, unsigned int *clock_set)
 {
        u64 interval = tk->cycle_interval << shift;
-       u64 raw_nsecs;
+       u64 snsec_per_sec;
 
        /* If the offset is smaller than a shifted interval, do nothing */
        if (offset < interval)
@@ -1991,14 +2011,15 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
        *clock_set |= accumulate_nsecs_to_secs(tk);
 
        /* Accumulate raw time */
-       raw_nsecs = (u64)tk->raw_interval << shift;
-       raw_nsecs += tk->raw_time.tv_nsec;
-       if (raw_nsecs >= NSEC_PER_SEC) {
-               u64 raw_secs = raw_nsecs;
-               raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
-               tk->raw_time.tv_sec += raw_secs;
+       tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
+       tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+       snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+       while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+               tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+               tk->raw_time.tv_sec++;
        }
-       tk->raw_time.tv_nsec = raw_nsecs;
+       tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+       tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
 
        /* Accumulate error between NTP and clock interval */
        tk->ntp_error += tk->ntp_tick << shift;
@@ -2030,7 +2051,7 @@ void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
        offset = real_tk->cycle_interval;
 #else
-       offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+       offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
                                   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
 
index 193c5f5..bc364f8 100644 (file)
@@ -867,7 +867,7 @@ static void blk_add_trace_split(void *ignore,
 
                __blk_add_trace(bt, bio->bi_iter.bi_sector,
                                bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
-                               BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu),
+                               BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
                                &rpdu);
        }
 }
@@ -900,7 +900,7 @@ static void blk_add_trace_bio_remap(void *ignore,
        r.sector_from = cpu_to_be64(from);
 
        __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
-                       bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error,
+                       bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
                        sizeof(r), &r);
 }
 
index 9e5841d..b308be3 100644 (file)
@@ -4337,9 +4337,6 @@ static int ftrace_process_regex(struct ftrace_iterator *iter,
 
        command = strsep(&next, ":");
 
-       if (WARN_ON_ONCE(!tr))
-               return -EINVAL;
-
        mutex_lock(&ftrace_cmd_mutex);
        list_for_each_entry(p, &ftrace_commands, list) {
                if (strcmp(p->name, command) == 0) {
index 1122f15..091e801 100644 (file)
@@ -6881,6 +6881,9 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
        char *number;
        int ret;
 
+       if (!tr)
+               return -ENODEV;
+
        /* hash funcs only work with set_ftrace_filter */
        if (!enable)
                return -EINVAL;
index a3bddbf..a0910c0 100644 (file)
@@ -654,6 +654,9 @@ ftrace_trace_onoff_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
        struct ftrace_probe_ops *ops;
 
+       if (!tr)
+               return -ENODEV;
+
        /* we register both traceon and traceoff to this callback */
        if (strcmp(cmd, "traceon") == 0)
                ops = param ? &traceon_count_probe_ops : &traceon_probe_ops;
@@ -670,6 +673,9 @@ ftrace_stacktrace_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
        struct ftrace_probe_ops *ops;
 
+       if (!tr)
+               return -ENODEV;
+
        ops = param ? &stacktrace_count_probe_ops : &stacktrace_probe_ops;
 
        return ftrace_trace_probe_callback(tr, ops, hash, glob, cmd,
@@ -682,6 +688,9 @@ ftrace_dump_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
        struct ftrace_probe_ops *ops;
 
+       if (!tr)
+               return -ENODEV;
+
        ops = &dump_probe_ops;
 
        /* Only dump once. */
@@ -695,6 +704,9 @@ ftrace_cpudump_callback(struct trace_array *tr, struct ftrace_hash *hash,
 {
        struct ftrace_probe_ops *ops;
 
+       if (!tr)
+               return -ENODEV;
+
        ops = &cpudump_probe_ops;
 
        /* Only dump once. */
index c129fca..b53c8d3 100644 (file)
@@ -707,20 +707,16 @@ static int create_trace_kprobe(int argc, char **argv)
                pr_info("Probe point is not specified.\n");
                return -EINVAL;
        }
-       if (isdigit(argv[1][0])) {
-               /* an address specified */
-               ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
-               if (ret) {
-                       pr_info("Failed to parse address.\n");
-                       return ret;
-               }
-       } else {
+
+       /* Try to parse an address. If that fails, read the input
+        * as a symbol instead. */
+       if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
                /* a symbol specified */
                symbol = argv[1];
                /* TODO: support .init module functions */
                ret = traceprobe_split_symbol_offset(symbol, &offset);
                if (ret) {
-                       pr_info("Failed to parse symbol.\n");
+                       pr_info("Failed to parse either an address or a symbol.\n");
                        return ret;
                }
                if (offset && is_return &&
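
The create_trace_kprobe() rework above attempts a numeric parse first and falls back to symbol handling only when that fails, instead of guessing from the first character. A sketch of the same parse-or-fallback flow in plain C (helper names and output format are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_probe_point(const char *tok)
{
        char *end;
        unsigned long addr = strtoul(tok, &end, 0);

        /* full-string numeric parse, like kstrtoul() */
        if (end != tok && *end == '\0') {
                printf("address: 0x%lx\n", addr);
                return 0;
        }

        /* not a plain number: treat it as symbol[+offset] */
        const char *plus = strchr(tok, '+');
        long off = plus ? strtol(plus + 1, NULL, 0) : 0;

        printf("symbol: %.*s offset: %ld\n",
               (int)(plus ? plus - tok : (long)strlen(tok)), tok, off);
        return 0;
}

int main(void)
{
        parse_probe_point("0xffffffff81000000");
        parse_probe_point("do_sys_open+8");
        return 0;
}
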
index 76aa04d..b4a751e 100644 (file)
@@ -409,7 +409,9 @@ static const struct file_operations stack_trace_fops = {
 static int
 stack_trace_filter_open(struct inode *inode, struct file *file)
 {
-       return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
+       struct ftrace_ops *ops = inode->i_private;
+
+       return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
                                 inode, file);
 }
 
@@ -476,7 +478,7 @@ static __init int stack_trace_init(void)
                        NULL, &stack_trace_fops);
 
        trace_create_file("stack_trace_filter", 0444, d_tracer,
-                       NULL, &stack_trace_filter_fops);
+                         &trace_ops, &stack_trace_filter_fops);
 
        if (stack_trace_filter_buf[0])
                ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
index 3c6432d..4c0888c 100644 (file)
  *     the values[M, M+1, ..., N] into the ints array in get_options.
  */
 
-static int get_range(char **str, int *pint)
+static int get_range(char **str, int *pint, int n)
 {
        int x, inc_counter, upper_range;
 
        (*str)++;
        upper_range = simple_strtol((*str), NULL, 0);
        inc_counter = upper_range - *pint;
-       for (x = *pint; x < upper_range; x++)
+       for (x = *pint; n && x < upper_range; x++, n--)
                *pint++ = x;
        return inc_counter;
 }
@@ -97,7 +97,7 @@ char *get_options(const char *str, int nints, int *ints)
                        break;
                if (res == 3) {
                        int range_nums;
-                       range_nums = get_range((char **)&str, ints + i);
+                       range_nums = get_range((char **)&str, ints + i, nints - i);
                        if (range_nums < 0)
                                break;
                        /*
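
The get_range() fix above threads the remaining array capacity through the expansion loop so a large range can no longer write past the caller's buffer, while still reporting the full range length. A standalone sketch of that bounded expansion:

#include <stdio.h>

static int expand_range(int lo, int hi, int *out, int n)
{
        int wrote = 0;
        int x;

        for (x = lo; n && x < hi; x++, n--)     /* 'n' caps the writes */
                out[wrote++] = x;
        return hi - lo;   /* caller still learns the full range length */
}

int main(void)
{
        int buf[4];
        int total = expand_range(10, 100, buf, 4);

        /* only 4 slots were written despite a range of 90 */
        printf("range of %d, stored %d..%d\n", total, buf[0], buf[3]);
        return 0;
}
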
index 74a54b7..9f79547 100644 (file)
@@ -43,7 +43,7 @@ static struct crypto_shash *tfm;
 u32 crc32c(u32 crc, const void *address, unsigned int length)
 {
        SHASH_DESC_ON_STACK(shash, tfm);
-       u32 *ctx = (u32 *)shash_desc_ctx(shash);
+       u32 ret, *ctx = (u32 *)shash_desc_ctx(shash);
        int err;
 
        shash->tfm = tfm;
@@ -53,7 +53,9 @@ u32 crc32c(u32 crc, const void *address, unsigned int length)
        err = crypto_shash_update(shash, address, length);
        BUG_ON(err);
 
-       return *ctx;
+       ret = *ctx;
+       barrier_data(ctx);
+       return ret;
 }
 
 EXPORT_SYMBOL(crc32c);
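
barrier_data() is an optimization barrier that makes the compiler treat the pointed-to data as live, which is also why it is the canonical way to keep a sensitive-data memset() from being elided as a dead store. A sketch of an equivalent construct (the inline-asm form mirrors what barrier_data() expands to on GCC-style compilers):

#include <string.h>

/* Equivalent of barrier_data(p): tell the compiler the memory behind
 * p is observed, so prior stores to it are not dead. */
static inline void barrier_data_like(void *p)
{
        __asm__ __volatile__("" : : "r"(p) : "memory");
}

int main(void)
{
        char key[32];

        memset(key, 'K', sizeof(key));  /* pretend this held a secret */
        /* ...secret used here... */
        memset(key, 0, sizeof(key));    /* wipe */
        barrier_data_like(key);         /* keep the wipe from being elided */
        return 0;
}
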
index c6cf822..be7b4dd 100644 (file)
@@ -751,3 +751,38 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
        return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
 }
 EXPORT_SYMBOL(sg_pcopy_to_buffer);
+
+/**
+ * sg_zero_buffer - Zero-out a part of a SG list
+ * @sgl:                The SG list
+ * @nents:              Number of SG entries
+ * @buflen:             The number of bytes to zero out
+ * @skip:               Number of bytes to skip before zeroing
+ *
+ * Returns the number of bytes zeroed.
+ **/
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+                      size_t buflen, off_t skip)
+{
+       unsigned int offset = 0;
+       struct sg_mapping_iter miter;
+       unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
+
+       sg_miter_start(&miter, sgl, nents, sg_flags);
+
+       if (!sg_miter_skip(&miter, skip))
+               return 0;
+
+       while (offset < buflen && sg_miter_next(&miter)) {
+               unsigned int len;
+
+               len = min(miter.length, buflen - offset);
+               memset(miter.addr, 0, len);
+
+               offset += len;
+       }
+
+       sg_miter_stop(&miter);
+       return offset;
+}
+EXPORT_SYMBOL(sg_zero_buffer);
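
sg_zero_buffer() walks the scatterlist with a mapping iterator, skipping the first skip bytes and zeroing at most buflen bytes across entries. A flat-memory sketch of the same skip-then-clamp loop (the chunk struct is an illustrative stand-in for the SG mapping iterator):

#include <stdio.h>
#include <string.h>

struct chunk { unsigned char *addr; size_t len; };

static size_t zero_chunks(struct chunk *c, size_t nchunks,
                          size_t buflen, size_t skip)
{
        size_t offset = 0;
        size_t i;

        for (i = 0; i < nchunks && offset < buflen; i++) {
                unsigned char *p = c[i].addr;
                size_t len = c[i].len;

                if (skip >= len) {      /* whole chunk skipped */
                        skip -= len;
                        continue;
                }
                p += skip;              /* partial skip */
                len -= skip;
                skip = 0;

                if (len > buflen - offset)  /* clamp to the request */
                        len = buflen - offset;
                memset(p, 0, len);
                offset += len;
        }
        return offset;                  /* bytes actually zeroed */
}

int main(void)
{
        unsigned char a[8] = {1,1,1,1,1,1,1,1}, b[8] = {2,2,2,2,2,2,2,2};
        struct chunk c[] = { { a, sizeof(a) }, { b, sizeof(b) } };

        printf("zeroed %zu bytes\n", zero_chunks(c, 2, 10, 4));
        return 0;
}
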
index 547d312..478c049 100644 (file)
 
 struct test_uuid_data {
        const char *uuid;
-       uuid_le le;
-       uuid_be be;
+       guid_t le;
+       uuid_t be;
 };
 
 static const struct test_uuid_data test_uuid_test_data[] = {
        {
                .uuid = "c33f4995-3701-450e-9fbf-206a2e98e576",
-               .le = UUID_LE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
-               .be = UUID_BE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+               .le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+               .be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
        },
        {
                .uuid = "64b4371c-77c1-48f9-8221-29f054fc023b",
-               .le = UUID_LE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
-               .be = UUID_BE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+               .le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+               .be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
        },
        {
                .uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84",
-               .le = UUID_LE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
-               .be = UUID_BE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+               .le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+               .be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
        },
 };
 
@@ -61,28 +61,28 @@ static void __init test_uuid_failed(const char *prefix, bool wrong, bool be,
 
 static void __init test_uuid_test(const struct test_uuid_data *data)
 {
-       uuid_le le;
-       uuid_be be;
+       guid_t le;
+       uuid_t be;
        char buf[48];
 
        /* LE */
        total_tests++;
-       if (uuid_le_to_bin(data->uuid, &le))
+       if (guid_parse(data->uuid, &le))
                test_uuid_failed("conversion", false, false, data->uuid, NULL);
 
        total_tests++;
-       if (uuid_le_cmp(data->le, le)) {
+       if (!guid_equal(&data->le, &le)) {
                sprintf(buf, "%pUl", &le);
                test_uuid_failed("cmp", false, false, data->uuid, buf);
        }
 
        /* BE */
        total_tests++;
-       if (uuid_be_to_bin(data->uuid, &be))
+       if (uuid_parse(data->uuid, &be))
                test_uuid_failed("conversion", false, true, data->uuid, NULL);
 
        total_tests++;
-       if (uuid_be_cmp(data->be, be)) {
+       if (!uuid_equal(&data->be, &be)) {
                sprintf(buf, "%pUb", &be);
                test_uuid_failed("cmp", false, true, data->uuid, buf);
        }
@@ -90,17 +90,17 @@ static void __init test_uuid_test(const struct test_uuid_data *data)
 
 static void __init test_uuid_wrong(const char *data)
 {
-       uuid_le le;
-       uuid_be be;
+       guid_t le;
+       uuid_t be;
 
        /* LE */
        total_tests++;
-       if (!uuid_le_to_bin(data, &le))
+       if (!guid_parse(data, &le))
                test_uuid_failed("negative", true, false, data, NULL);
 
        /* BE */
        total_tests++;
-       if (!uuid_be_to_bin(data, &be))
+       if (!uuid_parse(data, &be))
                test_uuid_failed("negative", true, true, data, NULL);
 }
 
index 37687af..680b9fb 100644 (file)
 #include <linux/uuid.h>
 #include <linux/random.h>
 
-const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_le_index);
-const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_be_index);
+const guid_t guid_null;
+EXPORT_SYMBOL(guid_null);
+const uuid_t uuid_null;
+EXPORT_SYMBOL(uuid_null);
+
+const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
+const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
 
 /***************************************************************
  * Random UUID interface
@@ -53,21 +56,21 @@ static void __uuid_gen_common(__u8 b[16])
        b[8] = (b[8] & 0x3F) | 0x80;
 }
 
-void uuid_le_gen(uuid_le *lu)
+void guid_gen(guid_t *lu)
 {
        __uuid_gen_common(lu->b);
        /* version 4 : random generation */
        lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
 }
-EXPORT_SYMBOL_GPL(uuid_le_gen);
+EXPORT_SYMBOL_GPL(guid_gen);
 
-void uuid_be_gen(uuid_be *bu)
+void uuid_gen(uuid_t *bu)
 {
        __uuid_gen_common(bu->b);
        /* version 4 : random generation */
        bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
 }
-EXPORT_SYMBOL_GPL(uuid_be_gen);
+EXPORT_SYMBOL_GPL(uuid_gen);
 
 /**
   * uuid_is_valid - checks if UUID string valid
@@ -97,7 +100,7 @@ bool uuid_is_valid(const char *uuid)
 }
 EXPORT_SYMBOL(uuid_is_valid);
 
-static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
+static int __uuid_parse(const char *uuid, __u8 b[16], const u8 ei[16])
 {
        static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34};
        unsigned int i;
@@ -115,14 +118,14 @@ static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
        return 0;
 }
 
-int uuid_le_to_bin(const char *uuid, uuid_le *u)
+int guid_parse(const char *uuid, guid_t *u)
 {
-       return __uuid_to_bin(uuid, u->b, uuid_le_index);
+       return __uuid_parse(uuid, u->b, guid_index);
 }
-EXPORT_SYMBOL(uuid_le_to_bin);
+EXPORT_SYMBOL(guid_parse);
 
-int uuid_be_to_bin(const char *uuid, uuid_be *u)
+int uuid_parse(const char *uuid, uuid_t *u)
 {
-       return __uuid_to_bin(uuid, u->b, uuid_be_index);
+       return __uuid_parse(uuid, u->b, uuid_index);
 }
-EXPORT_SYMBOL(uuid_be_to_bin);
+EXPORT_SYMBOL(uuid_parse);
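
__uuid_parse() is a single hex parser steered by two index tables: si gives the character position of each byte in the string (skipping the dashes), and the ei table gives the destination byte for either the little-endian guid_t or the big-endian uuid_t layout. A standalone sketch of the table-driven parse (dash validation, done by uuid_is_valid() in the kernel, is omitted here):

#include <stdio.h>

static const unsigned char si[16] =
        {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34};
static const unsigned char be_ei[16] =
        {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
static const unsigned char le_ei[16] =
        {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};

static int hexval(char c)
{
        if (c >= '0' && c <= '9')
                return c - '0';
        c |= 0x20;                      /* lowercase */
        if (c >= 'a' && c <= 'f')
                return c - 'a' + 10;
        return -1;
}

static int parse_uuid(const char *s, unsigned char b[16],
                      const unsigned char ei[16])
{
        int i;

        for (i = 0; i < 16; i++) {
                int hi = hexval(s[si[i]]), lo = hexval(s[si[i] + 1]);

                if (hi < 0 || lo < 0)
                        return -1;
                b[ei[i]] = (unsigned char)(hi << 4 | lo);
        }
        return 0;
}

int main(void)
{
        unsigned char be[16], le[16];
        const char *u = "c33f4995-3701-450e-9fbf-206a2e98e576";

        parse_uuid(u, be, be_ei);
        parse_uuid(u, le, le_ei);
        printf("be[0]=%02x le[0]=%02x\n", be[0], le[0]);  /* c3 vs 95 */
        return 0;
}
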
index 2d41de3..9f37d62 100644 (file)
@@ -1308,14 +1308,14 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
        char uuid[UUID_STRING_LEN + 1];
        char *p = uuid;
        int i;
-       const u8 *index = uuid_be_index;
+       const u8 *index = uuid_index;
        bool uc = false;
 
        switch (*(++fmt)) {
        case 'L':
                uc = true;              /* fall-through */
        case 'l':
-               index = uuid_le_index;
+               index = guid_index;
                break;
        case 'B':
                uc = true;
index ba5d8f3..f7b9fdc 100644 (file)
@@ -130,7 +130,7 @@ void __cleancache_init_shared_fs(struct super_block *sb)
        int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
 
        if (cleancache_ops) {
-               pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE);
+               pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE);
                if (pool_id < 0)
                        pool_id = CLEANCACHE_NO_POOL;
        }
index 6f1be57..742034e 100644 (file)
@@ -376,6 +376,38 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:           address space within which to check
+ * @start_byte:        offset in bytes where the range starts
+ * @end_byte:          offset in bytes where the range ends (inclusive)
+ *
+ * Look for at least one page in the supplied range; usually used to check
+ * whether direct writes in this range would trigger writeback.
+ */
+bool filemap_range_has_page(struct address_space *mapping,
+                          loff_t start_byte, loff_t end_byte)
+{
+       pgoff_t index = start_byte >> PAGE_SHIFT;
+       pgoff_t end = end_byte >> PAGE_SHIFT;
+       struct pagevec pvec;
+       bool ret;
+
+       if (end_byte < start_byte)
+               return false;
+
+       if (mapping->nrpages == 0)
+               return false;
+
+       pagevec_init(&pvec, 0);
+       if (!pagevec_lookup(&pvec, mapping, index, 1))
+               return false;
+       ret = (pvec.pages[0]->index <= end);
+       pagevec_release(&pvec);
+       return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
                                     loff_t start_byte, loff_t end_byte)
 {
@@ -2038,10 +2070,17 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
                loff_t size;
 
                size = i_size_read(inode);
-               retval = filemap_write_and_wait_range(mapping, iocb->ki_pos,
-                                       iocb->ki_pos + count - 1);
-               if (retval < 0)
-                       goto out;
+               if (iocb->ki_flags & IOCB_NOWAIT) {
+                       if (filemap_range_has_page(mapping, iocb->ki_pos,
+                                                  iocb->ki_pos + count - 1))
+                               return -EAGAIN;
+               } else {
+                       retval = filemap_write_and_wait_range(mapping,
+                                               iocb->ki_pos,
+                                               iocb->ki_pos + count - 1);
+                       if (retval < 0)
+                               goto out;
+               }
 
                file_accessed(file);
 
@@ -2642,6 +2681,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 
        pos = iocb->ki_pos;
 
+       if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+               return -EINVAL;
+
        if (limit != RLIM_INFINITY) {
                if (iocb->ki_pos >= limit) {
                        send_sig(SIGXFSZ, current, 0);
@@ -2710,9 +2752,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
        write_len = iov_iter_count(from);
        end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-       written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-       if (written)
-               goto out;
+       if (iocb->ki_flags & IOCB_NOWAIT) {
+               /* If there are pages to write back, return -EAGAIN */
+               if (filemap_range_has_page(inode->i_mapping, pos,
+                                          pos + iov_iter_count(from)))
+                       return -EAGAIN;
+       } else {
+               written = filemap_write_and_wait_range(mapping, pos,
+                                                       pos + write_len - 1);
+               if (written)
+                       goto out;
+       }
 
        /*
         * After a write we want buffered reads to be sure to go to disk to get
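
The filemap changes above give IOCB_NOWAIT a cheap preflight: if any page is cached in the target range, the direct I/O would have to flush and wait, so the request fails fast with -EAGAIN; otherwise the blocking path calls filemap_write_and_wait_range() as before. A sketch of that decision (all names illustrative):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct kiocb_like { unsigned flags; long long pos; };
#define F_NOWAIT 0x1

static bool range_has_page(long long start, long long end)
{
        (void)start; (void)end;
        return true;            /* pretend the page cache is populated */
}

static int flush_and_wait(long long start, long long end)
{
        (void)start; (void)end;
        return 0;               /* pretend writeback succeeded */
}

static int prepare_direct_io(struct kiocb_like *iocb, long long count)
{
        long long end = iocb->pos + count - 1;

        if (iocb->flags & F_NOWAIT) {
                if (range_has_page(iocb->pos, end))
                        return -EAGAIN; /* caller retries from a context
                                           that is allowed to sleep */
                return 0;
        }
        return flush_and_wait(iocb->pos, end);
}

int main(void)
{
        struct kiocb_like io = { .flags = F_NOWAIT, .pos = 0 };

        printf("%d\n", prepare_direct_io(&io, 4096));  /* -EAGAIN */
        return 0;
}
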
index b3c7214..576c4df 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -387,11 +387,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
        /* mlock all present pages, but do not fault in new pages */
        if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
                return -ENOENT;
-       /* For mm_populate(), just skip the stack guard page. */
-       if ((*flags & FOLL_POPULATE) &&
-                       (stack_guard_page_start(vma, address) ||
-                        stack_guard_page_end(vma, address + PAGE_SIZE)))
-               return -ENOENT;
        if (*flags & FOLL_WRITE)
                fault_flags |= FAULT_FLAG_WRITE;
        if (*flags & FOLL_REMOTE)
index a84909c..88c6167 100644 (file)
@@ -1426,8 +1426,11 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
         */
        if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
                page = pmd_page(*vmf->pmd);
+               if (!get_page_unless_zero(page))
+                       goto out_unlock;
                spin_unlock(vmf->ptl);
                wait_on_page_locked(page);
+               put_page(page);
                goto out;
        }
 
@@ -1459,9 +1462,12 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 
        /* Migration could have started since the pmd_trans_migrating check */
        if (!page_locked) {
+               page_nid = -1;
+               if (!get_page_unless_zero(page))
+                       goto out_unlock;
                spin_unlock(vmf->ptl);
                wait_on_page_locked(page);
-               page_nid = -1;
+               put_page(page);
                goto out;
        }
 
index 945fd1c..df4ebdb 100644 (file)
@@ -652,7 +652,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
                        spin_unlock(ptl);
                        free_page_and_swap_cache(src_page);
                }
-               cond_resched();
        }
 }
 
index 342fac9..ecc183f 100644 (file)
@@ -1184,7 +1184,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
         * page_remove_rmap() in try_to_unmap_one(). So to determine page status
         * correctly, we save a copy of the page flags at this time.
         */
-       page_flags = p->flags;
+       if (PageHuge(p))
+               page_flags = hpage->flags;
+       else
+               page_flags = p->flags;
 
        /*
         * unpoison always clear PG_hwpoison inside page lock
index 2e65df1..bb11c47 100644 (file)
@@ -2855,40 +2855,6 @@ out_release:
 }
 
 /*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
-       address &= PAGE_MASK;
-       if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
-               struct vm_area_struct *prev = vma->vm_prev;
-
-               /*
-                * Is there a mapping abutting this one below?
-                *
-                * That's only ok if it's the same stack mapping
-                * that has gotten split..
-                */
-               if (prev && prev->vm_end == address)
-                       return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
-               return expand_downwards(vma, address - PAGE_SIZE);
-       }
-       if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
-               struct vm_area_struct *next = vma->vm_next;
-
-               /* As VM_GROWSDOWN but s/below/above/ */
-               if (next && next->vm_start == address + PAGE_SIZE)
-                       return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
-               return expand_upwards(vma, address + PAGE_SIZE);
-       }
-       return 0;
-}
-
-/*
  * We enter with non-exclusive mmap_sem (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
@@ -2904,10 +2870,6 @@ static int do_anonymous_page(struct vm_fault *vmf)
        if (vma->vm_flags & VM_SHARED)
                return VM_FAULT_SIGBUS;
 
-       /* Check if we need to add a guard page to the stack */
-       if (check_stack_guard_page(vma, vmf->address) < 0)
-               return VM_FAULT_SIGSEGV;
-
        /*
         * Use pte_alloc() instead of pte_alloc_map().  We can't run
         * pte_offset_map() on pmds where a huge pmd might be created
index f82741e..a5e3dcd 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -183,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        unsigned long retval;
        unsigned long newbrk, oldbrk;
        struct mm_struct *mm = current->mm;
+       struct vm_area_struct *next;
        unsigned long min_brk;
        bool populate;
        LIST_HEAD(uf);
@@ -229,7 +230,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        }
 
        /* Check against existing mmap mappings. */
-       if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+       next = find_vma(mm, oldbrk);
+       if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
                goto out;
 
        /* Ok, looks good - let it rip. */
@@ -253,10 +255,22 @@ out:
 
 static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
-       unsigned long max, subtree_gap;
-       max = vma->vm_start;
-       if (vma->vm_prev)
-               max -= vma->vm_prev->vm_end;
+       unsigned long max, prev_end, subtree_gap;
+
+       /*
+        * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
+        * allow two stack_guard_gaps between them here, and when choosing
+        * an unmapped area; whereas when expanding we only require one.
+        * That's a little inconsistent, but keeps the code here simpler.
+        */
+       max = vm_start_gap(vma);
+       if (vma->vm_prev) {
+               prev_end = vm_end_gap(vma->vm_prev);
+               if (max > prev_end)
+                       max -= prev_end;
+               else
+                       max = 0;
+       }
        if (vma->vm_rb.rb_left) {
                subtree_gap = rb_entry(vma->vm_rb.rb_left,
                                struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -352,7 +366,7 @@ static void validate_mm(struct mm_struct *mm)
                        anon_vma_unlock_read(anon_vma);
                }
 
-               highest_address = vma->vm_end;
+               highest_address = vm_end_gap(vma);
                vma = vma->vm_next;
                i++;
        }
@@ -541,7 +555,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
        if (vma->vm_next)
                vma_gap_update(vma->vm_next);
        else
-               mm->highest_vm_end = vma->vm_end;
+               mm->highest_vm_end = vm_end_gap(vma);
 
        /*
         * vma->vm_prev wasn't known when we followed the rbtree to find the
@@ -856,7 +870,7 @@ again:
                        vma_gap_update(vma);
                if (end_changed) {
                        if (!next)
-                               mm->highest_vm_end = end;
+                               mm->highest_vm_end = vm_end_gap(vma);
                        else if (!adjust_next)
                                vma_gap_update(next);
                }
@@ -941,7 +955,7 @@ again:
                         * mm->highest_vm_end doesn't need any update
                         * in remove_next == 1 case.
                         */
-                       VM_WARN_ON(mm->highest_vm_end != end);
+                       VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
                }
        }
        if (insert && file)
@@ -1787,7 +1801,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 
        while (true) {
                /* Visit left subtree if it looks promising */
-               gap_end = vma->vm_start;
+               gap_end = vm_start_gap(vma);
                if (gap_end >= low_limit && vma->vm_rb.rb_left) {
                        struct vm_area_struct *left =
                                rb_entry(vma->vm_rb.rb_left,
@@ -1798,12 +1812,13 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
                        }
                }
 
-               gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+               gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
 check_current:
                /* Check if current node has a suitable gap */
                if (gap_start > high_limit)
                        return -ENOMEM;
-               if (gap_end >= low_limit && gap_end - gap_start >= length)
+               if (gap_end >= low_limit &&
+                   gap_end > gap_start && gap_end - gap_start >= length)
                        goto found;
 
                /* Visit right subtree if it looks promising */
@@ -1825,8 +1840,8 @@ check_current:
                        vma = rb_entry(rb_parent(prev),
                                       struct vm_area_struct, vm_rb);
                        if (prev == vma->vm_rb.rb_left) {
-                               gap_start = vma->vm_prev->vm_end;
-                               gap_end = vma->vm_start;
+                               gap_start = vm_end_gap(vma->vm_prev);
+                               gap_end = vm_start_gap(vma);
                                goto check_current;
                        }
                }
@@ -1890,7 +1905,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
        while (true) {
                /* Visit right subtree if it looks promising */
-               gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+               gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
                if (gap_start <= high_limit && vma->vm_rb.rb_right) {
                        struct vm_area_struct *right =
                                rb_entry(vma->vm_rb.rb_right,
@@ -1903,10 +1918,11 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 check_current:
                /* Check if current node has a suitable gap */
-               gap_end = vma->vm_start;
+               gap_end = vm_start_gap(vma);
                if (gap_end < low_limit)
                        return -ENOMEM;
-               if (gap_start <= high_limit && gap_end - gap_start >= length)
+               if (gap_start <= high_limit &&
+                   gap_end > gap_start && gap_end - gap_start >= length)
                        goto found;
 
                /* Visit left subtree if it looks promising */
@@ -1929,7 +1945,7 @@ check_current:
                                       struct vm_area_struct, vm_rb);
                        if (prev == vma->vm_rb.rb_right) {
                                gap_start = vma->vm_prev ?
-                                       vma->vm_prev->vm_end : 0;
+                                       vm_end_gap(vma->vm_prev) : 0;
                                goto check_current;
                        }
                }
@@ -1967,7 +1983,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
 {
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma, *prev;
        struct vm_unmapped_area_info info;
 
        if (len > TASK_SIZE - mmap_min_addr)
@@ -1978,9 +1994,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
        if (addr) {
                addr = PAGE_ALIGN(addr);
-               vma = find_vma(mm, addr);
+               vma = find_vma_prev(mm, addr, &prev);
                if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)) &&
+                   (!prev || addr >= vm_end_gap(prev)))
                        return addr;
        }
 
@@ -2003,7 +2020,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                          const unsigned long len, const unsigned long pgoff,
                          const unsigned long flags)
 {
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma, *prev;
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;
        struct vm_unmapped_area_info info;
@@ -2018,9 +2035,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        /* requesting a specific address */
        if (addr) {
                addr = PAGE_ALIGN(addr);
-               vma = find_vma(mm, addr);
+               vma = find_vma_prev(mm, addr, &prev);
                if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
-                               (!vma || addr + len <= vma->vm_start))
+                               (!vma || addr + len <= vm_start_gap(vma)) &&
+                               (!prev || addr >= vm_end_gap(prev)))
                        return addr;
        }
 
@@ -2155,21 +2173,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
  * update accounting. This is shared with both the
  * grow-up and grow-down cases.
  */
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+                            unsigned long size, unsigned long grow)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct rlimit *rlim = current->signal->rlim;
-       unsigned long new_start, actual_size;
+       unsigned long new_start;
 
        /* address space limit tests */
        if (!may_expand_vm(mm, vma->vm_flags, grow))
                return -ENOMEM;
 
        /* Stack limit test */
-       actual_size = size;
-       if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
-               actual_size -= PAGE_SIZE;
-       if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+       if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
                return -ENOMEM;
 
        /* mlock limit tests */
@@ -2207,16 +2223,32 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 {
        struct mm_struct *mm = vma->vm_mm;
+       struct vm_area_struct *next;
+       unsigned long gap_addr;
        int error = 0;
 
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
 
-       /* Guard against wrapping around to address 0. */
-       if (address < PAGE_ALIGN(address+4))
-               address = PAGE_ALIGN(address+4);
-       else
+       /* Guard against exceeding limits of the address space. */
+       address &= PAGE_MASK;
+       if (address >= TASK_SIZE)
                return -ENOMEM;
+       address += PAGE_SIZE;
+
+       /* Enforce stack_guard_gap */
+       gap_addr = address + stack_guard_gap;
+
+       /* Guard against overflow */
+       if (gap_addr < address || gap_addr > TASK_SIZE)
+               gap_addr = TASK_SIZE;
+
+       next = vma->vm_next;
+       if (next && next->vm_start < gap_addr) {
+               if (!(next->vm_flags & VM_GROWSUP))
+                       return -ENOMEM;
+               /* Check that both stack segments have the same anon_vma? */
+       }
 
        /* We must make sure the anon_vma is allocated. */
        if (unlikely(anon_vma_prepare(vma)))
@@ -2261,7 +2293,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                if (vma->vm_next)
                                        vma_gap_update(vma->vm_next);
                                else
-                                       mm->highest_vm_end = address;
+                                       mm->highest_vm_end = vm_end_gap(vma);
                                spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
@@ -2282,6 +2314,8 @@ int expand_downwards(struct vm_area_struct *vma,
                                   unsigned long address)
 {
        struct mm_struct *mm = vma->vm_mm;
+       struct vm_area_struct *prev;
+       unsigned long gap_addr;
        int error;
 
        address &= PAGE_MASK;
@@ -2289,6 +2323,17 @@ int expand_downwards(struct vm_area_struct *vma,
        if (error)
                return error;
 
+       /* Enforce stack_guard_gap */
+       gap_addr = address - stack_guard_gap;
+       if (gap_addr > address)
+               return -ENOMEM;
+       prev = vma->vm_prev;
+       if (prev && prev->vm_end > gap_addr) {
+               if (!(prev->vm_flags & VM_GROWSDOWN))
+                       return -ENOMEM;
+               /* Check that both stack segments have the same anon_vma? */
+       }
+
        /* We must make sure the anon_vma is allocated. */
        if (unlikely(anon_vma_prepare(vma)))
                return -ENOMEM;
@@ -2343,28 +2388,25 @@ int expand_downwards(struct vm_area_struct *vma,
        return error;
 }
 
-/*
- * Note how expand_stack() refuses to expand the stack all the way to
- * abut the next virtual mapping, *unless* that mapping itself is also
- * a stack mapping. We want to leave room for a guard page, after all
- * (the guard page itself is not added here, that is done by the
- * actual page faulting logic)
- *
- * This matches the behavior of the guard page logic (see mm/memory.c:
- * check_stack_guard_page()), which only allows the guard page to be
- * removed under these circumstances.
- */
+/* Enforced gap between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+       unsigned long val;
+       char *endptr;
+
+       val = simple_strtoul(p, &endptr, 10);
+       if (!*endptr)
+               stack_guard_gap = val << PAGE_SHIFT;
+
+       return 0;
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-       struct vm_area_struct *next;
-
-       address &= PAGE_MASK;
-       next = vma->vm_next;
-       if (next && next->vm_start == address + PAGE_SIZE) {
-               if (!(next->vm_flags & VM_GROWSUP))
-                       return -ENOMEM;
-       }
        return expand_upwards(vma, address);
 }
 
@@ -2386,14 +2428,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 #else
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
-       struct vm_area_struct *prev;
-
-       address &= PAGE_MASK;
-       prev = vma->vm_prev;
-       if (prev && prev->vm_end == address) {
-               if (!(prev->vm_flags & VM_GROWSDOWN))
-                       return -ENOMEM;
-       }
        return expand_downwards(vma, address);
 }
 
@@ -2491,7 +2525,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
                vma->vm_prev = prev;
                vma_gap_update(vma);
        } else
-               mm->highest_vm_end = prev ? prev->vm_end : 0;
+               mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
        tail_vma->vm_next = NULL;
 
        /* Kill the cache */
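
The mmap.c changes above consistently compare against vm_start_gap()/vm_end_gap() rather than vm_start/vm_end, so every gap computation reserves stack_guard_gap next to a growing stack VMA. A sketch of what those helpers compute, modeled on the corresponding include/linux/mm.h change (not shown in this hunk); the clamping guards against address wraparound:

#include <stdio.h>

#define VM_GROWSDOWN 0x1
#define VM_GROWSUP   0x2

static unsigned long stack_guard_gap = 256UL << 12;  /* 256 pages @ 4K */

struct vma_like { unsigned long vm_start, vm_end, vm_flags; };

static unsigned long vm_start_gap_like(const struct vma_like *vma)
{
        unsigned long start = vma->vm_start;

        if (vma->vm_flags & VM_GROWSDOWN) {
                start -= stack_guard_gap;
                if (start > vma->vm_start)      /* wrapped below 0 */
                        start = 0;
        }
        return start;
}

static unsigned long vm_end_gap_like(const struct vma_like *vma)
{
        unsigned long end = vma->vm_end;

        if (vma->vm_flags & VM_GROWSUP) {
                end += stack_guard_gap;
                if (end < vma->vm_end)          /* wrapped past top */
                        end = -1UL;
        }
        return end;
}

int main(void)
{
        struct vma_like stack = {
                .vm_start = 0x7f0000000000UL,
                .vm_end   = 0x7f0000021000UL,
                .vm_flags = VM_GROWSDOWN,
        };

        printf("start gap: %#lx, end gap: %#lx\n",
               vm_start_gap_like(&stack), vm_end_gap_like(&stack));
        return 0;
}
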
index 23f6d0d..2da71e6 100644 (file)
@@ -45,7 +45,7 @@ void end_swap_bio_write(struct bio *bio)
 {
        struct page *page = bio->bi_io_vec[0].bv_page;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                SetPageError(page);
                /*
                 * We failed to write the page out to swap-space.
@@ -118,7 +118,7 @@ static void end_swap_bio_read(struct bio *bio)
 {
        struct page *page = bio->bi_io_vec[0].bv_page;
 
-       if (bio->bi_error) {
+       if (bio->bi_status) {
                SetPageError(page);
                ClearPageUptodate(page);
                pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
index e67d6ba..391f2dc 100644 (file)
@@ -75,6 +75,7 @@ static struct vfsmount *shm_mnt;
 #include <uapi/linux/memfd.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/rmap.h>
+#include <linux/uuid.h>
 
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
@@ -3761,6 +3762,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_TMPFS_POSIX_ACL
        sb->s_flags |= MS_POSIXACL;
 #endif
+       uuid_gen(&sb->s_uuid);
 
        inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE);
        if (!inode)
index 7449593..8addc53 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5625,6 +5625,28 @@ static char *create_unique_id(struct kmem_cache *s)
        return name;
 }
 
+static void sysfs_slab_remove_workfn(struct work_struct *work)
+{
+       struct kmem_cache *s =
+               container_of(work, struct kmem_cache, kobj_remove_work);
+
+       if (!s->kobj.state_in_sysfs)
+               /*
+                * For a memcg cache, this may be called during
+                * deactivation and again on shutdown.  Remove only once.
+                * A cache is never shut down before deactivation is
+                * complete, so no need to worry about synchronization.
+                */
+               return;
+
+#ifdef CONFIG_MEMCG
+       kset_unregister(s->memcg_kset);
+#endif
+       kobject_uevent(&s->kobj, KOBJ_REMOVE);
+       kobject_del(&s->kobj);
+       kobject_put(&s->kobj);
+}
+
 static int sysfs_slab_add(struct kmem_cache *s)
 {
        int err;
@@ -5632,6 +5654,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
        struct kset *kset = cache_kset(s);
        int unmergeable = slab_unmergeable(s);
 
+       INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
+
        if (!kset) {
                kobject_init(&s->kobj, &slab_ktype);
                return 0;
@@ -5695,20 +5719,8 @@ static void sysfs_slab_remove(struct kmem_cache *s)
                 */
                return;
 
-       if (!s->kobj.state_in_sysfs)
-               /*
-                * For a memcg cache, this may be called during
-                * deactivation and again on shutdown.  Remove only once.
-                * A cache is never shut down before deactivation is
-                * complete, so no need to worry about synchronization.
-                */
-               return;
-
-#ifdef CONFIG_MEMCG
-       kset_unregister(s->memcg_kset);
-#endif
-       kobject_uevent(&s->kobj, KOBJ_REMOVE);
-       kobject_del(&s->kobj);
+       kobject_get(&s->kobj);
+       schedule_work(&s->kobj_remove_work);
 }
 
 void sysfs_slab_release(struct kmem_cache *s)
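The mm/slub.c hunks above move sysfs teardown into a workqueue item: sysfs_slab_remove() pins the kobject with kobject_get() before schedule_work(), and sysfs_slab_remove_workfn() drops that pin with kobject_put() once the possibly-sleeping removal is done. A rough userspace analog of the pin-then-defer pattern, with pthreads standing in for the workqueue and every name hypothetical:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct cache {
            atomic_int refs;
    };

    static void cache_put(struct cache *c)
    {
            /* free on the last reference, wherever it is dropped */
            if (atomic_fetch_sub(&c->refs, 1) == 1)
                    free(c);
    }

    static void *remove_workfn(void *arg)
    {
            struct cache *c = arg;

            /* the blocking teardown happens here, not in the caller */
            cache_put(c);           /* balances the pin taken below */
            return NULL;
    }

    int main(void)
    {
            struct cache *c = malloc(sizeof(*c));
            pthread_t worker;

            atomic_init(&c->refs, 1);               /* caller's reference */
            atomic_fetch_add(&c->refs, 1);          /* "kobject_get()" */
            pthread_create(&worker, NULL, remove_workfn, c);

            cache_put(c);                           /* caller drops its own */
            pthread_join(worker, NULL);
            return 0;
    }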
index ac6318a..3405b4e 100644 (file)
@@ -48,6 +48,9 @@ static int swap_cgroup_prepare(int type)
                if (!page)
                        goto not_enough_page;
                ctrl->map[idx] = page;
+
+               if (!(idx % SWAP_CLUSTER_MAX))
+                       cond_resched();
        }
        return 0;
 not_enough_page:
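The swap_cgroup hunk above drops a cond_resched() into a potentially long allocation loop, once every SWAP_CLUSTER_MAX pages, so a huge swap area cannot monopolize the CPU while its map is built. The same batching idea in plain userspace C (sched_yield() standing in for cond_resched(); BATCH is an arbitrary stand-in for SWAP_CLUSTER_MAX):

    #include <sched.h>
    #include <stdio.h>

    #define BATCH 32        /* stand-in for SWAP_CLUSTER_MAX */

    int main(void)
    {
            for (unsigned long idx = 0; idx < 1000000; idx++) {
                    /* ...one unit of work per iteration... */
                    if (!(idx % BATCH))
                            sched_yield();  /* give other tasks a chance */
            }
            printf("done\n");
            return 0;
    }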
index 34a1c3e..ecc97f7 100644 (file)
@@ -287,10 +287,21 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
        if (p4d_none(*p4d))
                return NULL;
        pud = pud_offset(p4d, addr);
-       if (pud_none(*pud))
+
+       /*
+        * Don't dereference bad PUD or PMD (below) entries. This will also
+        * identify huge mappings, which we may encounter on architectures
+        * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
+        * identified as vmalloc addresses by is_vmalloc_addr(), but are
+        * not [unambiguously] associated with a struct page, so there is
+        * no correct value to return for them.
+        */
+       WARN_ON_ONCE(pud_bad(*pud));
+       if (pud_none(*pud) || pud_bad(*pud))
                return NULL;
        pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd))
+       WARN_ON_ONCE(pmd_bad(*pmd));
+       if (pmd_none(*pmd) || pmd_bad(*pmd))
                return NULL;
 
        ptep = pte_offset_map(pmd, addr);
index 6063581..ce0618b 100644 (file)
@@ -115,9 +115,9 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
        unsigned long pressure = 0;
 
        /*
-        * reclaimed can be greater than scanned in cases
-        * like THP, where the scanned is 1 and reclaimed
-        * could be 512
+        * reclaimed can be greater than scanned for things such as reclaimed
+        * slab pages. shrink_node() just adds reclaimed pages without a
+        * related increment to scanned pages.
         */
        if (reclaimed >= scanned)
                goto out;
index 467069b..9649579 100644 (file)
@@ -277,7 +277,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
        return 0;
 
 out_free_newdev:
-       free_netdev(new_dev);
+       if (new_dev->reg_state == NETREG_UNINITIALIZED)
+               free_netdev(new_dev);
        return err;
 }
 
index 953b672..abc5f40 100644 (file)
@@ -813,7 +813,6 @@ static void vlan_dev_free(struct net_device *dev)
 
        free_percpu(vlan->vlan_pcpu_stats);
        vlan->vlan_pcpu_stats = NULL;
-       free_netdev(dev);
 }
 
 void vlan_setup(struct net_device *dev)
@@ -826,7 +825,8 @@ void vlan_setup(struct net_device *dev)
        netif_keep_dst(dev);
 
        dev->netdev_ops         = &vlan_netdev_ops;
-       dev->destructor         = vlan_dev_free;
+       dev->needs_free_netdev  = true;
+       dev->priv_destructor    = vlan_dev_free;
        dev->ethtool_ops        = &vlan_ethtool_ops;
 
        dev->min_mtu            = 0;
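This vlan hunk is the first of many conversions in this merge away from the single dev->destructor callback: priv_destructor now releases only driver-private state, while needs_free_netdev tells the core to call free_netdev() itself (the matching core changes are in the net/core/dev.c hunks further down). A simplified sketch of the new contract, plain C with hypothetical names:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct fake_netdev {
            void (*priv_destructor)(struct fake_netdev *dev);
            bool needs_free_netdev;
    };

    /* roughly what netdev_run_todo() now does at unregister time */
    static void core_teardown(struct fake_netdev *dev)
    {
            if (dev->priv_destructor)
                    dev->priv_destructor(dev);  /* driver state only */
            if (dev->needs_free_netdev)
                    free(dev);                  /* the core frees the dev */
    }

    static void driver_destructor(struct fake_netdev *dev)
    {
            printf("releasing per-device private state\n");
    }

    int main(void)
    {
            struct fake_netdev *dev = calloc(1, sizeof(*dev));

            dev->priv_destructor = driver_destructor;
            dev->needs_free_netdev = true;
            core_teardown(dev);
            return 0;
    }

Splitting the two steps lets error paths such as the register_netdevice() hunk below run the private destructor without risking a double free_netdev().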
index 013e970..000ca2f 100644 (file)
@@ -1064,8 +1064,9 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 
                skb_new->protocol = eth_type_trans(skb_new, soft_iface);
 
-               soft_iface->stats.rx_packets++;
-               soft_iface->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
+               batadv_inc_counter(bat_priv, BATADV_CNT_RX);
+               batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
+                                  skb->len + ETH_HLEN + hdr_size);
 
                netif_rx(skb_new);
                batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
index e1ebe14..ae9f4d3 100644 (file)
@@ -987,7 +987,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
                                batadv_dbg(BATADV_DBG_BLA, bat_priv,
                                           "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n",
                                           orig_addr_gw);
-                               return NET_RX_DROP;
+                               goto free_skb;
                        }
                }
 
index b25789a..10f7edf 100644 (file)
@@ -1034,8 +1034,6 @@ static void batadv_softif_free(struct net_device *dev)
         * netdev and its private data (bat_priv)
         */
        rcu_barrier();
-
-       free_netdev(dev);
 }
 
 /**
@@ -1047,7 +1045,8 @@ static void batadv_softif_init_early(struct net_device *dev)
        ether_setup(dev);
 
        dev->netdev_ops = &batadv_netdev_ops;
-       dev->destructor = batadv_softif_free;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = batadv_softif_free;
        dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
        dev->priv_flags |= IFF_NO_QUEUE;
 
index 6089599..ab3b654 100644 (file)
@@ -598,7 +598,7 @@ static void netdev_setup(struct net_device *dev)
 
        dev->netdev_ops         = &netdev_ops;
        dev->header_ops         = &header_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
 }
 
 static struct device_type bt_type = {
index 430b53e..f0f3447 100644 (file)
@@ -379,7 +379,7 @@ void br_dev_setup(struct net_device *dev)
        ether_setup(dev);
 
        dev->netdev_ops = &br_netdev_ops;
-       dev->destructor = free_netdev;
+       dev->needs_free_netdev = true;
        dev->ethtool_ops = &br_ethtool_ops;
        SET_NETDEV_DEVTYPE(dev, &br_type);
        dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
index adcad34..21f18ea 100644 (file)
@@ -754,6 +754,10 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 
        lock_sock(sk);
 
+       err = -EINVAL;
+       if (addr_len < offsetofend(struct sockaddr, sa_family))
+               goto out;
+
        err = -EAFNOSUPPORT;
        if (uaddr->sa_family != AF_CAIF)
                goto out;
index 59ce1fc..71b6ab2 100644 (file)
@@ -81,11 +81,7 @@ static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx)
 {
        struct sk_buff *skb;
 
-       if (likely(in_interrupt()))
-               skb = alloc_skb(len + pfx, GFP_ATOMIC);
-       else
-               skb = alloc_skb(len + pfx, GFP_KERNEL);
-
+       skb = alloc_skb(len + pfx, GFP_ATOMIC);
        if (unlikely(skb == NULL))
                return NULL;
 
index 1816fc9..fe3c53e 100644 (file)
@@ -392,14 +392,14 @@ static void chnl_net_destructor(struct net_device *dev)
 {
        struct chnl_net *priv = netdev_priv(dev);
        caif_free_client(&priv->chnl);
-       free_netdev(dev);
 }
 
 static void ipcaif_net_setup(struct net_device *dev)
 {
        struct chnl_net *priv;
        dev->netdev_ops = &netdev_ops;
-       dev->destructor = chnl_net_destructor;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = chnl_net_destructor;
        dev->flags |= IFF_NOARP;
        dev->flags |= IFF_POINTOPOINT;
        dev->mtu = GPRS_PDP_MTU;
index b6406fe..88edac0 100644 (file)
@@ -872,8 +872,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
 
 static int can_pernet_init(struct net *net)
 {
-       net->can.can_rcvlists_lock =
-               __SPIN_LOCK_UNLOCKED(net->can.can_rcvlists_lock);
+       spin_lock_init(&net->can.can_rcvlists_lock);
        net->can.can_rx_alldev_list =
                kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
 
index fca407b..416137c 100644 (file)
@@ -1253,8 +1253,9 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
        if (!new_ifalias)
                return -ENOMEM;
        dev->ifalias = new_ifalias;
+       memcpy(dev->ifalias, alias, len);
+       dev->ifalias[len] = 0;
 
-       strlcpy(dev->ifalias, alias, len+1);
        return len;
 }
 
@@ -4766,6 +4767,13 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
 }
 EXPORT_SYMBOL(gro_find_complete_by_type);
 
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+       skb_dst_drop(skb);
+       secpath_reset(skb);
+       kmem_cache_free(skbuff_head_cache, skb);
+}
+
 static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 {
        switch (ret) {
@@ -4779,13 +4787,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
                break;
 
        case GRO_MERGED_FREE:
-               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
-                       skb_dst_drop(skb);
-                       secpath_reset(skb);
-                       kmem_cache_free(skbuff_head_cache, skb);
-               } else {
+               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+                       napi_skb_free_stolen_head(skb);
+               else
                        __kfree_skb(skb);
-               }
                break;
 
        case GRO_HELD:
@@ -4857,10 +4862,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
                break;
 
        case GRO_DROP:
-       case GRO_MERGED_FREE:
                napi_reuse_skb(napi, skb);
                break;
 
+       case GRO_MERGED_FREE:
+               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+                       napi_skb_free_stolen_head(skb);
+               else
+                       napi_reuse_skb(napi, skb);
+               break;
+
        case GRO_MERGED:
        case GRO_CONSUMED:
                break;
@@ -4948,6 +4959,19 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(__skb_gro_checksum_complete);
 
+static void net_rps_send_ipi(struct softnet_data *remsd)
+{
+#ifdef CONFIG_RPS
+       while (remsd) {
+               struct softnet_data *next = remsd->rps_ipi_next;
+
+               if (cpu_online(remsd->cpu))
+                       smp_call_function_single_async(remsd->cpu, &remsd->csd);
+               remsd = next;
+       }
+#endif
+}
+
 /*
  * net_rps_action_and_irq_enable sends any pending IPI's for rps.
  * Note: called with local irq disabled, but exits with local irq enabled.
@@ -4963,14 +4987,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
                local_irq_enable();
 
                /* Send pending IPI's to kick RPS processing on remote cpus. */
-               while (remsd) {
-                       struct softnet_data *next = remsd->rps_ipi_next;
-
-                       if (cpu_online(remsd->cpu))
-                               smp_call_function_single_async(remsd->cpu,
-                                                          &remsd->csd);
-                       remsd = next;
-               }
+               net_rps_send_ipi(remsd);
        } else
 #endif
                local_irq_enable();
@@ -5199,8 +5216,6 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
        if (rc == BUSY_POLL_BUDGET)
                __napi_schedule(napi);
        local_bh_enable();
-       if (local_softirq_pending())
-               do_softirq();
 }
 
 void napi_busy_loop(unsigned int napi_id,
@@ -7501,6 +7516,8 @@ out:
 err_uninit:
        if (dev->netdev_ops->ndo_uninit)
                dev->netdev_ops->ndo_uninit(dev);
+       if (dev->priv_destructor)
+               dev->priv_destructor(dev);
        goto out;
 }
 EXPORT_SYMBOL(register_netdevice);
@@ -7708,8 +7725,10 @@ void netdev_run_todo(void)
                WARN_ON(rcu_access_pointer(dev->ip6_ptr));
                WARN_ON(dev->dn_ptr);
 
-               if (dev->destructor)
-                       dev->destructor(dev);
+               if (dev->priv_destructor)
+                       dev->priv_destructor(dev);
+               if (dev->needs_free_netdev)
+                       free_netdev(dev);
 
                /* Report a network device has been unregistered */
                rtnl_lock();
@@ -7774,9 +7793,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
        } else {
                netdev_stats_to_stats64(storage, &dev->stats);
        }
-       storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
-       storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
-       storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
+       storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+       storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
+       storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
        return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
@@ -8192,7 +8211,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
        struct sk_buff **list_skb;
        struct sk_buff *skb;
        unsigned int cpu;
-       struct softnet_data *sd, *oldsd;
+       struct softnet_data *sd, *oldsd, *remsd = NULL;
 
        local_irq_disable();
        cpu = smp_processor_id();
@@ -8233,6 +8252,13 @@ static int dev_cpu_dead(unsigned int oldcpu)
        raise_softirq_irqoff(NET_TX_SOFTIRQ);
        local_irq_enable();
 
+#ifdef CONFIG_RPS
+       remsd = oldsd->rps_ipi_list;
+       oldsd->rps_ipi_list = NULL;
+#endif
+       /* send out pending IPI's on offline CPU */
+       net_rps_send_ipi(remsd);
+
        /* Process offline CPU's input_pkt_queue */
        while ((skb = __skb_dequeue(&oldsd->process_queue))) {
                netif_rx_ni(skb);
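Among the net/core/dev.c hunks above, the dev_set_alias() change swaps strlcpy() for memcpy() plus manual termination: strlcpy() keeps scanning the source until it finds a NUL, so an unterminated source of exactly len bytes could be read past its end. A small userspace illustration of the safer copy (hypothetical buffers):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char src[5] = { 'a', 'l', 'i', 'a', 's' };  /* no terminator */
            char dst[6];

            memcpy(dst, src, sizeof(src));  /* touch only the given bytes */
            dst[sizeof(src)] = '\0';        /* terminate by hand */
            printf("%s\n", dst);            /* prints "alias" */
            return 0;
    }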
index b94b1d2..27fad31 100644 (file)
@@ -410,6 +410,22 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
        if (cmd == SIOCGIFNAME)
                return dev_ifname(net, (struct ifreq __user *)arg);
 
+       /*
+        * Take care of Wireless Extensions. Unfortunately struct iwreq
+        * isn't a proper subset of struct ifreq (it's 8 bytes shorter),
+        * so we need to treat it specially; otherwise applications may
+        * fault if the struct they're passing happens to land at the
+        * end of a mapped page.
+        */
+       if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+               struct iwreq iwr;
+
+               if (copy_from_user(&iwr, arg, sizeof(iwr)))
+                       return -EFAULT;
+
+               return wext_handle_ioctl(net, &iwr, cmd, arg);
+       }
+
        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                return -EFAULT;
 
@@ -559,9 +575,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
                                ret = -EFAULT;
                        return ret;
                }
-               /* Take care of Wireless Extensions */
-               if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
-                       return wext_handle_ioctl(net, &ifr, cmd, arg);
                return -ENOTTY;
        }
 }
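The relocated wireless-extensions handling above exists because of the size mismatch the comment describes: on common 64-bit ABIs, struct ifreq is 40 bytes while struct iwreq is 32, so copying sizeof(struct ifreq) from user space can run past the caller's buffer. A Linux-only snippet to confirm the sizes (kernel UAPI headers assumed available; exact values depend on the ABI):

    #include <stdio.h>
    #include <linux/wireless.h>     /* pulls in struct ifreq and iwreq */

    int main(void)
    {
            printf("sizeof(struct ifreq) = %zu\n", sizeof(struct ifreq));
            printf("sizeof(struct iwreq) = %zu\n", sizeof(struct iwreq));
            return 0;
    }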
index 6192f11..13ba4a0 100644 (file)
@@ -469,6 +469,20 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
                spin_lock_bh(&dst_garbage.lock);
                dst = dst_garbage.list;
                dst_garbage.list = NULL;
+               /* The code in dst_ifdown places a hold on the loopback device.
+                * If the gc entry processing is set to expire after a lengthy
+                * interval, this hold can cause netdev_wait_allrefs() to hang
+                * out and wait for a long time -- until the loopback
+                * interface is released.  If we're really unlucky, it'll emit
+                * pr_emerg messages to console too.  Reset the interval here,
+                * so dst cleanups occur in a more timely fashion.
+                */
+               if (dst_garbage.timer_inc > DST_GC_INC) {
+                       dst_garbage.timer_inc = DST_GC_INC;
+                       dst_garbage.timer_expires = DST_GC_MIN;
+                       mod_delayed_work(system_wq, &dst_gc_work,
+                                        dst_garbage.timer_expires);
+               }
                spin_unlock_bh(&dst_garbage.lock);
 
                if (last)
index f21c4d3..3bba291 100644 (file)
@@ -568,7 +568,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct net *net = sock_net(skb->sk);
        struct fib_rule_hdr *frh = nlmsg_data(nlh);
        struct fib_rules_ops *ops = NULL;
-       struct fib_rule *rule, *tmp;
+       struct fib_rule *rule, *r;
        struct nlattr *tb[FRA_MAX+1];
        struct fib_kuid_range range;
        int err = -EINVAL;
@@ -668,16 +668,23 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 
                /*
                 * Check if this rule is a target to any of them. If so,
+                * adjust to the next one with the same preference or
                 * disable them. As this operation is eventually very
-                * expensive, it is only performed if goto rules have
-                * actually been added.
+                * expensive, it is only performed if goto rules, other
+                * than the current rule itself, have actually been added.
                 */
                if (ops->nr_goto_rules > 0) {
-                       list_for_each_entry(tmp, &ops->rules_list, list) {
-                               if (rtnl_dereference(tmp->ctarget) == rule) {
-                                       RCU_INIT_POINTER(tmp->ctarget, NULL);
+                       struct fib_rule *n;
+
+                       n = list_next_entry(rule, list);
+                       if (&n->list == &ops->rules_list || n->pref != rule->pref)
+                               n = NULL;
+                       list_for_each_entry(r, &ops->rules_list, list) {
+                               if (rtnl_dereference(r->ctarget) != rule)
+                                       continue;
+                               rcu_assign_pointer(r->ctarget, n);
+                               if (!n)
                                        ops->unresolved_rules++;
-                               }
                        }
                }
 
index 9e2c0a7..467a2f4 100644 (file)
@@ -931,6 +931,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
               + nla_total_size(1) /* IFLA_LINKMODE */
               + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
               + nla_total_size(4) /* IFLA_LINK_NETNSID */
+              + nla_total_size(4) /* IFLA_GROUP */
               + nla_total_size(ext_filter_mask
                                & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
               + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -1124,6 +1125,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
        struct ifla_vf_mac vf_mac;
        struct ifla_vf_info ivi;
 
+       memset(&ivi, 0, sizeof(ivi));
+
        /* Not all SR-IOV capable drivers support the
         * spoofcheck and "RSS query enable" query.  Preset to
         * -1 so the user space tool can detect that the driver
@@ -1132,7 +1135,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
        ivi.spoofchk = -1;
        ivi.rss_query_en = -1;
        ivi.trusted = -1;
-       memset(ivi.mac, 0, sizeof(ivi.mac));
        /* The default value for VF link state is "auto"
         * IFLA_VF_LINK_STATE_AUTO which equals zero
         */
@@ -1467,6 +1469,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
        [IFLA_LINK_NETNSID]     = { .type = NLA_S32 },
        [IFLA_PROTO_DOWN]       = { .type = NLA_U8 },
        [IFLA_XDP]              = { .type = NLA_NESTED },
+       [IFLA_GROUP]            = { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
index 4b9518a..6f95612 100644 (file)
@@ -188,12 +188,6 @@ static inline void dnrt_free(struct dn_route *rt)
        call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
-static inline void dnrt_drop(struct dn_route *rt)
-{
-       dst_release(&rt->dst);
-       call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
-}
-
 static void dn_dst_check_expire(unsigned long dummy)
 {
        int i;
@@ -248,7 +242,7 @@ static int dn_dst_gc(struct dst_ops *ops)
                        }
                        *rtp = rt->dst.dn_next;
                        rt->dst.dn_next = NULL;
-                       dnrt_drop(rt);
+                       dnrt_free(rt);
                        break;
                }
                spin_unlock_bh(&dn_rt_hash_table[i].lock);
@@ -350,7 +344,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
                        dst_use(&rth->dst, now);
                        spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 
-                       dnrt_drop(rt);
+                       dst_free(&rt->dst);
                        *rp = rth;
                        return 0;
                }
@@ -380,7 +374,7 @@ static void dn_run_flush(unsigned long dummy)
                for(; rt; rt = next) {
                        next = rcu_dereference_raw(rt->dst.dn_next);
                        RCU_INIT_POINTER(rt->dst.dn_next, NULL);
-                       dst_free((struct dst_entry *)rt);
+                       dnrt_free(rt);
                }
 
 nothing_to_declare:
@@ -1187,7 +1181,7 @@ make_route:
        if (dev_out->flags & IFF_LOOPBACK)
                flags |= RTCF_LOCAL;
 
-       rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST);
+       rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, DST_HOST);
        if (rt == NULL)
                goto e_nobufs;
 
index 1ed81ac..aa8ffec 100644 (file)
@@ -102,7 +102,9 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 {
        struct nlmsghdr *nlh = nlmsg_hdr(skb);
 
-       if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+       if (skb->len < sizeof(*nlh) ||
+           nlh->nlmsg_len < sizeof(*nlh) ||
+           skb->len < nlh->nlmsg_len)
                return;
 
        if (!netlink_capable(skb, CAP_NET_ADMIN))
index c73160f..0a0a392 100644 (file)
@@ -378,7 +378,6 @@ static void hsr_dev_destroy(struct net_device *hsr_dev)
        del_timer_sync(&hsr->announce_timer);
 
        synchronize_rcu();
-       free_netdev(hsr_dev);
 }
 
 static const struct net_device_ops hsr_device_ops = {
@@ -404,7 +403,8 @@ void hsr_dev_setup(struct net_device *dev)
        SET_NETDEV_DEVTYPE(dev, &hsr_type);
        dev->priv_flags |= IFF_NO_QUEUE;
 
-       dev->destructor = hsr_dev_destroy;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = hsr_dev_destroy;
 
        dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
                           NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
index 4ebe2aa..04b5450 100644 (file)
@@ -324,8 +324,7 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame,
        unsigned long irqflags;
 
        frame->is_supervision = is_supervision_frame(port->hsr, skb);
-       frame->node_src = hsr_get_node(&port->hsr->node_db, skb,
-                                      frame->is_supervision);
+       frame->node_src = hsr_get_node(port, skb, frame->is_supervision);
        if (frame->node_src == NULL)
                return -1; /* Unknown node and !is_supervision, or no mem */
 
index 7ea9258..284a9b8 100644 (file)
@@ -158,9 +158,10 @@ struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
 
 /* Get the hsr_node from which 'skb' was sent.
  */
-struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
                              bool is_sup)
 {
+       struct list_head *node_db = &port->hsr->node_db;
        struct hsr_node *node;
        struct ethhdr *ethhdr;
        u16 seq_out;
@@ -186,7 +187,11 @@ struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
                 */
                seq_out = hsr_get_skb_sequence_nr(skb) - 1;
        } else {
-               WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
+               /* This is also called for frames from the master port,
+                * so only warn for non-master ports.
+                */
+               if (port->type != HSR_PT_MASTER)
+                       WARN_ONCE(1, "%s: Non-HSR frame\n", __func__);
                seq_out = HSR_SEQNR_START;
        }
 
index 438b40f..4e04f0e 100644 (file)
@@ -18,7 +18,7 @@ struct hsr_node;
 
 struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
                              u16 seq_out);
-struct hsr_node *hsr_get_node(struct list_head *node_db, struct sk_buff *skb,
+struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
                              bool is_sup);
 void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
                          struct hsr_port *port);
index d7efbf0..0a866f3 100644 (file)
@@ -107,7 +107,7 @@ static void lowpan_setup(struct net_device *ldev)
 
        ldev->netdev_ops        = &lowpan_netdev_ops;
        ldev->header_ops        = &lowpan_header_ops;
-       ldev->destructor        = free_netdev;
+       ldev->needs_free_netdev = true;
        ldev->features          |= NETIF_F_NETNS_LOCAL;
 }
 
index 43318b5..9144fa7 100644 (file)
@@ -657,8 +657,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
        /* Needed by both icmp_global_allow and icmp_xmit_lock */
        local_bh_disable();
 
-       /* Check global sysctl_icmp_msgs_per_sec ratelimit */
-       if (!icmpv4_global_allow(net, type, code))
+       /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
+        * incoming dev is loopback.  If the outgoing dev changes to not
+        * be loopback, the peer ratelimit still works (in icmpv4_xrlim_allow)
+        */
+       if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
+             !icmpv4_global_allow(net, type, code))
                goto out_bh_enable;
 
        sk = icmp_xmit_lock(net);
index 44fd86d..ec9a396 100644 (file)
@@ -1112,6 +1112,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
        pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
        if (!pmc)
                return;
+       spin_lock_init(&pmc->lock);
        spin_lock_bh(&im->lock);
        pmc->interface = im->interface;
        in_dev_hold(in_dev);
@@ -2071,21 +2072,26 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 static void ip_mc_clear_src(struct ip_mc_list *pmc)
 {
-       struct ip_sf_list *psf, *nextpsf;
+       struct ip_sf_list *psf, *nextpsf, *tomb, *sources;
 
-       for (psf = pmc->tomb; psf; psf = nextpsf) {
+       spin_lock_bh(&pmc->lock);
+       tomb = pmc->tomb;
+       pmc->tomb = NULL;
+       sources = pmc->sources;
+       pmc->sources = NULL;
+       pmc->sfmode = MCAST_EXCLUDE;
+       pmc->sfcount[MCAST_INCLUDE] = 0;
+       pmc->sfcount[MCAST_EXCLUDE] = 1;
+       spin_unlock_bh(&pmc->lock);
+
+       for (psf = tomb; psf; psf = nextpsf) {
                nextpsf = psf->sf_next;
                kfree(psf);
        }
-       pmc->tomb = NULL;
-       for (psf = pmc->sources; psf; psf = nextpsf) {
+       for (psf = sources; psf; psf = nextpsf) {
                nextpsf = psf->sf_next;
                kfree(psf);
        }
-       pmc->sources = NULL;
-       pmc->sfmode = MCAST_EXCLUDE;
-       pmc->sfcount[MCAST_INCLUDE] = 0;
-       pmc->sfcount[MCAST_EXCLUDE] = 1;
 }
 
 /* Join a multicast group
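The ip_mc_clear_src() rework above snapshots and clears the tomb and source lists while holding pmc->lock, then walks and frees the detached entries only after the lock is dropped, so concurrent users of the pmc never observe half-freed chains. A userspace analog of the detach-then-free pattern (hypothetical types; a pthread mutex stands in for the bh-disabling spinlock):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct node {
            struct node *next;
    };

    static struct node *list;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void clear_list(void)
    {
            struct node *head, *next;

            pthread_mutex_lock(&lock);
            head = list;                    /* detach under the lock */
            list = NULL;
            pthread_mutex_unlock(&lock);

            for (; head; head = next) {     /* free outside the lock */
                    next = head->next;
                    free(head);
            }
    }

    int main(void)
    {
            for (int i = 0; i < 3; i++) {
                    struct node *n = calloc(1, sizeof(*n));

                    n->next = list;
                    list = n;
            }
            clear_list();
            printf("list cleared\n");
            return 0;
    }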
index 7a3fd25..532b36e 100644 (file)
@@ -964,7 +964,8 @@ static int __ip_append_data(struct sock *sk,
                csummode = CHECKSUM_PARTIAL;
 
        cork->length += length;
-       if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
+       if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
+            (skb && skb_is_gso(skb))) &&
            (sk->sk_protocol == IPPROTO_UDP) &&
            (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
            (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
index b878ecb..129d1a3 100644 (file)
@@ -446,6 +446,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
        return 0;
 
 drop:
+       if (tun_dst)
+               dst_release((struct dst_entry *)tun_dst);
        kfree_skb(skb);
        return 0;
 }
@@ -967,7 +969,6 @@ static void ip_tunnel_dev_free(struct net_device *dev)
        gro_cells_destroy(&tunnel->gro_cells);
        dst_cache_destroy(&tunnel->dst_cache);
        free_percpu(dev->tstats);
-       free_netdev(dev);
 }
 
 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
@@ -1155,7 +1156,8 @@ int ip_tunnel_init(struct net_device *dev)
        struct iphdr *iph = &tunnel->parms.iph;
        int err;
 
-       dev->destructor = ip_tunnel_dev_free;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = ip_tunnel_dev_free;
        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
        if (!dev->tstats)
                return -ENOMEM;
index 551de4d..8ae425c 100644 (file)
@@ -101,8 +101,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id);
 static void ipmr_free_table(struct mr_table *mrt);
 
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
-                         struct sk_buff *skb, struct mfc_cache *cache,
-                         int local);
+                         struct net_device *dev, struct sk_buff *skb,
+                         struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct mr_table *mrt,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
@@ -501,7 +501,7 @@ static void reg_vif_setup(struct net_device *dev)
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->netdev_ops         = &reg_vif_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
        dev->features           |= NETIF_F_NETNS_LOCAL;
 }
 
@@ -988,7 +988,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
 
                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
                } else {
-                       ip_mr_forward(net, mrt, skb, c, 0);
+                       ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
                }
        }
 }
@@ -1073,7 +1073,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 
 /* Queue a packet for resolution. It gets locked cache entry! */
 static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
-                                struct sk_buff *skb)
+                                struct sk_buff *skb, struct net_device *dev)
 {
        const struct iphdr *iph = ip_hdr(skb);
        struct mfc_cache *c;
@@ -1130,6 +1130,10 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
+               if (dev) {
+                       skb->dev = dev;
+                       skb->skb_iif = dev->ifindex;
+               }
                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
                err = 0;
        }
@@ -1828,10 +1832,10 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
 
 /* "local" means that we should preserve one skb (for local delivery) */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
-                         struct sk_buff *skb, struct mfc_cache *cache,
-                         int local)
+                         struct net_device *dev, struct sk_buff *skb,
+                         struct mfc_cache *cache, int local)
 {
-       int true_vifi = ipmr_find_vif(mrt, skb->dev);
+       int true_vifi = ipmr_find_vif(mrt, dev);
        int psend = -1;
        int vif, ct;
 
@@ -1853,13 +1857,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
        }
 
        /* Wrong interface: drop packet and (maybe) send PIM assert. */
-       if (mrt->vif_table[vif].dev != skb->dev) {
-               struct net_device *mdev;
-
-               mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
-               if (mdev == skb->dev)
-                       goto forward;
-
+       if (mrt->vif_table[vif].dev != dev) {
                if (rt_is_output_route(skb_rtable(skb))) {
                        /* It is our own packet, looped back.
                         * Very complicated situation...
@@ -2053,7 +2051,7 @@ int ip_mr_input(struct sk_buff *skb)
                read_lock(&mrt_lock);
                vif = ipmr_find_vif(mrt, dev);
                if (vif >= 0) {
-                       int err2 = ipmr_cache_unresolved(mrt, vif, skb);
+                       int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
                        read_unlock(&mrt_lock);
 
                        return err2;
@@ -2064,7 +2062,7 @@ int ip_mr_input(struct sk_buff *skb)
        }
 
        read_lock(&mrt_lock);
-       ip_mr_forward(net, mrt, skb, cache, local);
+       ip_mr_forward(net, mrt, dev, skb, cache, local);
        read_unlock(&mrt_lock);
 
        if (local)
@@ -2238,7 +2236,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
                iph->saddr = saddr;
                iph->daddr = daddr;
                iph->version = 0;
-               err = ipmr_cache_unresolved(mrt, vif, skb2);
+               err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
                read_unlock(&mrt_lock);
                rcu_read_unlock();
                return err;
index b5ea036..40aca78 100644 (file)
@@ -2330,6 +2330,8 @@ int tcp_disconnect(struct sock *sk, int flags)
        tcp_init_send_head(sk);
        memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
        __sk_dst_reset(sk);
+       dst_release(sk->sk_rx_dst);
+       sk->sk_rx_dst = NULL;
        tcp_saved_syn_free(tp);
 
        /* Clean up fastopen related fields */
index 6a4fb1e..1d2dbac 100644 (file)
@@ -332,9 +332,9 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
 static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
                                   unsigned long delay)
 {
-       if (!delayed_work_pending(&ifp->dad_work))
-               in6_ifa_hold(ifp);
-       mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
+       in6_ifa_hold(ifp);
+       if (mod_delayed_work(addrconf_wq, &ifp->dad_work, delay))
+               in6_ifa_put(ifp);
 }
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -3369,6 +3369,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct netdev_notifier_changeupper_info *info;
        struct inet6_dev *idev = __in6_dev_get(dev);
+       struct net *net = dev_net(dev);
        int run_pending = 0;
        int err;
 
@@ -3384,7 +3385,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
        case NETDEV_CHANGEMTU:
                /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
                if (dev->mtu < IPV6_MIN_MTU) {
-                       addrconf_ifdown(dev, 1);
+                       addrconf_ifdown(dev, dev != net->loopback_dev);
                        break;
                }
 
@@ -3500,7 +3501,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
                         * IPV6_MIN_MTU stop IPv6 on this interface.
                         */
                        if (dev->mtu < IPV6_MIN_MTU)
-                               addrconf_ifdown(dev, 1);
+                               addrconf_ifdown(dev, dev != net->loopback_dev);
                }
                break;
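The addrconf_mod_dad_work() fix above replaces a racy check-then-hold with unconditional accounting: always take a reference before rearming the delayed work, and drop the extra one when mod_delayed_work() reports the work was already pending, since that pending instance already owns a reference. A single-threaded sketch of the bookkeeping (hypothetical names; the real mod_delayed_work() does return true exactly when the work was pending):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int refs = 1;     /* the object's own reference */
    static bool work_pending;

    static bool mod_work(void)      /* true if it was already pending */
    {
            bool was_pending = work_pending;

            work_pending = true;
            return was_pending;
    }

    static void arm(void)
    {
            atomic_fetch_add(&refs, 1);             /* "in6_ifa_hold()" */
            if (mod_work())
                    atomic_fetch_sub(&refs, 1);     /* "in6_ifa_put()" */
    }

    int main(void)
    {
            arm();
            arm();  /* rearming must not accumulate references */
            printf("refs = %d\n", atomic_load(&refs));  /* 2: object + work */
            return 0;
    }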
 
index e011122..5c786f5 100644 (file)
@@ -250,8 +250,14 @@ ipv4_connected:
         */
 
        err = ip6_datagram_dst_update(sk, true);
-       if (err)
+       if (err) {
+               /* Reset daddr and dport so that udp_v6_early_demux()
+                * fails to find this socket
+                */
+               memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
+               inet->inet_dport = 0;
                goto out;
+       }
 
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
index d950d43..f02f131 100644 (file)
 #include <net/ipv6.h>
 #include <linux/icmpv6.h>
 
+static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
+{
+       int off = sizeof(struct ipv6hdr);
+       struct ipv6_opt_hdr *exthdr;
+
+       if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+               return offsetof(struct ipv6hdr, nexthdr);
+
+       while (off < nhlen) {
+               exthdr = (void *)ipv6_hdr + off;
+               if (exthdr->nexthdr == NEXTHDR_ESP)
+                       return off;
+
+               off += ipv6_optlen(exthdr);
+       }
+
+       return 0;
+}
+
 static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
                                         struct sk_buff *skb)
 {
@@ -38,6 +57,7 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
        struct xfrm_state *x;
        __be32 seq;
        __be32 spi;
+       int nhoff;
        int err;
 
        skb_pull(skb, offset);
@@ -72,6 +92,11 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 
        xo->flags |= XFRM_GRO;
 
+       nhoff = esp6_nexthdr_esp_offset(ipv6_hdr(skb), offset);
+       if (!nhoff)
+               goto out;
+
+       IP6CB(skb)->nhoff = nhoff;
        XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
        XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
        XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
index eea23b5..ec849d8 100644 (file)
@@ -32,7 +32,6 @@ struct fib6_rule {
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
                                   int flags, pol_lookup_t lookup)
 {
-       struct rt6_info *rt;
        struct fib_lookup_arg arg = {
                .lookup_ptr = lookup,
                .flags = FIB_LOOKUP_NOREF,
@@ -44,21 +43,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
        fib_rules_lookup(net->ipv6.fib6_rules_ops,
                         flowi6_to_flowi(fl6), flags, &arg);
 
-       rt = arg.result;
+       if (arg.result)
+               return arg.result;
 
-       if (!rt) {
-               dst_hold(&net->ipv6.ip6_null_entry->dst);
-               return &net->ipv6.ip6_null_entry->dst;
-       }
-
-       if (rt->rt6i_flags & RTF_REJECT &&
-           rt->dst.error == -EAGAIN) {
-               ip6_rt_put(rt);
-               rt = net->ipv6.ip6_null_entry;
-               dst_hold(&rt->dst);
-       }
-
-       return &rt->dst;
+       dst_hold(&net->ipv6.ip6_null_entry->dst);
+       return &net->ipv6.ip6_null_entry->dst;
 }
 
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -121,7 +110,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
                        flp6->saddr = saddr;
                }
                err = rt->dst.error;
-               goto out;
+               if (err != -EAGAIN)
+                       goto out;
        }
 again:
        ip6_rt_put(rt);
index 230b5aa..8d7b113 100644 (file)
@@ -491,7 +491,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
        local_bh_disable();
 
        /* Check global sysctl_icmp_msgs_per_sec ratelimit */
-       if (!icmpv6_global_allow(type))
+       if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
                goto out_bh_enable;
 
        mip6_addr_swap(skb);
index 2fd5ca1..77f7f8c 100644 (file)
@@ -62,6 +62,7 @@ static inline u32 ila_locator_hash(struct ila_locator loc)
 {
        u32 *v = (u32 *)loc.v32;
 
+       __ila_hash_secret_init();
        return jhash_2words(v[0], v[1], hashrnd);
 }
 
index d4bf2c6..e6b78ba 100644 (file)
@@ -289,8 +289,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
        struct rt6_info *rt;
 
        rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
-       if (rt->rt6i_flags & RTF_REJECT &&
-           rt->dst.error == -EAGAIN) {
+       if (rt->dst.error == -EAGAIN) {
                ip6_rt_put(rt);
                rt = net->ipv6.ip6_null_entry;
                dst_hold(&rt->dst);
index 0c5b4ca..64eea39 100644 (file)
@@ -991,13 +991,13 @@ static void ip6gre_dev_free(struct net_device *dev)
 
        dst_cache_destroy(&t->dst_cache);
        free_percpu(dev->tstats);
-       free_netdev(dev);
 }
 
 static void ip6gre_tunnel_setup(struct net_device *dev)
 {
        dev->netdev_ops = &ip6gre_netdev_ops;
-       dev->destructor = ip6gre_dev_free;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = ip6gre_dev_free;
 
        dev->type = ARPHRD_IP6GRE;
 
@@ -1148,7 +1148,7 @@ static int __net_init ip6gre_init_net(struct net *net)
        return 0;
 
 err_reg_dev:
-       ip6gre_dev_free(ign->fb_tunnel_dev);
+       free_netdev(ign->fb_tunnel_dev);
 err_alloc_dev:
        return err;
 }
@@ -1300,7 +1300,8 @@ static void ip6gre_tap_setup(struct net_device *dev)
        ether_setup(dev);
 
        dev->netdev_ops = &ip6gre_tap_netdev_ops;
-       dev->destructor = ip6gre_dev_free;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = ip6gre_dev_free;
 
        dev->features |= NETIF_F_NETNS_LOCAL;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
index bf8a58a..1699acb 100644 (file)
@@ -1390,7 +1390,7 @@ emsgsize:
         */
 
        cork->length += length;
-       if ((((length + fragheaderlen) > mtu) ||
+       if ((((length + (skb ? skb->len : headersize)) > mtu) ||
             (skb && skb_is_gso(skb))) &&
            (sk->sk_protocol == IPPROTO_UDP) &&
            (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
index 9b37f97..8c6c3c8 100644 (file)
@@ -254,7 +254,6 @@ static void ip6_dev_free(struct net_device *dev)
        gro_cells_destroy(&t->gro_cells);
        dst_cache_destroy(&t->dst_cache);
        free_percpu(dev->tstats);
-       free_netdev(dev);
 }
 
 static int ip6_tnl_create2(struct net_device *dev)
@@ -322,7 +321,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
        return t;
 
 failed_free:
-       ip6_dev_free(dev);
+       free_netdev(dev);
 failed:
        return ERR_PTR(err);
 }
@@ -859,6 +858,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
        return 0;
 
 drop:
+       if (tun_dst)
+               dst_release((struct dst_entry *)tun_dst);
        kfree_skb(skb);
        return 0;
 }
@@ -1247,7 +1248,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
                fl6.flowi6_proto = IPPROTO_IPIP;
                fl6.daddr = key->u.ipv6.dst;
                fl6.flowlabel = key->label;
-               dsfield = ip6_tclass(key->label);
+               dsfield = key->tos;
        } else {
                if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
                        encap_limit = t->parms.encap_limit;
@@ -1318,7 +1319,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
                fl6.flowi6_proto = IPPROTO_IPV6;
                fl6.daddr = key->u.ipv6.dst;
                fl6.flowlabel = key->label;
-               dsfield = ip6_tclass(key->label);
+               dsfield = key->tos;
        } else {
                offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
                /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
@@ -1777,7 +1778,8 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
 static void ip6_tnl_dev_setup(struct net_device *dev)
 {
        dev->netdev_ops = &ip6_tnl_netdev_ops;
-       dev->destructor = ip6_dev_free;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = ip6_dev_free;
 
        dev->type = ARPHRD_TUNNEL6;
        dev->flags |= IFF_NOARP;
@@ -2224,7 +2226,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
        return 0;
 
 err_register:
-       ip6_dev_free(ip6n->fb_tnl_dev);
+       free_netdev(ip6n->fb_tnl_dev);
 err_alloc_dev:
        return err;
 }
index d67ef56..837ea1e 100644 (file)
@@ -180,7 +180,6 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
 static void vti6_dev_free(struct net_device *dev)
 {
        free_percpu(dev->tstats);
-       free_netdev(dev);
 }
 
 static int vti6_tnl_create2(struct net_device *dev)
@@ -235,7 +234,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
        return t;
 
 failed_free:
-       vti6_dev_free(dev);
+       free_netdev(dev);
 failed:
        return NULL;
 }
@@ -842,7 +841,8 @@ static const struct net_device_ops vti6_netdev_ops = {
 static void vti6_dev_setup(struct net_device *dev)
 {
        dev->netdev_ops = &vti6_netdev_ops;
-       dev->destructor = vti6_dev_free;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = vti6_dev_free;
 
        dev->type = ARPHRD_TUNNEL6;
        dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
@@ -1100,7 +1100,7 @@ static int __net_init vti6_init_net(struct net *net)
        return 0;
 
 err_register:
-       vti6_dev_free(ip6n->fb_tnl_dev);
+       free_netdev(ip6n->fb_tnl_dev);
 err_alloc_dev:
        return err;
 }
index 374997d..2ecb39b 100644 (file)
@@ -733,7 +733,7 @@ static void reg_vif_setup(struct net_device *dev)
        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->netdev_ops         = &reg_vif_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
        dev->features           |= NETIF_F_NETNS_LOCAL;
 }
 
index cc8e3ae..e88bcb8 100644 (file)
@@ -219,7 +219,7 @@ static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
        u64 buff64[SNMP_MIB_MAX];
        int i;
 
-       memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+       memset(buff64, 0, sizeof(u64) * SNMP_MIB_MAX);
 
        snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
        for (i = 0; itemlist[i].name; i++)
index dc61b0b..322bd62 100644 (file)
@@ -2804,6 +2804,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg)
        if ((rt->dst.dev == dev || !dev) &&
            rt != adn->net->ipv6.ip6_null_entry &&
            (rt->rt6i_nsiblings == 0 ||
+            (dev && netdev_unregistering(dev)) ||
             !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
                return -1;
 
@@ -3721,7 +3722,11 @@ static int ip6_route_dev_notify(struct notifier_block *this,
                net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
 #endif
-        } else if (event == NETDEV_UNREGISTER) {
+        } else if (event == NETDEV_UNREGISTER &&
+                   dev->reg_state != NETREG_UNREGISTERED) {
+               /* NETDEV_UNREGISTER could be fired multiple times by
+                * netdev_wait_allrefs(). Make sure we only call this once.
+                */
                in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
                in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
index 61e5902..f8ad158 100644 (file)
@@ -265,7 +265,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
        return nt;
 
 failed_free:
-       ipip6_dev_free(dev);
+       free_netdev(dev);
 failed:
        return NULL;
 }
@@ -305,7 +305,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
         * we try harder to allocate.
         */
        kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
-               kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
+               kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
                NULL;
 
        rcu_read_lock();
@@ -1336,7 +1336,6 @@ static void ipip6_dev_free(struct net_device *dev)
 
        dst_cache_destroy(&tunnel->dst_cache);
        free_percpu(dev->tstats);
-       free_netdev(dev);
 }
 
 #define SIT_FEATURES (NETIF_F_SG          | \
@@ -1351,7 +1350,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 
        dev->netdev_ops         = &ipip6_netdev_ops;
-       dev->destructor         = ipip6_dev_free;
+       dev->needs_free_netdev  = true;
+       dev->priv_destructor    = ipip6_dev_free;
 
        dev->type               = ARPHRD_SIT;
        dev->hard_header_len    = LL_MAX_HEADER + t_hlen;
index 06ec39b..75703fd 100644 (file)
@@ -879,7 +879,8 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
        struct sock *sk;
 
        udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-               if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+               if (sk->sk_state == TCP_ESTABLISHED &&
+                   INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
                        return sk;
                /* Only check first socket in chain */
                break;
index 08a807b..3ef5d91 100644 (file)
@@ -43,8 +43,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
                return 1;
 #endif
 
-       ipv6_hdr(skb)->payload_len = htons(skb->len);
        __skb_push(skb, skb->data - skb_network_header(skb));
+       ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 
        if (xo && (xo->flags & XFRM_GRO)) {
                skb_mac_header_rebuild(skb);
index 74d09f9..3be8528 100644 (file)
@@ -65,7 +65,7 @@ static void irlan_eth_setup(struct net_device *dev)
        ether_setup(dev);
 
        dev->netdev_ops         = &irlan_eth_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
        dev->min_mtu            = 0;
        dev->max_mtu            = ETH_MAX_MTU;
 
index 512dc43..b1432b6 100644 (file)
@@ -1157,6 +1157,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
                        goto out;
        }
 
+       err = -ENOBUFS;
        key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
        if (sa->sadb_sa_auth) {
                int keysize = 0;
@@ -1168,8 +1169,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
                if (key)
                        keysize = (key->sadb_key_bits + 7) / 8;
                x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
-               if (!x->aalg)
+               if (!x->aalg) {
+                       err = -ENOMEM;
                        goto out;
+               }
                strcpy(x->aalg->alg_name, a->name);
                x->aalg->alg_key_len = 0;
                if (key) {
@@ -1188,8 +1191,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
                                goto out;
                        }
                        x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
-                       if (!x->calg)
+                       if (!x->calg) {
+                               err = -ENOMEM;
                                goto out;
+                       }
                        strcpy(x->calg->alg_name, a->name);
                        x->props.calgo = sa->sadb_sa_encrypt;
                } else {
@@ -1203,8 +1208,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
                        if (key)
                                keysize = (key->sadb_key_bits + 7) / 8;
                        x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
-                       if (!x->ealg)
+                       if (!x->ealg) {
+                               err = -ENOMEM;
                                goto out;
+                       }
                        strcpy(x->ealg->alg_name, a->name);
                        x->ealg->alg_key_len = 0;
                        if (key) {
@@ -1249,8 +1256,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
                struct xfrm_encap_tmpl *natt;
 
                x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
-               if (!x->encap)
+               if (!x->encap) {
+                       err = -ENOMEM;
                        goto out;
+               }
 
                natt = x->encap;
                n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
@@ -2755,6 +2764,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
        int err, err2;
 
        err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
+       if (!err)
+               xfrm_garbage_collect(net);
        err2 = unicast_flush_resp(sk, hdr);
        if (err || err2) {
                if (err == -ESRCH) /* empty table - old silent behavior */
index 8b21af7..4de2ec9 100644 (file)
@@ -114,12 +114,13 @@ static void l2tp_eth_get_stats64(struct net_device *dev,
 {
        struct l2tp_eth *priv = netdev_priv(dev);
 
-       stats->tx_bytes   = atomic_long_read(&priv->tx_bytes);
-       stats->tx_packets = atomic_long_read(&priv->tx_packets);
-       stats->tx_dropped = atomic_long_read(&priv->tx_dropped);
-       stats->rx_bytes   = atomic_long_read(&priv->rx_bytes);
-       stats->rx_packets = atomic_long_read(&priv->rx_packets);
-       stats->rx_errors  = atomic_long_read(&priv->rx_errors);
+       stats->tx_bytes   = (unsigned long) atomic_long_read(&priv->tx_bytes);
+       stats->tx_packets = (unsigned long) atomic_long_read(&priv->tx_packets);
+       stats->tx_dropped = (unsigned long) atomic_long_read(&priv->tx_dropped);
+       stats->rx_bytes   = (unsigned long) atomic_long_read(&priv->rx_bytes);
+       stats->rx_packets = (unsigned long) atomic_long_read(&priv->rx_packets);
+       stats->rx_errors  = (unsigned long) atomic_long_read(&priv->rx_errors);
+
 }
 
 static const struct net_device_ops l2tp_eth_netdev_ops = {
@@ -141,7 +142,7 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
        dev->priv_flags         &= ~IFF_TX_SKB_SHARING;
        dev->features           |= NETIF_F_LLTX;
        dev->netdev_ops         = &l2tp_eth_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
 }
 
 static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
index 6c2e606..4a388fe 100644 (file)
@@ -902,6 +902,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
        default:
                return -EINVAL;
        }
+       sdata->u.ap.req_smps = sdata->smps_mode;
+
        sdata->needed_rx_chains = sdata->local->rx_chains;
 
        sdata->vif.bss_conf.beacon_int = params->beacon_interval;
index 665501a..5e002f6 100644 (file)
@@ -1531,7 +1531,7 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
                return true;
        /* can't handle non-legacy preamble yet */
        if (status->flag & RX_FLAG_MACTIME_PLCP_START &&
-           status->encoding != RX_ENC_LEGACY)
+           status->encoding == RX_ENC_LEGACY)
                return true;
        return false;
 }
index 8fae1a7..f5f5015 100644 (file)
@@ -1213,7 +1213,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
 static void ieee80211_if_free(struct net_device *dev)
 {
        free_percpu(dev->tstats);
-       free_netdev(dev);
 }
 
 static void ieee80211_if_setup(struct net_device *dev)
@@ -1221,7 +1220,8 @@ static void ieee80211_if_setup(struct net_device *dev)
        ether_setup(dev);
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->netdev_ops = &ieee80211_dataif_ops;
-       dev->destructor = ieee80211_if_free;
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = ieee80211_if_free;
 }
 
 static void ieee80211_if_setup_no_queue(struct net_device *dev)
@@ -1816,6 +1816,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
                ret = dev_alloc_name(ndev, ndev->name);
                if (ret < 0) {
                        ieee80211_if_free(ndev);
+                       free_netdev(ndev);
                        return ret;
                }
 
@@ -1905,7 +1906,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
                ret = register_netdevice(ndev);
                if (ret) {
-                       ieee80211_if_free(ndev);
+                       free_netdev(ndev);
                        return ret;
                }
        }
index 0ea9712..cc8e6ea 100644 (file)
@@ -601,7 +601,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
        struct ieee80211_supported_band *sband;
        struct ieee80211_chanctx_conf *chanctx_conf;
        struct ieee80211_channel *chan;
-       u32 rate_flags, rates = 0;
+       u32 rates = 0;
 
        sdata_assert_lock(sdata);
 
@@ -612,7 +612,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
                return;
        }
        chan = chanctx_conf->def.chan;
-       rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
        rcu_read_unlock();
        sband = local->hw.wiphy->bands[chan->band];
        shift = ieee80211_vif_get_shift(&sdata->vif);
@@ -636,9 +635,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
                 */
                rates_len = 0;
                for (i = 0; i < sband->n_bitrates; i++) {
-                       if ((rate_flags & sband->bitrates[i].flags)
-                           != rate_flags)
-                               continue;
                        rates |= BIT(i);
                        rates_len++;
                }
@@ -2818,7 +2814,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
                                u32 *rates, u32 *basic_rates,
                                bool *have_higher_than_11mbit,
                                int *min_rate, int *min_rate_index,
-                               int shift, u32 rate_flags)
+                               int shift)
 {
        int i, j;
 
@@ -2846,8 +2842,6 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
                        int brate;
 
                        br = &sband->bitrates[j];
-                       if ((rate_flags & br->flags) != rate_flags)
-                               continue;
 
                        brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
                        if (brate == rate) {
@@ -4398,40 +4392,32 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
                        return -ENOMEM;
        }
 
-       if (new_sta || override) {
-               err = ieee80211_prep_channel(sdata, cbss);
-               if (err) {
-                       if (new_sta)
-                               sta_info_free(local, new_sta);
-                       return -EINVAL;
-               }
-       }
-
+       /*
+        * Set up the information for the new channel before setting the
+        * new channel.  We can't change the basic rates bitmap and the
+        * channel (sband) it refers to in a completely race-free way, but
+        * if we set everything up first we at least avoid calling into
+        * the driver's bss_info_changed() method with invalid information
+        * (since we do call that from changing the channel - only for
+        * IDLE and perhaps some others, but ...).
+        *
+        * So to avoid that, just set up all the new information before
+        * the channel change, but tell the driver to apply it only
+        * afterwards, since it might need the new channel for that.
+        */
        if (new_sta) {
                u32 rates = 0, basic_rates = 0;
                bool have_higher_than_11mbit;
                int min_rate = INT_MAX, min_rate_index = -1;
-               struct ieee80211_chanctx_conf *chanctx_conf;
                const struct cfg80211_bss_ies *ies;
                int shift = ieee80211_vif_get_shift(&sdata->vif);
-               u32 rate_flags;
-
-               rcu_read_lock();
-               chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
-               if (WARN_ON(!chanctx_conf)) {
-                       rcu_read_unlock();
-                       sta_info_free(local, new_sta);
-                       return -EINVAL;
-               }
-               rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
-               rcu_read_unlock();
 
                ieee80211_get_rates(sband, bss->supp_rates,
                                    bss->supp_rates_len,
                                    &rates, &basic_rates,
                                    &have_higher_than_11mbit,
                                    &min_rate, &min_rate_index,
-                                   shift, rate_flags);
+                                   shift);
 
                /*
                 * This used to be a workaround for basic rates missing
@@ -4489,8 +4475,22 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
                        sdata->vif.bss_conf.sync_dtim_count = 0;
                }
                rcu_read_unlock();
+       }
 
-               /* tell driver about BSSID, basic rates and timing */
+       if (new_sta || override) {
+               err = ieee80211_prep_channel(sdata, cbss);
+               if (err) {
+                       if (new_sta)
+                               sta_info_free(local, new_sta);
+                       return -EINVAL;
+               }
+       }
+
+       if (new_sta) {
+               /*
+                * tell driver about BSSID, basic rates and timing
+                * this was set up above, before setting the channel
+                */
                ieee80211_bss_info_change_notify(sdata,
                        BSS_CHANGED_BSSID | BSS_CHANGED_BASIC_RATES |
                        BSS_CHANGED_BEACON_INT);
index 1f75280..3674fe3 100644 (file)
@@ -1613,12 +1613,16 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
         */
        if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
            !ieee80211_has_morefrags(hdr->frame_control) &&
+           !ieee80211_is_back_req(hdr->frame_control) &&
            !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
            (rx->sdata->vif.type == NL80211_IFTYPE_AP ||
             rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
-           /* PM bit is only checked in frames where it isn't reserved,
+           /*
+            * PM bit is only checked in frames where it isn't reserved,
             * in AP mode it's reserved in non-bufferable management frames
             * (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field)
+            * BAR frames should be ignored as specified in
+            * IEEE 802.11-2012 10.2.1.2.
             */
            (!ieee80211_is_mgmt(hdr->frame_control) ||
             ieee80211_is_bufferable_mmpdu(hdr->frame_control))) {
index c1ef22d..cc19614 100644 (file)
@@ -17,6 +17,7 @@
 #include <asm/unaligned.h>
 #include <net/mac80211.h>
 #include <crypto/aes.h>
+#include <crypto/algapi.h>
 
 #include "ieee80211_i.h"
 #include "michael.h"
@@ -153,7 +154,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
        data_len = skb->len - hdrlen - MICHAEL_MIC_LEN;
        key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY];
        michael_mic(key, hdr, data, data_len, mic);
-       if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0)
+       if (crypto_memneq(mic, data + data_len, MICHAEL_MIC_LEN))
                goto mic_fail;
 
        /* remove Michael MIC from payload */
@@ -1048,7 +1049,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
                bip_aad(skb, aad);
                ieee80211_aes_cmac(key->u.aes_cmac.tfm, aad,
                                   skb->data + 24, skb->len - 24, mic);
-               if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+               if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
                        key->u.aes_cmac.icverrors++;
                        return RX_DROP_UNUSABLE;
                }
@@ -1098,7 +1099,7 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx)
                bip_aad(skb, aad);
                ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad,
                                       skb->data + 24, skb->len - 24, mic);
-               if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+               if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
                        key->u.aes_cmac.icverrors++;
                        return RX_DROP_UNUSABLE;
                }
@@ -1202,7 +1203,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
                if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce,
                                       skb->data + 24, skb->len - 24,
                                       mic) < 0 ||
-                   memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+                   crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
                        key->u.aes_gmac.icverrors++;
                        return RX_DROP_UNUSABLE;
                }
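
All four hunks in this file replace memcmp() with crypto_memneq() for MIC
verification.  The point is timing: memcmp() may return as soon as it sees the
first differing byte, which lets an attacker probe a MIC byte by byte, while
crypto_memneq() takes the same time wherever the mismatch occurs.  A user-space
reduction of the idea (the real crypto_memneq() is more careful than this):

    #include <stddef.h>

    /* Accumulate all byte differences; no data-dependent early exit. */
    int ct_memneq(const void *a, const void *b, size_t n)
    {
            const unsigned char *pa = a, *pb = b;
            unsigned char diff = 0;

            while (n--)
                    diff |= *pa++ ^ *pb++;
            return diff != 0;       /* nonzero iff the buffers differ */
    }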
index 06019db..bd88a9b 100644 (file)
@@ -526,8 +526,6 @@ static void mac802154_wpan_free(struct net_device *dev)
        struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
 
        mac802154_llsec_destroy(&sdata->sec);
-
-       free_netdev(dev);
 }
 
 static void ieee802154_if_setup(struct net_device *dev)
@@ -593,7 +591,8 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
                                        sdata->dev->dev_addr);
 
                sdata->dev->header_ops = &mac802154_header_ops;
-               sdata->dev->destructor = mac802154_wpan_free;
+               sdata->dev->needs_free_netdev = true;
+               sdata->dev->priv_destructor = mac802154_wpan_free;
                sdata->dev->netdev_ops = &mac802154_wpan_ops;
                sdata->dev->ml_priv = &mac802154_mlme_wpan;
                wpan_dev->promiscuous_mode = false;
@@ -608,7 +607,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
 
                break;
        case NL802154_IFTYPE_MONITOR:
-               sdata->dev->destructor = free_netdev;
+               sdata->dev->needs_free_netdev = true;
                sdata->dev->netdev_ops = &mac802154_monitor_ops;
                wpan_dev->promiscuous_mode = true;
                break;
index 89193a6..04a3128 100644 (file)
@@ -94,7 +94,6 @@ static void internal_dev_destructor(struct net_device *dev)
        struct vport *vport = ovs_internal_dev_get_vport(dev);
 
        ovs_vport_free(vport);
-       free_netdev(dev);
 }
 
 static void
@@ -156,7 +155,8 @@ static void do_setup(struct net_device *netdev)
        netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
        netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
                              IFF_PHONY_HEADROOM | IFF_NO_QUEUE;
-       netdev->destructor = internal_dev_destructor;
+       netdev->needs_free_netdev = true;
+       netdev->priv_destructor = internal_dev_destructor;
        netdev->ethtool_ops = &internal_dev_ethtool_ops;
        netdev->rtnl_link_ops = &internal_dev_link_ops;
 
index 21c28b5..2c93379 100644 (file)
@@ -236,7 +236,7 @@ static void gprs_setup(struct net_device *dev)
        dev->tx_queue_len       = 10;
 
        dev->netdev_ops         = &gprs_netdev_ops;
-       dev->destructor         = free_netdev;
+       dev->needs_free_netdev  = true;
 }
 
 /*
index 0a4e284..5436922 100644 (file)
@@ -217,7 +217,7 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
                                       unsigned int *_toklen)
 {
        const __be32 *xdr = *_xdr;
-       unsigned int toklen = *_toklen, n_parts, loop, tmp;
+       unsigned int toklen = *_toklen, n_parts, loop, tmp, paddedlen;
 
        /* there must be at least one name, and at least #names+1 length
         * words */
@@ -247,16 +247,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
                toklen -= 4;
                if (tmp <= 0 || tmp > AFSTOKEN_STRING_MAX)
                        return -EINVAL;
-               if (tmp > toklen)
+               paddedlen = (tmp + 3) & ~3;
+               if (paddedlen > toklen)
                        return -EINVAL;
                princ->name_parts[loop] = kmalloc(tmp + 1, GFP_KERNEL);
                if (!princ->name_parts[loop])
                        return -ENOMEM;
                memcpy(princ->name_parts[loop], xdr, tmp);
                princ->name_parts[loop][tmp] = 0;
-               tmp = (tmp + 3) & ~3;
-               toklen -= tmp;
-               xdr += tmp >> 2;
+               toklen -= paddedlen;
+               xdr += paddedlen >> 2;
        }
 
        if (toklen < 4)
@@ -265,16 +265,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
        toklen -= 4;
        if (tmp <= 0 || tmp > AFSTOKEN_K5_REALM_MAX)
                return -EINVAL;
-       if (tmp > toklen)
+       paddedlen = (tmp + 3) & ~3;
+       if (paddedlen > toklen)
                return -EINVAL;
        princ->realm = kmalloc(tmp + 1, GFP_KERNEL);
        if (!princ->realm)
                return -ENOMEM;
        memcpy(princ->realm, xdr, tmp);
        princ->realm[tmp] = 0;
-       tmp = (tmp + 3) & ~3;
-       toklen -= tmp;
-       xdr += tmp >> 2;
+       toklen -= paddedlen;
+       xdr += paddedlen >> 2;
 
        _debug("%s/...@%s", princ->name_parts[0], princ->realm);
 
@@ -293,7 +293,7 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td,
                                         unsigned int *_toklen)
 {
        const __be32 *xdr = *_xdr;
-       unsigned int toklen = *_toklen, len;
+       unsigned int toklen = *_toklen, len, paddedlen;
 
        /* there must be at least one tag and one length word */
        if (toklen <= 8)
@@ -307,15 +307,17 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td,
        toklen -= 8;
        if (len > max_data_size)
                return -EINVAL;
+       paddedlen = (len + 3) & ~3;
+       if (paddedlen > toklen)
+               return -EINVAL;
        td->data_len = len;
 
        if (len > 0) {
                td->data = kmemdup(xdr, len, GFP_KERNEL);
                if (!td->data)
                        return -ENOMEM;
-               len = (len + 3) & ~3;
-               toklen -= len;
-               xdr += len >> 2;
+               toklen -= paddedlen;
+               xdr += paddedlen >> 2;
        }
 
        _debug("tag %x len %x", td->tag, td->data_len);
@@ -387,7 +389,7 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
                                    const __be32 **_xdr, unsigned int *_toklen)
 {
        const __be32 *xdr = *_xdr;
-       unsigned int toklen = *_toklen, len;
+       unsigned int toklen = *_toklen, len, paddedlen;
 
        /* there must be at least one length word */
        if (toklen <= 4)
@@ -399,6 +401,9 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
        toklen -= 4;
        if (len > AFSTOKEN_K5_TIX_MAX)
                return -EINVAL;
+       paddedlen = (len + 3) & ~3;
+       if (paddedlen > toklen)
+               return -EINVAL;
        *_tktlen = len;
 
        _debug("ticket len %u", len);
@@ -407,9 +412,8 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
                *_ticket = kmemdup(xdr, len, GFP_KERNEL);
                if (!*_ticket)
                        return -ENOMEM;
-               len = (len + 3) & ~3;
-               toklen -= len;
-               xdr += len >> 2;
+               toklen -= paddedlen;
+               xdr += paddedlen >> 2;
        }
 
        *_xdr = xdr;
@@ -552,7 +556,7 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
 {
        const __be32 *xdr = prep->data, *token;
        const char *cp;
-       unsigned int len, tmp, loop, ntoken, toklen, sec_ix;
+       unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix;
        size_t datalen = prep->datalen;
        int ret;
 
@@ -578,22 +582,21 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
        if (len < 1 || len > AFSTOKEN_CELL_MAX)
                goto not_xdr;
        datalen -= 4;
-       tmp = (len + 3) & ~3;
-       if (tmp > datalen)
+       paddedlen = (len + 3) & ~3;
+       if (paddedlen > datalen)
                goto not_xdr;
 
        cp = (const char *) xdr;
        for (loop = 0; loop < len; loop++)
                if (!isprint(cp[loop]))
                        goto not_xdr;
-       if (len < tmp)
-               for (; loop < tmp; loop++)
-                       if (cp[loop])
-                               goto not_xdr;
+       for (; loop < paddedlen; loop++)
+               if (cp[loop])
+                       goto not_xdr;
        _debug("cellname: [%u/%u] '%*.*s'",
-              len, tmp, len, len, (const char *) xdr);
-       datalen -= tmp;
-       xdr += tmp >> 2;
+              len, paddedlen, len, len, (const char *) xdr);
+       datalen -= paddedlen;
+       xdr += paddedlen >> 2;
 
        /* get the token count */
        if (datalen < 12)
@@ -614,10 +617,11 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
                sec_ix = ntohl(*xdr);
                datalen -= 4;
                _debug("token: [%x/%zx] %x", toklen, datalen, sec_ix);
-               if (toklen < 20 || toklen > datalen)
+               paddedlen = (toklen + 3) & ~3;
+               if (toklen < 20 || toklen > datalen || paddedlen > datalen)
                        goto not_xdr;
-               datalen -= (toklen + 3) & ~3;
-               xdr += (toklen + 3) >> 2;
+               datalen -= paddedlen;
+               xdr += paddedlen >> 2;
 
        } while (--loop > 0);
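
Every hunk in this file enforces the same rule: an XDR opaque field of length
len occupies (len + 3) & ~3 bytes on the wire, and it is that padded length
which must be bounds-checked before the cursor advances, otherwise a crafted
length walks xdr past the end of the token.  A user-space reduction of the
corrected pattern (hypothetical helper, not the kernel code):

    #include <stdint.h>
    #include <string.h>

    /* Consume one counted opaque field from an XDR stream, checking the
     * padded length against the remaining bytes before advancing. */
    int xdr_take(const uint32_t **xdr, unsigned int *toklen,
                 void *out, unsigned int len)
    {
            unsigned int paddedlen = (len + 3) & ~3u;

            if (paddedlen < len || paddedlen > *toklen)
                    return -1;              /* overflow or short buffer */
            memcpy(out, *xdr, len);
            *toklen -= paddedlen;
            *xdr += paddedlen >> 2;         /* xdr points at 32-bit words */
            return 0;
    }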
 
index 164b5ac..7dc5892 100644 (file)
@@ -94,8 +94,10 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla,
                k++;
        }
 
-       if (n)
+       if (n) {
+               err = -EINVAL;
                goto err_out;
+       }
 
        return keys_ex;
 
index f42008b..b062bc8 100644 (file)
@@ -132,21 +132,21 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
                }
        }
 
-       spin_lock_bh(&police->tcf_lock);
        if (est) {
                err = gen_replace_estimator(&police->tcf_bstats, NULL,
                                            &police->tcf_rate_est,
                                            &police->tcf_lock,
                                            NULL, est);
                if (err)
-                       goto failure_unlock;
+                       goto failure;
        } else if (tb[TCA_POLICE_AVRATE] &&
                   (ret == ACT_P_CREATED ||
                    !gen_estimator_active(&police->tcf_rate_est))) {
                err = -EINVAL;
-               goto failure_unlock;
+               goto failure;
        }
 
+       spin_lock_bh(&police->tcf_lock);
        /* No failure allowed after this point */
        police->tcfp_mtu = parm->mtu;
        if (police->tcfp_mtu == 0) {
@@ -192,8 +192,6 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 
        return ret;
 
-failure_unlock:
-       spin_unlock_bh(&police->tcf_lock);
 failure:
        qdisc_put_rtab(P_tab);
        qdisc_put_rtab(R_tab);
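
The reordering above matters because gen_replace_estimator() can sleep (it
allocates with GFP_KERNEL), and sleeping inside a spin_lock_bh() section is
forbidden.  The fix moves all fallible, possibly-sleeping work in front of the
lock and leaves only plain assignments inside it.  Condensed from the hunk
above, the resulting shape is:

    /* anything that can sleep or fail happens before the lock */
    err = gen_replace_estimator(&police->tcf_bstats, NULL,
                                &police->tcf_rate_est,
                                &police->tcf_lock, NULL, est);
    if (err)
            goto failure;

    spin_lock_bh(&police->tcf_lock);
    /* no failure allowed past this point: only assignments */
    police->tcfp_mtu = parm->mtu;
    spin_unlock_bh(&police->tcf_lock);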
index e88342f..cfdbfa1 100644 (file)
@@ -1019,7 +1019,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
                return sch;
        }
        /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
-       ops->destroy(sch);
+       if (ops->destroy)
+               ops->destroy(sch);
 err_out3:
        dev_put(dev);
        kfree((char *) sch - sch->padded);
index 8c58923..3dcd0ec 100644 (file)
@@ -275,6 +275,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
                if (sctp_sk(sk)->bind_hash)
                        sctp_put_port(sk);
 
+               sctp_sk(sk)->ep = NULL;
                sock_put(sk);
        }
 
index 048954e..9a64721 100644 (file)
@@ -278,7 +278,6 @@ out:
 
 static int sctp_sock_dump(struct sock *sk, void *p)
 {
-       struct sctp_endpoint *ep = sctp_sk(sk)->ep;
        struct sctp_comm_param *commp = p;
        struct sk_buff *skb = commp->skb;
        struct netlink_callback *cb = commp->cb;
@@ -287,7 +286,9 @@ static int sctp_sock_dump(struct sock *sk, void *p)
        int err = 0;
 
        lock_sock(sk);
-       list_for_each_entry(assoc, &ep->asocs, asocs) {
+       if (!sctp_sk(sk)->ep)
+               goto release;
+       list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) {
                if (cb->args[4] < cb->args[1])
                        goto next;
 
index f16c8d9..3a8318e 100644 (file)
@@ -4622,13 +4622,13 @@ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *),
 
        for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize;
             hash++, head++) {
-               read_lock(&head->lock);
+               read_lock_bh(&head->lock);
                sctp_for_each_hentry(epb, &head->chain) {
                        err = cb(sctp_ep(epb), p);
                        if (err)
                                break;
                }
-               read_unlock(&head->lock);
+               read_unlock_bh(&head->lock);
        }
 
        return err;
@@ -4666,9 +4666,8 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
        if (err)
                return err;
 
-       sctp_transport_get_idx(net, &hti, pos);
-       obj = sctp_transport_get_next(net, &hti);
-       for (; obj && !IS_ERR(obj); obj = sctp_transport_get_next(net, &hti)) {
+       obj = sctp_transport_get_idx(net, &hti, pos + 1);
+       for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) {
                struct sctp_transport *transport = obj;
 
                if (!sctp_transport_hold(transport))
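
The switch to the _bh lock variants in the first hunk closes a classic
self-deadlock: the endpoint hash lock is also taken from softirq context, so a
process-context reader holding a plain read_lock() could be interrupted on the
same CPU by a BH writer that then spins on head->lock forever.  Schematically
(condensed from the hunk above, with the error handling elided):

    read_lock_bh(&head->lock);      /* BHs off: a softirq writer cannot
                                     * preempt us on this CPU and spin
                                     * on head->lock */
    sctp_for_each_hentry(epb, &head->chain)
            err = cb(sctp_ep(epb), p);
    read_unlock_bh(&head->lock);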
index 312ef7d..ab30876 100644 (file)
@@ -508,7 +508,7 @@ bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
        }
 
        if (skb_cloned(_skb) &&
-           pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL))
+           pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
                goto exit;
 
        /* Now reverse the concerned fields */
index 6a7fe76..1a0c961 100644 (file)
@@ -999,7 +999,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        struct path path = { };
 
        err = -EINVAL;
-       if (sunaddr->sun_family != AF_UNIX)
+       if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
+           sunaddr->sun_family != AF_UNIX)
                goto out;
 
        if (addr_len == sizeof(short)) {
@@ -1110,6 +1111,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
        unsigned int hash;
        int err;
 
+       err = -EINVAL;
+       if (alen < offsetofend(struct sockaddr, sa_family))
+               goto out;
+
        if (addr->sa_family != AF_UNSPEC) {
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
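
Both added checks rely on offsetofend(), which the kernel defines as
offsetof() plus the size of the member; comparing the user-supplied length
against it guarantees that the bytes of sun_family (or sa_family) were
actually provided before the code reads them.  A small self-contained
illustration (the macro is the kernel's own definition):

    #include <stddef.h>
    #include <stdio.h>
    #include <sys/un.h>

    #define offsetofend(TYPE, MEMBER) \
            (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

    int main(void)
    {
            /* an addr_len shorter than this cannot contain sun_family */
            printf("%zu\n", offsetofend(struct sockaddr_un, sun_family));
            return 0;
    }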
index 1a4db67..6cdb054 100644 (file)
@@ -914,13 +914,12 @@ int call_commit_handler(struct net_device *dev)
  * Main IOCTl dispatcher.
  * Check the type of IOCTL and call the appropriate wrapper...
  */
-static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
+static int wireless_process_ioctl(struct net *net, struct iwreq *iwr,
                                  unsigned int cmd,
                                  struct iw_request_info *info,
                                  wext_ioctl_func standard,
                                  wext_ioctl_func private)
 {
-       struct iwreq *iwr = (struct iwreq *) ifr;
        struct net_device *dev;
        iw_handler      handler;
 
@@ -928,7 +927,7 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
         * The copy_to/from_user() of ifr is also dealt with in there */
 
        /* Make sure the device exist */
-       if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL)
+       if ((dev = __dev_get_by_name(net, iwr->ifr_name)) == NULL)
                return -ENODEV;
 
        /* A bunch of special cases, then the generic case...
@@ -957,9 +956,6 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
                else if (private)
                        return private(dev, iwr, cmd, info, handler);
        }
-       /* Old driver API : call driver ioctl handler */
-       if (dev->netdev_ops->ndo_do_ioctl)
-               return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
        return -EOPNOTSUPP;
 }
 
@@ -977,7 +973,7 @@ static int wext_permission_check(unsigned int cmd)
 }
 
 /* entry point from dev ioctl */
-static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
+static int wext_ioctl_dispatch(struct net *net, struct iwreq *iwr,
                               unsigned int cmd, struct iw_request_info *info,
                               wext_ioctl_func standard,
                               wext_ioctl_func private)
@@ -987,9 +983,9 @@ static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr,
        if (ret)
                return ret;
 
-       dev_load(net, ifr->ifr_name);
+       dev_load(net, iwr->ifr_name);
        rtnl_lock();
-       ret = wireless_process_ioctl(net, ifr, cmd, info, standard, private);
+       ret = wireless_process_ioctl(net, iwr, cmd, info, standard, private);
        rtnl_unlock();
 
        return ret;
@@ -1039,18 +1035,18 @@ static int ioctl_standard_call(struct net_device *      dev,
 }
 
 
-int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
                      void __user *arg)
 {
        struct iw_request_info info = { .cmd = cmd, .flags = 0 };
        int ret;
 
-       ret = wext_ioctl_dispatch(net, ifr, cmd, &info,
+       ret = wext_ioctl_dispatch(net, iwr, cmd, &info,
                                  ioctl_standard_call,
                                  ioctl_private_call);
        if (ret >= 0 &&
            IW_IS_GET(cmd) &&
-           copy_to_user(arg, ifr, sizeof(struct iwreq)))
+           copy_to_user(arg, iwr, sizeof(struct iwreq)))
                return -EFAULT;
 
        return ret;
@@ -1107,7 +1103,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
        info.cmd = cmd;
        info.flags = IW_REQUEST_FLAG_COMPAT;
 
-       ret = wext_ioctl_dispatch(net, (struct ifreq *) &iwr, cmd, &info,
+       ret = wext_ioctl_dispatch(net, &iwr, cmd, &info,
                                  compat_standard_call,
                                  compat_private_call);
 
index abf81b3..55b2ac3 100644 (file)
@@ -4,8 +4,7 @@
 
 obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
                      xfrm_input.o xfrm_output.o \
-                     xfrm_sysctl.o xfrm_replay.o
-obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o
+                     xfrm_sysctl.o xfrm_replay.o xfrm_device.o
 obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
index 574e6f3..5aba036 100644 (file)
@@ -22,6 +22,7 @@
 #include <net/xfrm.h>
 #include <linux/notifier.h>
 
+#ifdef CONFIG_XFRM_OFFLOAD
 int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
 {
        int err;
@@ -137,6 +138,7 @@ ok:
        return true;
 }
 EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+#endif
 
 int xfrm_dev_register(struct net_device *dev)
 {
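
With xfrm_device.o now built unconditionally, the offload-only entry points
are compiled out with #ifdef instead of through the Makefile.  The usual
companion to this pattern is a set of static inline stubs in the header so
that callers compile either way; a hedged sketch of that convention (the
actual net/xfrm.h declarations may differ):

    #ifdef CONFIG_XFRM_OFFLOAD
    int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
    bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
    #else
    static inline int validate_xmit_xfrm(struct sk_buff *skb,
                                         netdev_features_t features)
    {
            return 0;
    }

    static inline bool xfrm_dev_offload_ok(struct sk_buff *skb,
                                           struct xfrm_state *x)
    {
            return false;
    }
    #endif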
index ed4e52d..643a18f 100644 (file)
@@ -1006,10 +1006,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
                err = -ESRCH;
 out:
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-
-       if (cnt)
-               xfrm_garbage_collect(net);
-
        return err;
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
index 38614df..86116e9 100644 (file)
@@ -2027,6 +2027,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
                        return 0;
                return err;
        }
+       xfrm_garbage_collect(net);
 
        c.data.type = type;
        c.event = nlh->nlmsg_type;
index ce753a4..c583a1e 100644 (file)
@@ -14,7 +14,15 @@ __headers:
 include scripts/Kbuild.include
 
 srcdir        := $(srctree)/$(obj)
-subdirs       := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.))
+
+# When make is run under a fakechroot environment, the function
+# $(wildcard $(srcdir)/*/.) returns not only directories but also regular
+# files.  So we use a combination of sort/dir/wildcard instead, which
+# works correctly under fakechroot.
+subdirs       := $(patsubst $(srcdir)/%/,%,\
+                $(filter-out $(srcdir)/,\
+                $(sort $(dir $(wildcard $(srcdir)/*/)))))
+
 # caller may set destination dir (when installing to asm/)
 _dst          := $(if $(dst),$(dst),$(obj))
 
index 3bffdca..b724a02 100644 (file)
@@ -75,7 +75,7 @@ struct string_list *copy_list_range(struct string_list *start,
 int yylex(void);
 int yyparse(void);
 
-void error_with_pos(const char *, ...);
+void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2)));
 
 /*----------------------------------------------------------------------*/
 #define xmalloc(size) ({ void *__ptr = malloc(size);           \
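
The attribute added above tells the compiler that error_with_pos() takes a
printf-style format string as argument 1 with the variadic arguments starting
at 2, so -Wformat can type-check every call site.  A self-contained example of
the effect (hypothetical function, same attribute):

    #include <stdarg.h>
    #include <stdio.h>

    void report(const char *fmt, ...) __attribute__((format(printf, 1, 2)));

    void report(const char *fmt, ...)
    {
            va_list ap;

            va_start(ap, fmt);
            vfprintf(stderr, fmt, ap);
            va_end(ap);
    }

    /* report("line %d: %s\n", "oops", 42);
     * now draws a -Wformat warning: %d expects int, got char * */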
index 90a091b..eb81446 100644 (file)
@@ -196,7 +196,7 @@ clean-files     += config.pot linux.pot
 
 # Check that we have the required ncurses stuff installed for lxdialog (menuconfig)
 PHONY += $(obj)/dochecklxdialog
-$(addprefix $(obj)/,$(lxdialog)): $(obj)/dochecklxdialog
+$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/dochecklxdialog
 $(obj)/dochecklxdialog:
        $(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_EXTRACFLAGS) $(HOSTLOADLIBES_mconf)
 
index a9bc533..0031147 100644 (file)
@@ -271,7 +271,7 @@ static struct mitem k_menu_items[MAX_MENU_ITEMS];
 static int items_num;
 static int global_exit;
 /* the currently selected button */
-const char *current_instructions = menu_instructions;
+static const char *current_instructions = menu_instructions;
 
 static char *dialog_input_result;
 static int dialog_input_result_len;
@@ -305,7 +305,7 @@ struct function_keys {
 };
 
 static const int function_keys_num = 9;
-struct function_keys function_keys[] = {
+static struct function_keys function_keys[] = {
        {
                .key_str = "F1",
                .func = "Help",
@@ -508,7 +508,7 @@ static int get_mext_match(const char *match_str, match_f flag)
        index = (index + items_num) % items_num;
        while (true) {
                char *str = k_menu_items[index].str;
-               if (strcasestr(str, match_str) != 0)
+               if (strcasestr(str, match_str) != NULL)
                        return index;
                if (flag == FIND_NEXT_MATCH_UP ||
                    flag == MATCH_TINKER_PATTERN_UP)
@@ -1067,7 +1067,7 @@ static int do_match(int key, struct match_state *state, int *ans)
 
 static void conf(struct menu *menu)
 {
-       struct menu *submenu = 0;
+       struct menu *submenu = NULL;
        const char *prompt = menu_get_prompt(menu);
        struct symbol *sym;
        int res;
@@ -1234,7 +1234,7 @@ static void show_help(struct menu *menu)
 static void conf_choice(struct menu *menu)
 {
        const char *prompt = _(menu_get_prompt(menu));
-       struct menu *child = 0;
+       struct menu *child = NULL;
        struct symbol *active;
        int selected_index = 0;
        int last_top_row = 0;
@@ -1456,7 +1456,7 @@ static void conf_save(void)
        }
 }
 
-void setup_windows(void)
+static void setup_windows(void)
 {
        int lines, columns;
 
index 4b2f44c..a64b1c3 100644 (file)
@@ -129,7 +129,7 @@ static void no_colors_theme(void)
        mkattrn(FUNCTION_TEXT, A_REVERSE);
 }
 
-void set_colors()
+void set_colors(void)
 {
        start_color();
        use_default_colors();
@@ -192,7 +192,7 @@ const char *get_line(const char *text, int line_no)
        int lines = 0;
 
        if (!text)
-               return 0;
+               return NULL;
 
        for (i = 0; text[i] != '\0' && lines < line_no; i++)
                if (text[i] == '\n')
index d661f2f..d23dcbf 100755 (executable)
@@ -106,6 +106,7 @@ all_compiled_sources()
                case "$i" in
                        *.[cS])
                                j=${i/\.[cS]/\.o}
+                               j="${j#$tree}"
                                if [ -e $j ]; then
                                        echo $i
                                fi
index d7f282d..1d32cd2 100644 (file)
@@ -164,7 +164,7 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
        hmac_misc.mode = inode->i_mode;
        crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
        if (evm_hmac_attrs & EVM_ATTR_FSUUID)
-               crypto_shash_update(desc, inode->i_sb->s_uuid,
+               crypto_shash_update(desc, &inode->i_sb->s_uuid.b[0],
                                    sizeof(inode->i_sb->s_uuid));
        crypto_shash_final(desc, digest);
 }
index 3ab1067..6f885fa 100644 (file)
@@ -61,7 +61,7 @@ struct ima_rule_entry {
        enum ima_hooks func;
        int mask;
        unsigned long fsmagic;
-       u8 fsuuid[16];
+       uuid_t fsuuid;
        kuid_t uid;
        kuid_t fowner;
        bool (*uid_op)(kuid_t, kuid_t);    /* Handlers for operators       */
@@ -244,7 +244,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
            && rule->fsmagic != inode->i_sb->s_magic)
                return false;
        if ((rule->flags & IMA_FSUUID) &&
-           memcmp(rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid)))
+           !uuid_equal(&rule->fsuuid, &inode->i_sb->s_uuid))
                return false;
        if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid))
                return false;
@@ -711,14 +711,12 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
                case Opt_fsuuid:
                        ima_log_string(ab, "fsuuid", args[0].from);
 
-                       if (memchr_inv(entry->fsuuid, 0x00,
-                                      sizeof(entry->fsuuid))) {
+                       if (!uuid_is_null(&entry->fsuuid)) {
                                result = -EINVAL;
                                break;
                        }
 
-                       result = blk_part_pack_uuid(args[0].from,
-                                                   entry->fsuuid);
+                       result = uuid_parse(args[0].from, &entry->fsuuid);
                        if (!result)
                                entry->flags |= IMA_FSUUID;
                        break;
@@ -1087,7 +1085,7 @@ int ima_policy_show(struct seq_file *m, void *v)
        }
 
        if (entry->flags & IMA_FSUUID) {
-               seq_printf(m, "fsuuid=%pU", entry->fsuuid);
+               seq_printf(m, "fsuuid=%pU", &entry->fsuuid);
                seq_puts(m, " ");
        }
 
index e67a526..819fd68 100644 (file)
@@ -1106,10 +1106,8 @@ static int selinux_parse_opts_str(char *options,
 
        opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int),
                                       GFP_KERNEL);
-       if (!opts->mnt_opts_flags) {
-               kfree(opts->mnt_opts);
+       if (!opts->mnt_opts_flags)
                goto out_err;
-       }
 
        if (fscontext) {
                opts->mnt_opts[num_mnt_opts] = fscontext;
@@ -1132,6 +1130,7 @@ static int selinux_parse_opts_str(char *options,
        return 0;
 
 out_err:
+       security_free_mnt_opts(opts);
        kfree(context);
        kfree(defcontext);
        kfree(fscontext);
index 5088d4b..009e6c9 100644 (file)
@@ -2492,7 +2492,7 @@ static int pcm_chmap_ctl_get(struct snd_kcontrol *kcontrol,
        struct snd_pcm_substream *substream;
        const struct snd_pcm_chmap_elem *map;
 
-       if (snd_BUG_ON(!info->chmap))
+       if (!info->chmap)
                return -EINVAL;
        substream = snd_pcm_chmap_substream(info, idx);
        if (!substream)
@@ -2524,7 +2524,7 @@ static int pcm_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag,
        unsigned int __user *dst;
        int c, count = 0;
 
-       if (snd_BUG_ON(!info->chmap))
+       if (!info->chmap)
                return -EINVAL;
        if (size < 8)
                return -ENOMEM;
index 9e6f54f..1e26854 100644 (file)
@@ -682,7 +682,9 @@ static void out_stream_callback(struct fw_iso_context *context, u32 tstamp,
                cycle = increment_cycle_count(cycle, 1);
                if (s->handle_packet(s, 0, cycle, i) < 0) {
                        s->packet_index = -1;
-                       amdtp_stream_pcm_abort(s);
+                       if (in_interrupt())
+                               amdtp_stream_pcm_abort(s);
+                       WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
                        return;
                }
        }
@@ -734,7 +736,9 @@ static void in_stream_callback(struct fw_iso_context *context, u32 tstamp,
        /* Queueing error or detecting invalid payload. */
        if (i < packets) {
                s->packet_index = -1;
-               amdtp_stream_pcm_abort(s);
+               if (in_interrupt())
+                       amdtp_stream_pcm_abort(s);
+               WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
                return;
        }
 
index 7e88317..ea1a91e 100644 (file)
@@ -135,7 +135,7 @@ struct amdtp_stream {
        /* For a PCM substream processing. */
        struct snd_pcm_substream *pcm;
        struct tasklet_struct period_tasklet;
-       unsigned int pcm_buffer_pointer;
+       snd_pcm_uframes_t pcm_buffer_pointer;
        unsigned int pcm_period_pointer;
 
        /* To wait for first packet. */
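
The WRITE_ONCE() stores added in the two callbacks only help if the reader
side uses a matching READ_ONCE(); the pair keeps the compiler from tearing,
caching or re-reading a value that is updated concurrently from IRQ context.
A sketch of the reader this implies (the real .pointer callback may differ):

    static snd_pcm_uframes_t example_pcm_pointer(struct amdtp_stream *s)
    {
            /* pairs with the WRITE_ONCE() in the packet callbacks */
            return READ_ONCE(s->pcm_buffer_pointer);
    }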
index d6fb2d5..60ce1cf 100644 (file)
@@ -295,6 +295,8 @@ struct hda_codec {
 
 #define list_for_each_codec(c, bus) \
        list_for_each_entry(c, &(bus)->core.codec_list, core.list)
+#define list_for_each_codec_safe(c, n, bus)                            \
+       list_for_each_entry_safe(c, n, &(bus)->core.codec_list, core.list)
 
 /* snd_hda_codec_read/write optional flags */
 #define HDA_RW_NO_RESPONSE_FALLBACK    (1 << 0)
index 3715a57..1c60beb 100644 (file)
@@ -1337,8 +1337,12 @@ EXPORT_SYMBOL_GPL(azx_probe_codecs);
 /* configure each codec instance */
 int azx_codec_configure(struct azx *chip)
 {
-       struct hda_codec *codec;
-       list_for_each_codec(codec, &chip->bus) {
+       struct hda_codec *codec, *next;
+
+       /* use the _safe version here, since snd_hda_codec_configure()
+        * deregisters the codec on error and thereby removes it from the
+        * bus list, invalidating a plain iterator.
+        */
+       list_for_each_codec_safe(codec, next, &chip->bus) {
                snd_hda_codec_configure(codec);
        }
        return 0;
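
list_for_each_codec_safe() is needed here because the loop body can unlink the
entry it is visiting: like list_for_each_entry_safe(), it caches the successor
in a second cursor before the body runs.  A generic illustration of why the
plain form breaks (kernel-style sketch, hypothetical item type):

    /* UNSAFE: advancing reads pos->list.next after pos was freed */
    list_for_each_entry(pos, &head, list) {
            if (pos->dead) {
                    list_del(&pos->list);
                    kfree(pos);     /* use-after-free on the next step */
            }
    }

    /* SAFE: n already points at the successor before the body runs */
    list_for_each_entry_safe(pos, n, &head, list) {
            if (pos->dead) {
                    list_del(&pos->list);
                    kfree(pos);
            }
    }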
index 2842c82..71545b5 100644 (file)
@@ -3174,6 +3174,7 @@ static int check_dyn_adc_switch(struct hda_codec *codec)
                                                spec->input_paths[i][nums]);
                                        spec->input_paths[i][nums] =
                                                spec->input_paths[i][n];
+                                       spec->input_paths[i][n] = 0;
                                }
                        }
                        nums++;
index 1770f08..01eb1dc 100644 (file)
@@ -370,10 +370,12 @@ enum {
 #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
 #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0)
 #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_BXT_T(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x1a98)
 #define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
-                       IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci)  || \
-                       IS_GLK(pci)
+#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci) || \
+                         IS_BXT_T(pci) || IS_KBL(pci) || IS_KBL_LP(pci) || \
+                         IS_KBL_H(pci) || IS_GLK(pci) || IS_CFL(pci))
 
 static char *driver_short_names[] = {
        [AZX_DRIVER_ICH] = "HDA Intel",
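
Besides adding the Broxton-T and Coffee Lake IDs, the macro hunk above fixes
real macro hygiene: the old IS_SKL_PLUS() closed its parentheses after
IS_BXT(pci), so the remaining ||-terms sat outside the macro body and combined
with whatever expression surrounded the call site.  A minimal demonstration of
the failure mode:

    #define BAD(x)  (x == 1) || (x == 2)
    #define GOOD(x) ((x == 1) || (x == 2))

    /* "BAD(1) && 0" expands to "(1 == 1) || ((1 == 2) && 0)", which is 1:
     * the trailing || escapes the macro.  "GOOD(1) && 0" is 0 as written. */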
@@ -2378,6 +2380,9 @@ static const struct pci_device_id azx_ids[] = {
        /* Kabylake-H */
        { PCI_DEVICE(0x8086, 0xa2f0),
          .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+       /* Coffee Lake */
+       { PCI_DEVICE(0x8086, 0xa348),
+         .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
        /* Broxton-P(Apollolake) */
        { PCI_DEVICE(0x8086, 0x5a98),
          .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
index e3f0667..e7d766d 100644 (file)
@@ -21,8 +21,9 @@
 #include "skl.h"
 
 /* Unique identification for getting NHLT blobs */
-static u8 OSC_UUID[16] = {0x6E, 0x88, 0x9F, 0xA6, 0xEB, 0x6C, 0x94, 0x45,
-                               0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53};
+static guid_t osc_guid =
+       GUID_INIT(0xA69F886E, 0x6CEB, 0x4594,
+                 0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53);
 
 struct nhlt_acpi_table *skl_nhlt_init(struct device *dev)
 {
@@ -37,7 +38,7 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev)
                return NULL;
        }
 
-       obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL);
+       obj = acpi_evaluate_dsm(handle, &osc_guid, 1, 1, NULL);
        if (obj && obj->type == ACPI_TYPE_BUFFER) {
                nhlt_ptr = (struct nhlt_resource_desc  *)obj->buffer.pointer;
                nhlt_table = (struct nhlt_acpi_table *)
index d6cdece..6f2e198 100644 (file)
@@ -1,5 +1,6 @@
 objtool-y += arch/$(SRCARCH)/
 objtool-y += builtin-check.o
+objtool-y += check.o
 objtool-y += elf.o
 objtool-y += special.o
 objtool-y += objtool.o
index 55a60d3..17c1195 100644 (file)
@@ -127,28 +127,13 @@ b) 100% reliable stack traces for DWARF enabled kernels
 
 c) Higher live patching compatibility rate
 
-   (NOTE: This is not yet implemented)
-
-   Currently with CONFIG_LIVEPATCH there's a basic live patching
-   framework which is safe for roughly 85-90% of "security" fixes.  But
-   patches can't have complex features like function dependency or
-   prototype changes, or data structure changes.
-
-   There's a strong need to support patches which have the more complex
-   features so that the patch compatibility rate for security fixes can
-   eventually approach something resembling 100%.  To achieve that, a
-   "consistency model" is needed, which allows tasks to be safely
-   transitioned from an unpatched state to a patched state.
-
-   One of the key requirements of the currently proposed livepatch
-   consistency model [*] is that it needs to walk the stack of each
-   sleeping task to determine if it can be transitioned to the patched
-   state.  If objtool can ensure that stack traces are reliable, this
-   consistency model can be used and the live patching compatibility
-   rate can be improved significantly.
-
-   [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com
+   Livepatch has an optional "consistency model", which is needed for
+   more complex patches.  In order for the consistency model to work,
+   stack traces need to be reliable (or an unreliable condition needs to
+   be detectable).  Objtool makes that possible.
 
+   For more details, see the livepatch documentation in the Linux kernel
+   source tree at Documentation/livepatch/livepatch.txt.
 
 Rules
 -----
@@ -201,80 +186,84 @@ To achieve the validation, objtool enforces the following rules:
    return normally.
 
 
-Errors in .S files
-------------------
+Objtool warnings
+----------------
 
-If you're getting an error in a compiled .S file which you don't
-understand, first make sure that the affected code follows the above
-rules.
+For asm files, if you're getting an error which doesn't make sense,
+first make sure that the affected code follows the above rules.
+
+For C files, the common culprits are inline asm statements and calls to
+"noreturn" functions.  See below for more details.
+
+Another possible cause for errors in C code is if the Makefile removes
+-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
 
 Here are some examples of common warnings reported by objtool, what
 they mean, and suggestions for how to fix them.
 
 
-1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
+1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
 
    The func() function made a function call without first saving and/or
-   updating the frame pointer.
-
-   If func() is indeed a callable function, add proper frame pointer
-   logic using the FRAME_BEGIN and FRAME_END macros.  Otherwise, remove
-   its ELF function annotation by changing ENDPROC to END.
+   updating the frame pointer, and CONFIG_FRAME_POINTER is enabled.
 
-   If you're getting this error in a .c file, see the "Errors in .c
-   files" section.
+   If the error is for an asm file, and func() is indeed a callable
+   function, add proper frame pointer logic using the FRAME_BEGIN and
+   FRAME_END macros.  Otherwise, if it's not a callable function, remove
+   its ELF function annotation by changing ENDPROC to END, and instead
+   use the manual CFI hint macros in asm/undwarf.h.
 
+   If it's a GCC-compiled .c file, the error may be because the function
+   uses an inline asm() statement which has a "call" instruction.  An
+   asm() statement with a call instruction must declare the use of the
+   stack pointer in its output operand.  For example, on x86_64:
 
-2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function
-
-   A return instruction was detected, but objtool couldn't find a way
-   for a callable function to reach the instruction.
+     register void *__sp asm("rsp");
+     asm volatile("call func" : "+r" (__sp));
 
-   If the return instruction is inside (or reachable from) a callable
-   function, the function needs to be annotated with the ENTRY/ENDPROC
-   macros.
+   Otherwise the stack frame may not get created before the call.
 
-   If you _really_ need a return instruction outside of a function, and
-   are 100% sure that it won't affect stack traces, you can tell
-   objtool to ignore it.  See the "Adding exceptions" section below.
 
+2. file.o: warning: objtool: .text+0x53: unreachable instruction
 
-3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction
+   Objtool couldn't find a code path to reach the instruction.
 
-   The instruction lives inside of a callable function, but there's no
-   possible control flow path from the beginning of the function to the
-   instruction.
+   If the error is for an asm file, and the instruction is inside (or
+   reachable from) a callable function, the function should be annotated
+   with the ENTRY/ENDPROC macros (ENDPROC is the important one).
+   Otherwise, the code should probably be annotated with the CFI hint
+   macros in asm/undwarf.h so objtool and the unwinder can know the
+   stack state associated with the code.
 
-   If the instruction is actually needed, and it's actually in a
-   callable function, ensure that its function is properly annotated
-   with ENTRY/ENDPROC.
+   If you're 100% sure the code won't affect stack traces, or if you're
+   just a bad person, you can tell objtool to ignore it.  See the
+   "Adding exceptions" section below.
 
    If it's not actually in a callable function (e.g. kernel entry code),
    change ENDPROC to END.
 
 
-4. asm_file.o: warning: objtool: func(): can't find starting instruction
+3. file.o: warning: objtool: func(): can't find starting instruction
    or
-   asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction
+   file.o: warning: objtool: func()+0x11dd: can't decode instruction
 
-   Did you put data in a text section?  If so, that can confuse
+   Does the file have data in a text section?  If so, that can confuse
    objtool's instruction decoder.  Move the data to a more appropriate
    section like .data or .rodata.
 
 
-5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction
-
-   This is a kernel entry/exit instruction like sysenter or sysret.
-   Such instructions aren't allowed in a callable function, and are most
-   likely part of the kernel entry code.
+4. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function
 
-   If the instruction isn't actually in a callable function, change
-   ENDPROC to END.
+   This is a kernel entry/exit instruction like sysenter or iret.  Such
+   instructions aren't allowed in a callable function, and are most
+   likely part of the kernel entry code.  They should usually not have
+   the callable function annotation (ENDPROC) and should always be
+   annotated with the CFI hint macros in asm/undwarf.h.
 
 
-6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer
+5. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
 
-   This is a dynamic jump or a jump to an undefined symbol.  Stacktool
+   This is a dynamic jump or a jump to an undefined symbol.  Objtool
    assumed it's a sibling call and detected that the frame pointer
    wasn't first restored to its original state.
 
@@ -282,24 +271,28 @@ they mean, and suggestions for how to fix them.
    destination code to the local file.
 
    If the instruction is not actually in a callable function (e.g.
-   kernel entry code), change ENDPROC to END.
+   kernel entry code), change ENDPROC to END and annotate manually with
+   the CFI hint macros in asm/undwarf.h.
 
 
-7. asm_file: warning: objtool: func()+0x5c: frame pointer state mismatch
+6. file: warning: objtool: func()+0x5c: stack state mismatch
 
    The instruction's frame pointer state is inconsistent, depending on
    which execution path was taken to reach the instruction.
 
-   Make sure the function pushes and sets up the frame pointer (for
-   x86_64, this means rbp) at the beginning of the function and pops it
-   at the end of the function.  Also make sure that no other code in the
-   function touches the frame pointer.
+   Make sure that, when CONFIG_FRAME_POINTER is enabled, the function
+   pushes and sets up the frame pointer (for x86_64, this means rbp) at
+   the beginning of the function and pops it at the end of the function.
+   Also make sure that no other code in the function touches the frame
+   pointer.
 
+   Another possibility is that the code has some asm or inline asm which
+   does some unusual things to the stack or the frame pointer.  In such
+   cases it's probably appropriate to use the CFI hint macros in
+   asm/undwarf.h.
 
-Errors in .c files
-------------------
 
-1. c_file.o: warning: objtool: funcA() falls through to next function funcB()
+7. file.o: warning: objtool: funcA() falls through to next function funcB()
 
    This means that funcA() doesn't end with a return instruction or an
    unconditional jump, and that objtool has determined that the function
@@ -318,22 +311,6 @@ Errors in .c files
       might be corrupt due to a gcc bug.  For more details, see:
       https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
 
-2. If you're getting any other objtool error in a compiled .c file, it
-   may be because the file uses an asm() statement which has a "call"
-   instruction.  An asm() statement with a call instruction must declare
-   the use of the stack pointer in its output operand.  For example, on
-   x86_64:
-
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
-
-   Otherwise the stack frame may not get created before the call.
-
-3. Another possible cause for errors in C code is if the Makefile removes
-   -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
-
-Also see the above section for .S file errors for more information what
-the individual error messages mean.
 
 If the error doesn't seem to make sense, it could be a bug in objtool.
 Feel free to ask the objtool maintainer for help.
index 27e019c..0e2765e 100644 (file)
@@ -25,7 +25,7 @@ OBJTOOL_IN := $(OBJTOOL)-in.o
 all: $(OBJTOOL)
 
 INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi
-CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
+CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -fomit-frame-pointer -O2 -g $(INCLUDES)
 LDFLAGS  += -lelf $(LIBSUBCMD)
 
 # Allow old libelf to be used:
index a59e061..21aeca8 100644 (file)
 #define _ARCH_H
 
 #include <stdbool.h>
+#include <linux/list.h>
 #include "elf.h"
+#include "cfi.h"
 
-#define INSN_FP_SAVE           1
-#define INSN_FP_SETUP          2
-#define INSN_FP_RESTORE                3
-#define INSN_JUMP_CONDITIONAL  4
-#define INSN_JUMP_UNCONDITIONAL        5
-#define INSN_JUMP_DYNAMIC      6
-#define INSN_CALL              7
-#define INSN_CALL_DYNAMIC      8
-#define INSN_RETURN            9
-#define INSN_CONTEXT_SWITCH    10
-#define INSN_NOP               11
-#define INSN_OTHER             12
+#define INSN_JUMP_CONDITIONAL  1
+#define INSN_JUMP_UNCONDITIONAL        2
+#define INSN_JUMP_DYNAMIC      3
+#define INSN_CALL              4
+#define INSN_CALL_DYNAMIC      5
+#define INSN_RETURN            6
+#define INSN_CONTEXT_SWITCH    7
+#define INSN_STACK             8
+#define INSN_NOP               9
+#define INSN_OTHER             10
 #define INSN_LAST              INSN_OTHER
 
+enum op_dest_type {
+       OP_DEST_REG,
+       OP_DEST_REG_INDIRECT,
+       OP_DEST_MEM,
+       OP_DEST_PUSH,
+       OP_DEST_LEAVE,
+};
+
+struct op_dest {
+       enum op_dest_type type;
+       unsigned char reg;
+       int offset;
+};
+
+enum op_src_type {
+       OP_SRC_REG,
+       OP_SRC_REG_INDIRECT,
+       OP_SRC_CONST,
+       OP_SRC_POP,
+       OP_SRC_ADD,
+       OP_SRC_AND,
+};
+
+struct op_src {
+       enum op_src_type type;
+       unsigned char reg;
+       int offset;
+};
+
+struct stack_op {
+       struct op_dest dest;
+       struct op_src src;
+};
+
+void arch_initial_func_cfi_state(struct cfi_state *state);
+
 int arch_decode_instruction(struct elf *elf, struct section *sec,
                            unsigned long offset, unsigned int maxlen,
                            unsigned int *len, unsigned char *type,
-                           unsigned long *displacement);
+                           unsigned long *immediate, struct stack_op *op);
+
+bool arch_callee_saved_reg(unsigned char reg);
 
 #endif /* _ARCH_H */
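
Concretely, each stack-modifying instruction is summarized in one stack_op.
For example, with the opcode table in the decoder below, a bare "push %rbp"
(opcode 0x55, no REX prefix) decodes to type INSN_STACK and:

    struct stack_op op = {
            .src  = { .type = OP_SRC_REG, .reg = CFI_BP }, /* value pushed */
            .dest = { .type = OP_DEST_PUSH },              /* onto the stack */
    };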
index 6ac99e3..a36c2eb 100644 (file)
 #include "../../arch.h"
 #include "../../warn.h"
 
+static unsigned char op_to_cfi_reg[][2] = {
+       {CFI_AX, CFI_R8},
+       {CFI_CX, CFI_R9},
+       {CFI_DX, CFI_R10},
+       {CFI_BX, CFI_R11},
+       {CFI_SP, CFI_R12},
+       {CFI_BP, CFI_R13},
+       {CFI_SI, CFI_R14},
+       {CFI_DI, CFI_R15},
+};
+
 static int is_x86_64(struct elf *elf)
 {
        switch (elf->ehdr.e_machine) {
@@ -40,24 +51,50 @@ static int is_x86_64(struct elf *elf)
        }
 }
 
+bool arch_callee_saved_reg(unsigned char reg)
+{
+       switch (reg) {
+       case CFI_BP:
+       case CFI_BX:
+       case CFI_R12:
+       case CFI_R13:
+       case CFI_R14:
+       case CFI_R15:
+               return true;
+
+       case CFI_AX:
+       case CFI_CX:
+       case CFI_DX:
+       case CFI_SI:
+       case CFI_DI:
+       case CFI_SP:
+       case CFI_R8:
+       case CFI_R9:
+       case CFI_R10:
+       case CFI_R11:
+       case CFI_RA:
+       default:
+               return false;
+       }
+}
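
This is the System V AMD64 calling convention: rbx, rbp and r12-r15 are preserved across calls, so only saves and restores of those registers matter for CFI tracking; all other registers (and the return-address pseudo-register CFI_RA) are treated as clobbered at call boundaries.
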
+
 int arch_decode_instruction(struct elf *elf, struct section *sec,
                            unsigned long offset, unsigned int maxlen,
                            unsigned int *len, unsigned char *type,
-                           unsigned long *immediate)
+                           unsigned long *immediate, struct stack_op *op)
 {
        struct insn insn;
-       int x86_64;
-       unsigned char op1, op2, ext;
+       int x86_64, sign;
+       unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
+                     modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
+                     sib = 0;
 
        x86_64 = is_x86_64(elf);
        if (x86_64 == -1)
                return -1;
 
-       insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64);
+       insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64);
        insn_get_length(&insn);
-       insn_get_opcode(&insn);
-       insn_get_modrm(&insn);
-       insn_get_immediate(&insn);
 
        if (!insn_complete(&insn)) {
                WARN_FUNC("can't decode instruction", sec, offset);
@@ -73,67 +110,323 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
        op1 = insn.opcode.bytes[0];
        op2 = insn.opcode.bytes[1];
 
+       if (insn.rex_prefix.nbytes) {
+               rex = insn.rex_prefix.bytes[0];
+               rex_w = X86_REX_W(rex) >> 3;
+               rex_r = X86_REX_R(rex) >> 2;
+               rex_b = X86_REX_B(rex);
+       }
+
+       if (insn.modrm.nbytes) {
+               modrm = insn.modrm.bytes[0];
+               modrm_mod = X86_MODRM_MOD(modrm);
+               modrm_reg = X86_MODRM_REG(modrm);
+               modrm_rm = X86_MODRM_RM(modrm);
+       }
+
+       if (insn.sib.nbytes)
+               sib = insn.sib.bytes[0];
+
        switch (op1) {
-       case 0x55:
-               if (!insn.rex_prefix.nbytes)
-                       /* push rbp */
-                       *type = INSN_FP_SAVE;
+
+       case 0x1:
+       case 0x29:
+               if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+
+                       /* add/sub reg, %rsp */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_ADD;
+                       op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+                       op->dest.type = OP_DEST_REG;
+                       op->dest.reg = CFI_SP;
+               }
+               break;
+
+       case 0x50 ... 0x57:
+
+               /* push reg */
+               *type = INSN_STACK;
+               op->src.type = OP_SRC_REG;
+               op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+               op->dest.type = OP_DEST_PUSH;
+
                break;
 
-       case 0x5d:
-               if (!insn.rex_prefix.nbytes)
-                       /* pop rbp */
-                       *type = INSN_FP_RESTORE;
+       case 0x58 ... 0x5f:
+
+               /* pop reg */
+               *type = INSN_STACK;
+               op->src.type = OP_SRC_POP;
+               op->dest.type = OP_DEST_REG;
+               op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+
+               break;
+
+       case 0x68:
+       case 0x6a:
+               /* push immediate */
+               *type = INSN_STACK;
+               op->src.type = OP_SRC_CONST;
+               op->dest.type = OP_DEST_PUSH;
                break;
 
        case 0x70 ... 0x7f:
                *type = INSN_JUMP_CONDITIONAL;
                break;
 
+       case 0x81:
+       case 0x83:
+               if (rex != 0x48)
+                       break;
+
+               if (modrm == 0xe4) {
+                       /* and imm, %rsp */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_AND;
+                       op->src.reg = CFI_SP;
+                       op->src.offset = insn.immediate.value;
+                       op->dest.type = OP_DEST_REG;
+                       op->dest.reg = CFI_SP;
+                       break;
+               }
+
+               if (modrm == 0xc4)
+                       sign = 1;
+               else if (modrm == 0xec)
+                       sign = -1;
+               else
+                       break;
+
+               /* add/sub imm, %rsp */
+               *type = INSN_STACK;
+               op->src.type = OP_SRC_ADD;
+               op->src.reg = CFI_SP;
+               op->src.offset = insn.immediate.value * sign;
+               op->dest.type = OP_DEST_REG;
+               op->dest.reg = CFI_SP;
+               break;
+
        case 0x89:
-               if (insn.rex_prefix.nbytes == 1 &&
-                   insn.rex_prefix.bytes[0] == 0x48 &&
-                   insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5)
-                       /* mov rsp, rbp */
-                       *type = INSN_FP_SETUP;
+               if (rex == 0x48 && modrm == 0xe5) {
+
+                       /* mov %rsp, %rbp */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_REG;
+                       op->src.reg = CFI_SP;
+                       op->dest.type = OP_DEST_REG;
+                       op->dest.reg = CFI_BP;
+                       break;
+               }
+               /* fallthrough */
+       case 0x88:
+               if (!rex_b &&
+                   (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
+
+                       /* mov reg, disp(%rbp) */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_REG;
+                       op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+                       op->dest.type = OP_DEST_REG_INDIRECT;
+                       op->dest.reg = CFI_BP;
+                       op->dest.offset = insn.displacement.value;
+
+               } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+
+                       /* mov reg, disp(%rsp) */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_REG;
+                       op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+                       op->dest.type = OP_DEST_REG_INDIRECT;
+                       op->dest.reg = CFI_SP;
+                       op->dest.offset = insn.displacement.value;
+               }
+
+               break;
+
+       case 0x8b:
+               if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
+
+                       /* mov disp(%rbp), reg */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_REG_INDIRECT;
+                       op->src.reg = CFI_BP;
+                       op->src.offset = insn.displacement.value;
+                       op->dest.type = OP_DEST_REG;
+                       op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+
+               } else if (rex_w && !rex_b && sib == 0x24 &&
+                          modrm_mod != 3 && modrm_rm == 4) {
+
+                       /* mov disp(%rsp), reg */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_REG_INDIRECT;
+                       op->src.reg = CFI_SP;
+                       op->src.offset = insn.displacement.value;
+                       op->dest.type = OP_DEST_REG;
+                       op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+               }
+
                break;
 
        case 0x8d:
-               if (insn.rex_prefix.nbytes &&
-                   insn.rex_prefix.bytes[0] == 0x48 &&
-                   insn.modrm.nbytes && insn.modrm.bytes[0] == 0x2c &&
-                   insn.sib.nbytes && insn.sib.bytes[0] == 0x24)
-                       /* lea %(rsp), %rbp */
-                       *type = INSN_FP_SETUP;
+               if (rex == 0x48 && modrm == 0x65) {
+
+                       /* lea -disp(%rbp), %rsp */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_ADD;
+                       op->src.reg = CFI_BP;
+                       op->src.offset = insn.displacement.value;
+                       op->dest.type = OP_DEST_REG;
+                       op->dest.reg = CFI_SP;
+                       break;
+               }
+
+               if (rex == 0x4c && modrm == 0x54 && sib == 0x24 &&
+                   insn.displacement.value == 8) {
+
+                       /*
+                        * lea 0x8(%rsp), %r10
+                        *
+                        * Here r10 is the "drap" pointer, used as a stack
+                        * pointer helper when the stack gets realigned.
+                        */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_ADD;
+                       op->src.reg = CFI_SP;
+                       op->src.offset = 8;
+                       op->dest.type = OP_DEST_REG;
+                       op->dest.reg = CFI_R10;
+                       break;
+               }
+
+               if (rex == 0x4c && modrm == 0x6c && sib == 0x24 &&
+                   insn.displacement.value == 16) {
+
+                       /*
+                        * lea 0x10(%rsp), %r13
+                        *
+                        * Here r13 is the "drap" pointer, used as a stack
+                        * pointer helper when the stack gets realigned.
+                        */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_ADD;
+                       op->src.reg = CFI_SP;
+                       op->src.offset = 16;
+                       op->dest.type = OP_DEST_REG;
+                       op->dest.reg = CFI_R13;
+                       break;
+               }
+
+               if (rex == 0x49 && modrm == 0x62 &&
+                   insn.displacement.value == -8) {
+
+                       /*
+                        * lea -0x8(%r10), %rsp
+                        *
+                        * Restoring rsp back to its original value after a
+                        * stack realignment.
+                        */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_ADD;
+                       op->src.reg = CFI_R10;
+                       op->src.offset = -8;
+                       op->dest.type = OP_DEST_REG;
+                       op->dest.reg = CFI_SP;
+                       break;
+               }
+
+               if (rex == 0x49 && modrm == 0x65 &&
+                   insn.displacement.value == -16) {
+
+                       /*
+                        * lea -0x10(%r13), %rsp
+                        *
+                        * Restoring rsp back to its original value after a
+                        * stack realignment.
+                        */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_ADD;
+                       op->src.reg = CFI_R13;
+                       op->src.offset = -16;
+                       op->dest.type = OP_DEST_REG;
+                       op->dest.reg = CFI_SP;
+                       break;
+               }
+
+               break;
+
+       case 0x8f:
+               /* pop to mem */
+               *type = INSN_STACK;
+               op->src.type = OP_SRC_POP;
+               op->dest.type = OP_DEST_MEM;
                break;
 
        case 0x90:
                *type = INSN_NOP;
                break;
 
+       case 0x9c:
+               /* pushf */
+               *type = INSN_STACK;
+               op->src.type = OP_SRC_CONST;
+               op->dest.type = OP_DEST_PUSH;
+               break;
+
+       case 0x9d:
+               /* popf */
+               *type = INSN_STACK;
+               op->src.type = OP_SRC_POP;
+               op->dest.type = OP_DEST_MEM;
+               break;
+
        case 0x0f:
+
                if (op2 >= 0x80 && op2 <= 0x8f)
                        *type = INSN_JUMP_CONDITIONAL;
                else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
                         op2 == 0x35)
+
                        /* sysenter, sysret */
                        *type = INSN_CONTEXT_SWITCH;
+
                else if (op2 == 0x0d || op2 == 0x1f)
+
                        /* nopl/nopw */
                        *type = INSN_NOP;
-               else if (op2 == 0x01 && insn.modrm.nbytes &&
-                        (insn.modrm.bytes[0] == 0xc2 ||
-                         insn.modrm.bytes[0] == 0xd8))
-                       /* vmlaunch, vmrun */
-                       *type = INSN_CONTEXT_SWITCH;
+
+               else if (op2 == 0xa0 || op2 == 0xa8) {
+
+                       /* push fs/gs */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_CONST;
+                       op->dest.type = OP_DEST_PUSH;
+
+               } else if (op2 == 0xa1 || op2 == 0xa9) {
+
+                       /* pop fs/gs */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_POP;
+                       op->dest.type = OP_DEST_MEM;
+               }
 
                break;
 
-       case 0xc9: /* leave */
-               *type = INSN_FP_RESTORE;
+       case 0xc9:
+               /*
+                * leave
+                *
+                * equivalent to:
+                * mov bp, sp
+                * pop bp
+                */
+               *type = INSN_STACK;
+               op->dest.type = OP_DEST_LEAVE;
+
                break;
 
-       case 0xe3: /* jecxz/jrcxz */
+       case 0xe3:
+               /* jecxz/jrcxz */
                *type = INSN_JUMP_CONDITIONAL;
                break;
 
@@ -158,14 +451,27 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
                break;
 
        case 0xff:
-               ext = X86_MODRM_REG(insn.modrm.bytes[0]);
-               if (ext == 2 || ext == 3)
+               if (modrm_reg == 2 || modrm_reg == 3)
+
                        *type = INSN_CALL_DYNAMIC;
-               else if (ext == 4)
+
+               else if (modrm_reg == 4)
+
                        *type = INSN_JUMP_DYNAMIC;
-               else if (ext == 5) /*jmpf */
+
+               else if (modrm_reg == 5)
+
+                       /* jmpf */
                        *type = INSN_CONTEXT_SWITCH;
 
+               else if (modrm_reg == 6) {
+
+                       /* push from mem */
+                       *type = INSN_STACK;
+                       op->src.type = OP_SRC_CONST;
+                       op->dest.type = OP_DEST_PUSH;
+               }
+
                break;
 
        default:
@@ -176,3 +482,21 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 
        return 0;
 }
+
+void arch_initial_func_cfi_state(struct cfi_state *state)
+{
+       int i;
+
+       for (i = 0; i < CFI_NUM_REGS; i++) {
+               state->regs[i].base = CFI_UNDEFINED;
+               state->regs[i].offset = 0;
+       }
+
+       /* initial CFA (call frame address) */
+       state->cfa.base = CFI_SP;
+       state->cfa.offset = 8;
+
+       /* initial RA (return address) */
+       state->regs[16].base = CFI_CFA;
+       state->regs[16].offset = -8;
+}
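
The entry state encodes the x86-64 call convention: the call instruction has just pushed an 8-byte return address, so the CFA (the caller's stack pointer value at the call site) is rsp+8 and the return address sits at CFA-8, with regs[16] being the DWARF return-address column. A minimal sketch, not objtool code, of how the CFA offset then evolves across a classic prologue:

    #include <stdio.h>

    int main(void)
    {
            int cfa_offset = 8;     /* entry: the call pushed the RA */

            cfa_offset += 8;        /* push %rbp */
            printf("after push %%rbp:     CFA = SP + %d\n", cfa_offset);

            /* mov %rsp, %rbp: the CFA becomes BP-based, same offset */
            printf("after mov %%rsp,%%rbp: CFA = BP + %d\n", cfa_offset);

            /* sub $0x20, %rsp: SP moves, but a BP-based CFA is immune */
            printf("after sub $0x20,%%rsp: CFA = BP + %d\n", cfa_offset);
            return 0;
    }
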
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 282a603..365c34e 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * For more information, see tools/objtool/Documentation/stack-validation.txt.
  */
 
-#include <string.h>
-#include <stdlib.h>
 #include <subcmd/parse-options.h>
-
 #include "builtin.h"
-#include "elf.h"
-#include "special.h"
-#include "arch.h"
-#include "warn.h"
-
-#include <linux/hashtable.h>
-#include <linux/kernel.h>
-
-#define STATE_FP_SAVED         0x1
-#define STATE_FP_SETUP         0x2
-#define STATE_FENTRY           0x4
-
-struct instruction {
-       struct list_head list;
-       struct hlist_node hash;
-       struct section *sec;
-       unsigned long offset;
-       unsigned int len, state;
-       unsigned char type;
-       unsigned long immediate;
-       bool alt_group, visited, dead_end;
-       struct symbol *call_dest;
-       struct instruction *jump_dest;
-       struct list_head alts;
-       struct symbol *func;
-};
-
-struct alternative {
-       struct list_head list;
-       struct instruction *insn;
-};
-
-struct objtool_file {
-       struct elf *elf;
-       struct list_head insn_list;
-       DECLARE_HASHTABLE(insn_hash, 16);
-       struct section *rodata, *whitelist;
-       bool ignore_unreachables, c_file;
-};
-
-const char *objname;
-static bool nofp;
-
-static struct instruction *find_insn(struct objtool_file *file,
-                                    struct section *sec, unsigned long offset)
-{
-       struct instruction *insn;
-
-       hash_for_each_possible(file->insn_hash, insn, hash, offset)
-               if (insn->sec == sec && insn->offset == offset)
-                       return insn;
-
-       return NULL;
-}
-
-static struct instruction *next_insn_same_sec(struct objtool_file *file,
-                                             struct instruction *insn)
-{
-       struct instruction *next = list_next_entry(insn, list);
-
-       if (&next->list == &file->insn_list || next->sec != insn->sec)
-               return NULL;
-
-       return next;
-}
-
-static bool gcov_enabled(struct objtool_file *file)
-{
-       struct section *sec;
-       struct symbol *sym;
-
-       list_for_each_entry(sec, &file->elf->sections, list)
-               list_for_each_entry(sym, &sec->symbol_list, list)
-                       if (!strncmp(sym->name, "__gcov_.", 8))
-                               return true;
-
-       return false;
-}
-
-#define for_each_insn(file, insn)                                      \
-       list_for_each_entry(insn, &file->insn_list, list)
-
-#define func_for_each_insn(file, func, insn)                           \
-       for (insn = find_insn(file, func->sec, func->offset);           \
-            insn && &insn->list != &file->insn_list &&                 \
-               insn->sec == func->sec &&                               \
-               insn->offset < func->offset + func->len;                \
-            insn = list_next_entry(insn, list))
-
-#define func_for_each_insn_continue_reverse(file, func, insn)          \
-       for (insn = list_prev_entry(insn, list);                        \
-            &insn->list != &file->insn_list &&                         \
-               insn->sec == func->sec && insn->offset >= func->offset; \
-            insn = list_prev_entry(insn, list))
-
-#define sec_for_each_insn_from(file, insn)                             \
-       for (; insn; insn = next_insn_same_sec(file, insn))
-
-
-/*
- * Check if the function has been manually whitelisted with the
- * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
- * due to its use of a context switching instruction.
- */
-static bool ignore_func(struct objtool_file *file, struct symbol *func)
-{
-       struct rela *rela;
-       struct instruction *insn;
-
-       /* check for STACK_FRAME_NON_STANDARD */
-       if (file->whitelist && file->whitelist->rela)
-               list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
-                       if (rela->sym->type == STT_SECTION &&
-                           rela->sym->sec == func->sec &&
-                           rela->addend == func->offset)
-                               return true;
-                       if (rela->sym->type == STT_FUNC && rela->sym == func)
-                               return true;
-               }
-
-       /* check if it has a context switching instruction */
-       func_for_each_insn(file, func, insn)
-               if (insn->type == INSN_CONTEXT_SWITCH)
-                       return true;
-
-       return false;
-}
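
The whitelist consulted here is produced on the kernel side by the STACK_FRAME_NON_STANDARD() macro from include/linux/frame.h, which records the function's address in a .discard section that objtool reads back as file->whitelist. A kernel-side sketch with a hypothetical function name:

    #include <linux/frame.h>

    /* e.g. a function whose hand-written asm objtool can't follow */
    static void my_asm_trampoline(void)
    {
            /* ... inline asm that breaks the normal stack rules ... */
    }
    STACK_FRAME_NON_STANDARD(my_asm_trampoline);
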
-
-/*
- * This checks to see if the given function is a "noreturn" function.
- *
- * For global functions which are outside the scope of this object file, we
- * have to keep a manual list of them.
- *
- * For local functions, we have to detect them manually by simply looking for
- * the lack of a return instruction.
- *
- * Returns:
- *  -1: error
- *   0: no dead end
- *   1: dead end
- */
-static int __dead_end_function(struct objtool_file *file, struct symbol *func,
-                              int recursion)
-{
-       int i;
-       struct instruction *insn;
-       bool empty = true;
-
-       /*
-        * Unfortunately these have to be hard coded because the noreturn
-        * attribute isn't provided in ELF data.
-        */
-       static const char * const global_noreturns[] = {
-               "__stack_chk_fail",
-               "panic",
-               "do_exit",
-               "do_task_dead",
-               "__module_put_and_exit",
-               "complete_and_exit",
-               "kvm_spurious_fault",
-               "__reiserfs_panic",
-               "lbug_with_loc"
-       };
-
-       if (func->bind == STB_WEAK)
-               return 0;
-
-       if (func->bind == STB_GLOBAL)
-               for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
-                       if (!strcmp(func->name, global_noreturns[i]))
-                               return 1;
-
-       if (!func->sec)
-               return 0;
-
-       func_for_each_insn(file, func, insn) {
-               empty = false;
-
-               if (insn->type == INSN_RETURN)
-                       return 0;
-       }
-
-       if (empty)
-               return 0;
-
-       /*
-        * A function can have a sibling call instead of a return.  In that
-        * case, the function's dead-end status depends on whether the target
-        * of the sibling call returns.
-        */
-       func_for_each_insn(file, func, insn) {
-               if (insn->sec != func->sec ||
-                   insn->offset >= func->offset + func->len)
-                       break;
-
-               if (insn->type == INSN_JUMP_UNCONDITIONAL) {
-                       struct instruction *dest = insn->jump_dest;
-                       struct symbol *dest_func;
-
-                       if (!dest)
-                               /* sibling call to another file */
-                               return 0;
-
-                       if (dest->sec != func->sec ||
-                           dest->offset < func->offset ||
-                           dest->offset >= func->offset + func->len) {
-                               /* local sibling call */
-                               dest_func = find_symbol_by_offset(dest->sec,
-                                                                 dest->offset);
-                               if (!dest_func)
-                                       continue;
-
-                               if (recursion == 5) {
-                                       WARN_FUNC("infinite recursion (objtool bug!)",
-                                                 dest->sec, dest->offset);
-                                       return -1;
-                               }
-
-                               return __dead_end_function(file, dest_func,
-                                                          recursion + 1);
-                       }
-               }
-
-               if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
-                       /* sibling call */
-                       return 0;
-       }
-
-       return 1;
-}
-
-static int dead_end_function(struct objtool_file *file, struct symbol *func)
-{
-       return __dead_end_function(file, func, 0);
-}
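
The hardcoded list is needed because "noreturn" is a compiler-level attribute that does not survive into ELF symbol data, so for a call that resolves outside the current object objtool has no way to see that the callee never returns. Illustrative sketch (not tree code):

    /* The compiler knows panic() cannot fall through; objtool,
     * reading only the ELF of this object, does not. */
    void panic(const char *fmt, ...) __attribute__((noreturn));

    int check(int x)
    {
            if (x < 0)
                    panic("bad x");
            return x;
    }
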
-
-/*
- * Call the arch-specific instruction decoder for all the instructions and add
- * them to the global instruction list.
- */
-static int decode_instructions(struct objtool_file *file)
-{
-       struct section *sec;
-       struct symbol *func;
-       unsigned long offset;
-       struct instruction *insn;
-       int ret;
-
-       list_for_each_entry(sec, &file->elf->sections, list) {
-
-               if (!(sec->sh.sh_flags & SHF_EXECINSTR))
-                       continue;
-
-               for (offset = 0; offset < sec->len; offset += insn->len) {
-                       insn = malloc(sizeof(*insn));
-                       memset(insn, 0, sizeof(*insn));
-
-                       INIT_LIST_HEAD(&insn->alts);
-                       insn->sec = sec;
-                       insn->offset = offset;
-
-                       ret = arch_decode_instruction(file->elf, sec, offset,
-                                                     sec->len - offset,
-                                                     &insn->len, &insn->type,
-                                                     &insn->immediate);
-                       if (ret)
-                               return ret;
-
-                       if (!insn->type || insn->type > INSN_LAST) {
-                               WARN_FUNC("invalid instruction type %d",
-                                         insn->sec, insn->offset, insn->type);
-                               return -1;
-                       }
-
-                       hash_add(file->insn_hash, &insn->hash, insn->offset);
-                       list_add_tail(&insn->list, &file->insn_list);
-               }
-
-               list_for_each_entry(func, &sec->symbol_list, list) {
-                       if (func->type != STT_FUNC)
-                               continue;
-
-                       if (!find_insn(file, sec, func->offset)) {
-                               WARN("%s(): can't find starting instruction",
-                                    func->name);
-                               return -1;
-                       }
-
-                       func_for_each_insn(file, func, insn)
-                               if (!insn->func)
-                                       insn->func = func;
-               }
-       }
-
-       return 0;
-}
-
-/*
- * Find all uses of the unreachable() macro, which are code path dead ends.
- */
-static int add_dead_ends(struct objtool_file *file)
-{
-       struct section *sec;
-       struct rela *rela;
-       struct instruction *insn;
-       bool found;
-
-       sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
-       if (!sec)
-               return 0;
-
-       list_for_each_entry(rela, &sec->rela_list, list) {
-               if (rela->sym->type != STT_SECTION) {
-                       WARN("unexpected relocation symbol type in %s", sec->name);
-                       return -1;
-               }
-               insn = find_insn(file, rela->sym->sec, rela->addend);
-               if (insn)
-                       insn = list_prev_entry(insn, list);
-               else if (rela->addend == rela->sym->sec->len) {
-                       found = false;
-                       list_for_each_entry_reverse(insn, &file->insn_list, list) {
-                               if (insn->sec == rela->sym->sec) {
-                                       found = true;
-                                       break;
-                               }
-                       }
-
-                       if (!found) {
-                               WARN("can't find unreachable insn at %s+0x%x",
-                                    rela->sym->sec->name, rela->addend);
-                               return -1;
-                       }
-               } else {
-                       WARN("can't find unreachable insn at %s+0x%x",
-                            rela->sym->sec->name, rela->addend);
-                       return -1;
-               }
-
-               insn->dead_end = true;
-       }
-
-       return 0;
-}
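
The producer side of .rela.discard.unreachable is the kernel's unreachable() annotation, which records the code location in a discard section so objtool can mark the instruction before it as a dead end. A kernel-side sketch of a typical site (hypothetical function; BUG() and unreachable() are the kernel's macros):

    int dispatch(int mode)
    {
            switch (mode) {
            case 0:
                    return 1;
            default:
                    BUG();
                    unreachable();  /* this path dead-ends here */
            }
    }
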
-
-/*
- * Warnings shouldn't be reported for ignored functions.
- */
-static void add_ignores(struct objtool_file *file)
-{
-       struct instruction *insn;
-       struct section *sec;
-       struct symbol *func;
-
-       list_for_each_entry(sec, &file->elf->sections, list) {
-               list_for_each_entry(func, &sec->symbol_list, list) {
-                       if (func->type != STT_FUNC)
-                               continue;
-
-                       if (!ignore_func(file, func))
-                               continue;
-
-                       func_for_each_insn(file, func, insn)
-                               insn->visited = true;
-               }
-       }
-}
-
-/*
- * Find the destination instructions for all jumps.
- */
-static int add_jump_destinations(struct objtool_file *file)
-{
-       struct instruction *insn;
-       struct rela *rela;
-       struct section *dest_sec;
-       unsigned long dest_off;
-
-       for_each_insn(file, insn) {
-               if (insn->type != INSN_JUMP_CONDITIONAL &&
-                   insn->type != INSN_JUMP_UNCONDITIONAL)
-                       continue;
-
-               /* skip ignores */
-               if (insn->visited)
-                       continue;
-
-               rela = find_rela_by_dest_range(insn->sec, insn->offset,
-                                              insn->len);
-               if (!rela) {
-                       dest_sec = insn->sec;
-                       dest_off = insn->offset + insn->len + insn->immediate;
-               } else if (rela->sym->type == STT_SECTION) {
-                       dest_sec = rela->sym->sec;
-                       dest_off = rela->addend + 4;
-               } else if (rela->sym->sec->idx) {
-                       dest_sec = rela->sym->sec;
-                       dest_off = rela->sym->sym.st_value + rela->addend + 4;
-               } else {
-                       /* sibling call */
-                       insn->jump_dest = 0;
-                       continue;
-               }
-
-               insn->jump_dest = find_insn(file, dest_sec, dest_off);
-               if (!insn->jump_dest) {
-
-                       /*
-                        * This is a special case where an alt instruction
-                        * jumps past the end of the section.  These are
-                        * handled later in handle_group_alt().
-                        */
-                       if (!strcmp(insn->sec->name, ".altinstr_replacement"))
-                               continue;
-
-                       WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
-                                 insn->sec, insn->offset, dest_sec->name,
-                                 dest_off);
-                       return -1;
-               }
-       }
-
-       return 0;
-}
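
The "+ 4" arithmetic falls out of x86 PC-relative encoding: the relocation lands on a 4-byte displacement field that is interpreted relative to the end of the instruction, so a typical R_X86_64_PC32 addend stores target - 4, and the destination's offset within the target section is recovered as in this minimal sketch:

    /* section offset of a jump/call destination, given the rela
     * addend of its 4-byte PC-relative displacement field */
    static unsigned long rela_jump_dest_off(long addend)
    {
            return addend + 4;
    }
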
-
-/*
- * Find the destination instructions for all calls.
- */
-static int add_call_destinations(struct objtool_file *file)
-{
-       struct instruction *insn;
-       unsigned long dest_off;
-       struct rela *rela;
-
-       for_each_insn(file, insn) {
-               if (insn->type != INSN_CALL)
-                       continue;
-
-               rela = find_rela_by_dest_range(insn->sec, insn->offset,
-                                              insn->len);
-               if (!rela) {
-                       dest_off = insn->offset + insn->len + insn->immediate;
-                       insn->call_dest = find_symbol_by_offset(insn->sec,
-                                                               dest_off);
-                       if (!insn->call_dest) {
-                               WARN_FUNC("can't find call dest symbol at offset 0x%lx",
-                                         insn->sec, insn->offset, dest_off);
-                               return -1;
-                       }
-               } else if (rela->sym->type == STT_SECTION) {
-                       insn->call_dest = find_symbol_by_offset(rela->sym->sec,
-                                                               rela->addend+4);
-                       if (!insn->call_dest ||
-                           insn->call_dest->type != STT_FUNC) {
-                               WARN_FUNC("can't find call dest symbol at %s+0x%x",
-                                         insn->sec, insn->offset,
-                                         rela->sym->sec->name,
-                                         rela->addend + 4);
-                               return -1;
-                       }
-               } else
-                       insn->call_dest = rela->sym;
-       }
-
-       return 0;
-}
-
-/*
- * The .alternatives section requires some extra special care, over and above
- * what other special sections require:
- *
- * 1. Because alternatives are patched in-place, we need to insert a fake jump
- *    instruction at the end so that validate_branch() skips all the original
- *    replaced instructions when validating the new instruction path.
- *
- * 2. An added wrinkle is that the new instruction length might be zero.  In
- *    that case the old instructions are replaced with noops.  We simulate that
- *    by creating a fake jump as the only new instruction.
- *
- * 3. In some cases, the alternative section includes an instruction which
- *    conditionally jumps to the _end_ of the entry.  We have to modify these
- *    jumps' destinations to point back to .text rather than the end of the
- *    entry in .altinstr_replacement.
- *
- * 4. It has been requested that we don't validate the !POPCNT feature path
- *    which is a "very very small percentage of machines".
- */
-static int handle_group_alt(struct objtool_file *file,
-                           struct special_alt *special_alt,
-                           struct instruction *orig_insn,
-                           struct instruction **new_insn)
-{
-       struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
-       unsigned long dest_off;
-
-       last_orig_insn = NULL;
-       insn = orig_insn;
-       sec_for_each_insn_from(file, insn) {
-               if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
-                       break;
-
-               if (special_alt->skip_orig)
-                       insn->type = INSN_NOP;
-
-               insn->alt_group = true;
-               last_orig_insn = insn;
-       }
-
-       if (!next_insn_same_sec(file, last_orig_insn)) {
-               WARN("%s: don't know how to handle alternatives at end of section",
-                    special_alt->orig_sec->name);
-               return -1;
-       }
-
-       fake_jump = malloc(sizeof(*fake_jump));
-       if (!fake_jump) {
-               WARN("malloc failed");
-               return -1;
-       }
-       memset(fake_jump, 0, sizeof(*fake_jump));
-       INIT_LIST_HEAD(&fake_jump->alts);
-       fake_jump->sec = special_alt->new_sec;
-       fake_jump->offset = -1;
-       fake_jump->type = INSN_JUMP_UNCONDITIONAL;
-       fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
-
-       if (!special_alt->new_len) {
-               *new_insn = fake_jump;
-               return 0;
-       }
-
-       last_new_insn = NULL;
-       insn = *new_insn;
-       sec_for_each_insn_from(file, insn) {
-               if (insn->offset >= special_alt->new_off + special_alt->new_len)
-                       break;
-
-               last_new_insn = insn;
-
-               if (insn->type != INSN_JUMP_CONDITIONAL &&
-                   insn->type != INSN_JUMP_UNCONDITIONAL)
-                       continue;
-
-               if (!insn->immediate)
-                       continue;
-
-               dest_off = insn->offset + insn->len + insn->immediate;
-               if (dest_off == special_alt->new_off + special_alt->new_len)
-                       insn->jump_dest = fake_jump;
-
-               if (!insn->jump_dest) {
-                       WARN_FUNC("can't find alternative jump destination",
-                                 insn->sec, insn->offset);
-                       return -1;
-               }
-       }
-
-       if (!last_new_insn) {
-               WARN_FUNC("can't find last new alternative instruction",
-                         special_alt->new_sec, special_alt->new_off);
-               return -1;
-       }
-
-       list_add(&fake_jump->list, &last_new_insn->list);
-
-       return 0;
-}
-
-/*
- * A jump table entry can either convert a nop to a jump or a jump to a nop.
- * If the original instruction is a jump, make the alt entry an effective nop
- * by just skipping the original instruction.
- */
-static int handle_jump_alt(struct objtool_file *file,
-                          struct special_alt *special_alt,
-                          struct instruction *orig_insn,
-                          struct instruction **new_insn)
-{
-       if (orig_insn->type == INSN_NOP)
-               return 0;
-
-       if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
-               WARN_FUNC("unsupported instruction at jump label",
-                         orig_insn->sec, orig_insn->offset);
-               return -1;
-       }
-
-       *new_insn = list_next_entry(orig_insn, list);
-       return 0;
-}
-
-/*
- * Read all the special sections which have alternate instructions which can be
- * patched in or redirected to at runtime.  Each instruction having alternate
- * instruction(s) has them added to its insn->alts list, which will be
- * traversed in validate_branch().
- */
-static int add_special_section_alts(struct objtool_file *file)
-{
-       struct list_head special_alts;
-       struct instruction *orig_insn, *new_insn;
-       struct special_alt *special_alt, *tmp;
-       struct alternative *alt;
-       int ret;
-
-       ret = special_get_alts(file->elf, &special_alts);
-       if (ret)
-               return ret;
-
-       list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
-               alt = malloc(sizeof(*alt));
-               if (!alt) {
-                       WARN("malloc failed");
-                       ret = -1;
-                       goto out;
-               }
-
-               orig_insn = find_insn(file, special_alt->orig_sec,
-                                     special_alt->orig_off);
-               if (!orig_insn) {
-                       WARN_FUNC("special: can't find orig instruction",
-                                 special_alt->orig_sec, special_alt->orig_off);
-                       ret = -1;
-                       goto out;
-               }
+#include "check.h"
 
-               new_insn = NULL;
-               if (!special_alt->group || special_alt->new_len) {
-                       new_insn = find_insn(file, special_alt->new_sec,
-                                            special_alt->new_off);
-                       if (!new_insn) {
-                               WARN_FUNC("special: can't find new instruction",
-                                         special_alt->new_sec,
-                                         special_alt->new_off);
-                               ret = -1;
-                               goto out;
-                       }
-               }
+bool nofp;
 
-               if (special_alt->group) {
-                       ret = handle_group_alt(file, special_alt, orig_insn,
-                                              &new_insn);
-                       if (ret)
-                               goto out;
-               } else if (special_alt->jump_or_nop) {
-                       ret = handle_jump_alt(file, special_alt, orig_insn,
-                                             &new_insn);
-                       if (ret)
-                               goto out;
-               }
-
-               alt->insn = new_insn;
-               list_add_tail(&alt->list, &orig_insn->alts);
-
-               list_del(&special_alt->list);
-               free(special_alt);
-       }
-
-out:
-       return ret;
-}
-
-static int add_switch_table(struct objtool_file *file, struct symbol *func,
-                           struct instruction *insn, struct rela *table,
-                           struct rela *next_table)
-{
-       struct rela *rela = table;
-       struct instruction *alt_insn;
-       struct alternative *alt;
-
-       list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
-               if (rela == next_table)
-                       break;
-
-               if (rela->sym->sec != insn->sec ||
-                   rela->addend <= func->offset ||
-                   rela->addend >= func->offset + func->len)
-                       break;
-
-               alt_insn = find_insn(file, insn->sec, rela->addend);
-               if (!alt_insn) {
-                       WARN("%s: can't find instruction at %s+0x%x",
-                            file->rodata->rela->name, insn->sec->name,
-                            rela->addend);
-                       return -1;
-               }
-
-               alt = malloc(sizeof(*alt));
-               if (!alt) {
-                       WARN("malloc failed");
-                       return -1;
-               }
-
-               alt->insn = alt_insn;
-               list_add_tail(&alt->list, &insn->alts);
-       }
-
-       return 0;
-}
-
-/*
- * find_switch_table() - Given a dynamic jump, find the switch jump table in
- * .rodata associated with it.
- *
- * There are 3 basic patterns:
- *
- * 1. jmpq *[rodata addr](,%reg,8)
- *
- *    This is the most common case by far.  It jumps to an address in a simple
- *    jump table which is stored in .rodata.
- *
- * 2. jmpq *[rodata addr](%rip)
- *
- *    This is caused by a rare GCC quirk, currently only seen in three driver
- *    functions in the kernel, only with certain obscure non-distro configs.
- *
- *    As part of an optimization, GCC makes a copy of an existing switch jump
- *    table, modifies it, and then hard-codes the jump (albeit with an indirect
- *    jump) to use a single entry in the table.  The rest of the jump table and
- *    some of its jump targets remain as dead code.
- *
- *    In such a case we can just crudely ignore all unreachable instruction
- *    warnings for the entire object file.  Ideally we would just ignore them
- *    for the function, but that would require redesigning the code quite a
- *    bit.  And honestly that's just not worth doing: unreachable instruction
- *    warnings are of questionable value anyway, and this is such a rare issue.
- *
- * 3. mov [rodata addr],%reg1
- *    ... some instructions ...
- *    jmpq *(%reg1,%reg2,8)
- *
- *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
- *    writing, there are 11 occurrences of it in the allmodconfig kernel.
- *
- *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
- *    ensure the same register is used in the mov and jump instructions.
- */
-static struct rela *find_switch_table(struct objtool_file *file,
-                                     struct symbol *func,
-                                     struct instruction *insn)
-{
-       struct rela *text_rela, *rodata_rela;
-       struct instruction *orig_insn = insn;
-
-       text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
-       if (text_rela && text_rela->sym == file->rodata->sym) {
-               /* case 1 */
-               rodata_rela = find_rela_by_dest(file->rodata,
-                                               text_rela->addend);
-               if (rodata_rela)
-                       return rodata_rela;
-
-               /* case 2 */
-               rodata_rela = find_rela_by_dest(file->rodata,
-                                               text_rela->addend + 4);
-               if (!rodata_rela)
-                       return NULL;
-               file->ignore_unreachables = true;
-               return rodata_rela;
-       }
-
-       /* case 3 */
-       func_for_each_insn_continue_reverse(file, func, insn) {
-               if (insn->type == INSN_JUMP_DYNAMIC)
-                       break;
-
-               /* allow small jumps within the range */
-               if (insn->type == INSN_JUMP_UNCONDITIONAL &&
-                   insn->jump_dest &&
-                   (insn->jump_dest->offset <= insn->offset ||
-                    insn->jump_dest->offset > orig_insn->offset))
-                   break;
-
-               /* look for a relocation which references .rodata */
-               text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
-                                                   insn->len);
-               if (!text_rela || text_rela->sym != file->rodata->sym)
-                       continue;
-
-               /*
-                * Make sure the .rodata address isn't associated with a
-                * symbol.  gcc jump tables are anonymous data.
-                */
-               if (find_symbol_containing(file->rodata, text_rela->addend))
-                       continue;
-
-               return find_rela_by_dest(file->rodata, text_rela->addend);
-       }
-
-       return NULL;
-}
-
-static int add_func_switch_tables(struct objtool_file *file,
-                                 struct symbol *func)
-{
-       struct instruction *insn, *prev_jump = NULL;
-       struct rela *rela, *prev_rela = NULL;
-       int ret;
-
-       func_for_each_insn(file, func, insn) {
-               if (insn->type != INSN_JUMP_DYNAMIC)
-                       continue;
-
-               rela = find_switch_table(file, func, insn);
-               if (!rela)
-                       continue;
-
-               /*
-                * We found a switch table, but we don't know yet how big it
-                * is.  Don't add it until we reach the end of the function or
-                * the beginning of another switch table in the same function.
-                */
-               if (prev_jump) {
-                       ret = add_switch_table(file, func, prev_jump, prev_rela,
-                                              rela);
-                       if (ret)
-                               return ret;
-               }
-
-               prev_jump = insn;
-               prev_rela = rela;
-       }
-
-       if (prev_jump) {
-               ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
-/*
- * For some switch statements, gcc generates a jump table in the .rodata
- * section which contains a list of addresses within the function to jump to.
- * This finds these jump tables and adds them to the insn->alts lists.
- */
-static int add_switch_table_alts(struct objtool_file *file)
-{
-       struct section *sec;
-       struct symbol *func;
-       int ret;
-
-       if (!file->rodata || !file->rodata->rela)
-               return 0;
-
-       list_for_each_entry(sec, &file->elf->sections, list) {
-               list_for_each_entry(func, &sec->symbol_list, list) {
-                       if (func->type != STT_FUNC)
-                               continue;
-
-                       ret = add_func_switch_tables(file, func);
-                       if (ret)
-                               return ret;
-               }
-       }
-
-       return 0;
-}
-
-static int decode_sections(struct objtool_file *file)
-{
-       int ret;
-
-       ret = decode_instructions(file);
-       if (ret)
-               return ret;
-
-       ret = add_dead_ends(file);
-       if (ret)
-               return ret;
-
-       add_ignores(file);
-
-       ret = add_jump_destinations(file);
-       if (ret)
-               return ret;
-
-       ret = add_call_destinations(file);
-       if (ret)
-               return ret;
-
-       ret = add_special_section_alts(file);
-       if (ret)
-               return ret;
-
-       ret = add_switch_table_alts(file);
-       if (ret)
-               return ret;
-
-       return 0;
-}
-
-static bool is_fentry_call(struct instruction *insn)
-{
-       if (insn->type == INSN_CALL &&
-           insn->call_dest->type == STT_NOTYPE &&
-           !strcmp(insn->call_dest->name, "__fentry__"))
-               return true;
-
-       return false;
-}
-
-static bool has_modified_stack_frame(struct instruction *insn)
-{
-       return (insn->state & STATE_FP_SAVED) ||
-              (insn->state & STATE_FP_SETUP);
-}
-
-static bool has_valid_stack_frame(struct instruction *insn)
-{
-       return (insn->state & STATE_FP_SAVED) &&
-              (insn->state & STATE_FP_SETUP);
-}
-
-static unsigned int frame_state(unsigned long state)
-{
-       return (state & (STATE_FP_SAVED | STATE_FP_SETUP));
-}
-
-/*
- * Follow the branch starting at the given instruction, and recursively follow
- * any other branches (jumps).  Meanwhile, track the frame pointer state at
- * each instruction and validate all the rules described in
- * tools/objtool/Documentation/stack-validation.txt.
- */
-static int validate_branch(struct objtool_file *file,
-                          struct instruction *first, unsigned char first_state)
-{
-       struct alternative *alt;
-       struct instruction *insn;
-       struct section *sec;
-       struct symbol *func = NULL;
-       unsigned char state;
-       int ret;
-
-       insn = first;
-       sec = insn->sec;
-       state = first_state;
-
-       if (insn->alt_group && list_empty(&insn->alts)) {
-               WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
-                         sec, insn->offset);
-               return 1;
-       }
-
-       while (1) {
-               if (file->c_file && insn->func) {
-                       if (func && func != insn->func) {
-                               WARN("%s() falls through to next function %s()",
-                                    func->name, insn->func->name);
-                               return 1;
-                       }
-
-                       func = insn->func;
-               }
-
-               if (insn->visited) {
-                       if (frame_state(insn->state) != frame_state(state)) {
-                               WARN_FUNC("frame pointer state mismatch",
-                                         sec, insn->offset);
-                               return 1;
-                       }
-
-                       return 0;
-               }
-
-               insn->visited = true;
-               insn->state = state;
-
-               list_for_each_entry(alt, &insn->alts, list) {
-                       ret = validate_branch(file, alt->insn, state);
-                       if (ret)
-                               return 1;
-               }
-
-               switch (insn->type) {
-
-               case INSN_FP_SAVE:
-                       if (!nofp) {
-                               if (state & STATE_FP_SAVED) {
-                                       WARN_FUNC("duplicate frame pointer save",
-                                                 sec, insn->offset);
-                                       return 1;
-                               }
-                               state |= STATE_FP_SAVED;
-                       }
-                       break;
-
-               case INSN_FP_SETUP:
-                       if (!nofp) {
-                               if (state & STATE_FP_SETUP) {
-                                       WARN_FUNC("duplicate frame pointer setup",
-                                                 sec, insn->offset);
-                                       return 1;
-                               }
-                               state |= STATE_FP_SETUP;
-                       }
-                       break;
-
-               case INSN_FP_RESTORE:
-                       if (!nofp) {
-                               if (has_valid_stack_frame(insn))
-                                       state &= ~STATE_FP_SETUP;
-
-                               state &= ~STATE_FP_SAVED;
-                       }
-                       break;
-
-               case INSN_RETURN:
-                       if (!nofp && has_modified_stack_frame(insn)) {
-                               WARN_FUNC("return without frame pointer restore",
-                                         sec, insn->offset);
-                               return 1;
-                       }
-                       return 0;
-
-               case INSN_CALL:
-                       if (is_fentry_call(insn)) {
-                               state |= STATE_FENTRY;
-                               break;
-                       }
-
-                       ret = dead_end_function(file, insn->call_dest);
-                       if (ret == 1)
-                               return 0;
-                       if (ret == -1)
-                               return 1;
-
-                       /* fallthrough */
-               case INSN_CALL_DYNAMIC:
-                       if (!nofp && !has_valid_stack_frame(insn)) {
-                               WARN_FUNC("call without frame pointer save/setup",
-                                         sec, insn->offset);
-                               return 1;
-                       }
-                       break;
-
-               case INSN_JUMP_CONDITIONAL:
-               case INSN_JUMP_UNCONDITIONAL:
-                       if (insn->jump_dest) {
-                               ret = validate_branch(file, insn->jump_dest,
-                                                     state);
-                               if (ret)
-                                       return 1;
-                       } else if (has_modified_stack_frame(insn)) {
-                               WARN_FUNC("sibling call from callable instruction with changed frame pointer",
-                                         sec, insn->offset);
-                               return 1;
-                       } /* else it's a sibling call */
-
-                       if (insn->type == INSN_JUMP_UNCONDITIONAL)
-                               return 0;
-
-                       break;
-
-               case INSN_JUMP_DYNAMIC:
-                       if (list_empty(&insn->alts) &&
-                           has_modified_stack_frame(insn)) {
-                               WARN_FUNC("sibling call from callable instruction with changed frame pointer",
-                                         sec, insn->offset);
-                               return 1;
-                       }
-
-                       return 0;
-
-               default:
-                       break;
-               }
-
-               if (insn->dead_end)
-                       return 0;
-
-               insn = next_insn_same_sec(file, insn);
-               if (!insn) {
-                       WARN("%s: unexpected end of section", sec->name);
-                       return 1;
-               }
-       }
-
-       return 0;
-}
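
For reference, the shape of code these (pre-rewrite) frame-pointer rules accept, sketched as the prologue and epilogue gcc emits with -fno-omit-frame-pointer for a trivial function:

    /*
     *   push %rbp          INSN_FP_SAVE     -> state |= STATE_FP_SAVED
     *   mov  %rsp, %rbp    INSN_FP_SETUP    -> state |= STATE_FP_SETUP
     *   call g             ok: has_valid_stack_frame()
     *   pop  %rbp          INSN_FP_RESTORE  -> frame state cleared
     *   ret                ok: frame no longer modified
     */
    void g(void);

    void f(void)
    {
            g();
    }
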
-
-static bool is_kasan_insn(struct instruction *insn)
-{
-       return (insn->type == INSN_CALL &&
-               !strcmp(insn->call_dest->name, "__asan_handle_no_return"));
-}
-
-static bool is_ubsan_insn(struct instruction *insn)
-{
-       return (insn->type == INSN_CALL &&
-               !strcmp(insn->call_dest->name,
-                       "__ubsan_handle_builtin_unreachable"));
-}
-
-static bool ignore_unreachable_insn(struct symbol *func,
-                                   struct instruction *insn)
-{
-       int i;
-
-       if (insn->type == INSN_NOP)
-               return true;
-
-       /*
-        * Check if this (or a subsequent) instruction is related to
-        * CONFIG_UBSAN or CONFIG_KASAN.
-        *
-        * End the search at 5 instructions to avoid going into the weeds.
-        */
-       for (i = 0; i < 5; i++) {
-
-               if (is_kasan_insn(insn) || is_ubsan_insn(insn))
-                       return true;
-
-               if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
-                       insn = insn->jump_dest;
-                       continue;
-               }
-
-               if (insn->offset + insn->len >= func->offset + func->len)
-                       break;
-               insn = list_next_entry(insn, list);
-       }
-
-       return false;
-}
-
-static int validate_functions(struct objtool_file *file)
-{
-       struct section *sec;
-       struct symbol *func;
-       struct instruction *insn;
-       int ret, warnings = 0;
-
-       list_for_each_entry(sec, &file->elf->sections, list) {
-               list_for_each_entry(func, &sec->symbol_list, list) {
-                       if (func->type != STT_FUNC)
-                               continue;
-
-                       insn = find_insn(file, sec, func->offset);
-                       if (!insn)
-                               continue;
-
-                       ret = validate_branch(file, insn, 0);
-                       warnings += ret;
-               }
-       }
-
-       list_for_each_entry(sec, &file->elf->sections, list) {
-               list_for_each_entry(func, &sec->symbol_list, list) {
-                       if (func->type != STT_FUNC)
-                               continue;
-
-                       func_for_each_insn(file, func, insn) {
-                               if (insn->visited)
-                                       continue;
-
-                               insn->visited = true;
-
-                               if (file->ignore_unreachables || warnings ||
-                                   ignore_unreachable_insn(func, insn))
-                                       continue;
-
-                               /*
-                                * gcov produces a lot of unreachable
-                                * instructions.  If we get an unreachable
-                                * warning and the file has gcov enabled, just
-                                * ignore it, and all other such warnings for
-                                * the file.
-                                */
-                               if (!file->ignore_unreachables &&
-                                   gcov_enabled(file)) {
-                                       file->ignore_unreachables = true;
-                                       continue;
-                               }
-
-                               WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset);
-                               warnings++;
-                       }
-               }
-       }
-
-       return warnings;
-}
-
-static int validate_uncallable_instructions(struct objtool_file *file)
-{
-       struct instruction *insn;
-       int warnings = 0;
-
-       for_each_insn(file, insn) {
-               if (!insn->visited && insn->type == INSN_RETURN) {
-                       WARN_FUNC("return instruction outside of a callable function",
-                                 insn->sec, insn->offset);
-                       warnings++;
-               }
-       }
-
-       return warnings;
-}
-
-static void cleanup(struct objtool_file *file)
-{
-       struct instruction *insn, *tmpinsn;
-       struct alternative *alt, *tmpalt;
-
-       list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
-               list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
-                       list_del(&alt->list);
-                       free(alt);
-               }
-               list_del(&insn->list);
-               hash_del(&insn->hash);
-               free(insn);
-       }
-       elf_close(file->elf);
-}
-
-const char * const check_usage[] = {
+static const char * const check_usage[] = {
        "objtool check [<options>] file.o",
        NULL,
 };
 
+const struct option check_options[] = {
+       OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
+       OPT_END(),
+};
+
 int cmd_check(int argc, const char **argv)
 {
-       struct objtool_file file;
-       int ret, warnings = 0;
-
-       const struct option options[] = {
-               OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
-               OPT_END(),
-       };
+       const char *objname;
 
-       argc = parse_options(argc, argv, options, check_usage, 0);
+       argc = parse_options(argc, argv, check_options, check_usage, 0);
 
        if (argc != 1)
-               usage_with_options(check_usage, options);
+               usage_with_options(check_usage, check_options);
 
        objname = argv[0];
 
-       file.elf = elf_open(objname);
-       if (!file.elf) {
-               fprintf(stderr, "error reading elf file %s\n", objname);
-               return 1;
-       }
-
-       INIT_LIST_HEAD(&file.insn_list);
-       hash_init(file.insn_hash);
-       file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
-       file.rodata = find_section_by_name(file.elf, ".rodata");
-       file.ignore_unreachables = false;
-       file.c_file = find_section_by_name(file.elf, ".comment");
-
-       ret = decode_sections(&file);
-       if (ret < 0)
-               goto out;
-       warnings += ret;
-
-       ret = validate_functions(&file);
-       if (ret < 0)
-               goto out;
-       warnings += ret;
-
-       ret = validate_uncallable_instructions(&file);
-       if (ret < 0)
-               goto out;
-       warnings += ret;
-
-out:
-       cleanup(&file);
-
-       /* ignore warnings for now until we get all the code cleaned up */
-       if (ret || warnings)
-               return 0;
-       return 0;
+       return check(objname, nofp);
 }
diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h
new file mode 100644 (file)
index 0000000..443ab2c
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _OBJTOOL_CFI_H
+#define _OBJTOOL_CFI_H
+
+#define CFI_UNDEFINED          -1
+#define CFI_CFA                        -2
+#define CFI_SP_INDIRECT                -3
+#define CFI_BP_INDIRECT                -4
+
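+/*
+ * Register numbers below follow the x86-64 System V DWARF convention
+ * (rax=0, rdx=1, rcx=2, rbx=3, rsi=4, rdi=5, rbp=6, rsp=7, r8-r15=8-15,
+ * return address=16), so CFI state maps directly onto DWARF register
+ * numbers.
+ */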
+#define CFI_AX                 0
+#define CFI_DX                 1
+#define CFI_CX                 2
+#define CFI_BX                 3
+#define CFI_SI                 4
+#define CFI_DI                 5
+#define CFI_BP                 6
+#define CFI_SP                 7
+#define CFI_R8                 8
+#define CFI_R9                 9
+#define CFI_R10                        10
+#define CFI_R11                        11
+#define CFI_R12                        12
+#define CFI_R13                        13
+#define CFI_R14                        14
+#define CFI_R15                        15
+#define CFI_RA                 16
+#define CFI_NUM_REGS   17
+
+struct cfi_reg {
+       int base;
+       int offset;
+};
+
+struct cfi_state {
+       struct cfi_reg cfa;
+       struct cfi_reg regs[CFI_NUM_REGS];
+};
+
+#endif /* _OBJTOOL_CFI_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
new file mode 100644 (file)
index 0000000..fea2221
--- /dev/null
@@ -0,0 +1,1655 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+
+#include "check.h"
+#include "elf.h"
+#include "special.h"
+#include "arch.h"
+#include "warn.h"
+
+#include <linux/hashtable.h>
+#include <linux/kernel.h>
+
+struct alternative {
+       struct list_head list;
+       struct instruction *insn;
+};
+
+const char *objname;
+static bool nofp;
+struct cfi_state initial_func_cfi;
+
+static struct instruction *find_insn(struct objtool_file *file,
+                                    struct section *sec, unsigned long offset)
+{
+       struct instruction *insn;
+
+       hash_for_each_possible(file->insn_hash, insn, hash, offset)
+               if (insn->sec == sec && insn->offset == offset)
+                       return insn;
+
+       return NULL;
+}
+
+static struct instruction *next_insn_same_sec(struct objtool_file *file,
+                                             struct instruction *insn)
+{
+       struct instruction *next = list_next_entry(insn, list);
+
+       if (!next || &next->list == &file->insn_list || next->sec != insn->sec)
+               return NULL;
+
+       return next;
+}
+
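+/*
+ * gcov-instrumented objects leave "__gcov_." symbols behind.  gcov also
+ * generates a lot of technically unreachable instructions, so this is used
+ * to suppress unreachable-instruction warnings for such files.
+ */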
+static bool gcov_enabled(struct objtool_file *file)
+{
+       struct section *sec;
+       struct symbol *sym;
+
+       for_each_sec(file, sec)
+               list_for_each_entry(sym, &sec->symbol_list, list)
+                       if (!strncmp(sym->name, "__gcov_.", 8))
+                               return true;
+
+       return false;
+}
+
+#define func_for_each_insn(file, func, insn)                           \
+       for (insn = find_insn(file, func->sec, func->offset);           \
+            insn && &insn->list != &file->insn_list &&                 \
+               insn->sec == func->sec &&                               \
+               insn->offset < func->offset + func->len;                \
+            insn = list_next_entry(insn, list))
+
+#define func_for_each_insn_continue_reverse(file, func, insn)          \
+       for (insn = list_prev_entry(insn, list);                        \
+            &insn->list != &file->insn_list &&                         \
+               insn->sec == func->sec && insn->offset >= func->offset; \
+            insn = list_prev_entry(insn, list))
+
+#define sec_for_each_insn_from(file, insn)                             \
+       for (; insn; insn = next_insn_same_sec(file, insn))
+
+#define sec_for_each_insn_continue(file, insn)                         \
+       for (insn = next_insn_same_sec(file, insn); insn;               \
+            insn = next_insn_same_sec(file, insn))
+
+/*
+ * Check if the function has been manually whitelisted with the
+ * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
+ * due to its use of a context switching instruction.
+ */
+static bool ignore_func(struct objtool_file *file, struct symbol *func)
+{
+       struct rela *rela;
+       struct instruction *insn;
+
+       /* check for STACK_FRAME_NON_STANDARD */
+       if (file->whitelist && file->whitelist->rela)
+               list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
+                       if (rela->sym->type == STT_SECTION &&
+                           rela->sym->sec == func->sec &&
+                           rela->addend == func->offset)
+                               return true;
+                       if (rela->sym->type == STT_FUNC && rela->sym == func)
+                               return true;
+               }
+
+       /* check if it has a context switching instruction */
+       func_for_each_insn(file, func, insn)
+               if (insn->type == INSN_CONTEXT_SWITCH)
+                       return true;
+
+       return false;
+}
+
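+/*
+ * From C code, the usual way into that whitelist is the
+ * STACK_FRAME_NON_STANDARD() macro from include/linux/frame.h, e.g.:
+ *
+ *   STACK_FRAME_NON_STANDARD(xen_cpuid);
+ *
+ * which drops a reference to the function into the
+ * .discard.func_stack_frame_non_standard section.
+ */
+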
+/*
+ * This checks to see if the given function is a "noreturn" function.
+ *
+ * For global functions which are outside the scope of this object file, we
+ * have to keep a manual list of them.
+ *
+ * For local functions, we have to detect them manually by simply looking for
+ * the lack of a return instruction.
+ *
+ * Returns:
+ *  -1: error
+ *   0: no dead end
+ *   1: dead end
+ */
+static int __dead_end_function(struct objtool_file *file, struct symbol *func,
+                              int recursion)
+{
+       int i;
+       struct instruction *insn;
+       bool empty = true;
+
+       /*
+        * Unfortunately these have to be hard coded because the noreturn
+        * attribute isn't provided in ELF data.
+        */
+       static const char * const global_noreturns[] = {
+               "__stack_chk_fail",
+               "panic",
+               "do_exit",
+               "do_task_dead",
+               "__module_put_and_exit",
+               "complete_and_exit",
+               "kvm_spurious_fault",
+               "__reiserfs_panic",
+               "lbug_with_loc",
+               "fortify_panic",
+       };
+
+       if (func->bind == STB_WEAK)
+               return 0;
+
+       if (func->bind == STB_GLOBAL)
+               for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
+                       if (!strcmp(func->name, global_noreturns[i]))
+                               return 1;
+
+       if (!func->sec)
+               return 0;
+
+       func_for_each_insn(file, func, insn) {
+               empty = false;
+
+               if (insn->type == INSN_RETURN)
+                       return 0;
+       }
+
+       if (empty)
+               return 0;
+
+       /*
+        * A function can have a sibling call instead of a return.  In that
+        * case, the function's dead-end status depends on whether the target
+        * of the sibling call returns.
+        */
+       func_for_each_insn(file, func, insn) {
+               if (insn->sec != func->sec ||
+                   insn->offset >= func->offset + func->len)
+                       break;
+
+               if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+                       struct instruction *dest = insn->jump_dest;
+                       struct symbol *dest_func;
+
+                       if (!dest)
+                               /* sibling call to another file */
+                               return 0;
+
+                       if (dest->sec != func->sec ||
+                           dest->offset < func->offset ||
+                           dest->offset >= func->offset + func->len) {
+                               /* local sibling call */
+                               dest_func = find_symbol_by_offset(dest->sec,
+                                                                 dest->offset);
+                               if (!dest_func)
+                                       continue;
+
+                               if (recursion == 5) {
+                                       WARN_FUNC("infinite recursion (objtool bug!)",
+                                                 dest->sec, dest->offset);
+                                       return -1;
+                               }
+
+                               return __dead_end_function(file, dest_func,
+                                                          recursion + 1);
+                       }
+               }
+
+               if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
+                       /* sibling call */
+                       return 0;
+       }
+
+       return 1;
+}
+
+static int dead_end_function(struct objtool_file *file, struct symbol *func)
+{
+       return __dead_end_function(file, func, 0);
+}
+
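+/*
+ * For example, a file-local helper like
+ *
+ *   static void noreturn_helper(void) { for (;;) cpu_relax(); }
+ *
+ * compiles to a body with no INSN_RETURN at all, so the scan above reports
+ * it as a dead end even though "noreturn" never appears in the ELF data.
+ * (noreturn_helper() is a made-up name for illustration.)
+ */
+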
+static void clear_insn_state(struct insn_state *state)
+{
+       int i;
+
+       memset(state, 0, sizeof(*state));
+       state->cfa.base = CFI_UNDEFINED;
+       for (i = 0; i < CFI_NUM_REGS; i++)
+               state->regs[i].base = CFI_UNDEFINED;
+       state->drap_reg = CFI_UNDEFINED;
+}
+
+/*
+ * Call the arch-specific instruction decoder for all the instructions and add
+ * them to the global instruction list.
+ */
+static int decode_instructions(struct objtool_file *file)
+{
+       struct section *sec;
+       struct symbol *func;
+       unsigned long offset;
+       struct instruction *insn;
+       int ret;
+
+       for_each_sec(file, sec) {
+
+               if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+                       continue;
+
+               for (offset = 0; offset < sec->len; offset += insn->len) {
+                       insn = malloc(sizeof(*insn));
+                       if (!insn) {
+                               WARN("malloc failed");
+                               return -1;
+                       }
+                       memset(insn, 0, sizeof(*insn));
+                       INIT_LIST_HEAD(&insn->alts);
+                       clear_insn_state(&insn->state);
+
+                       insn->sec = sec;
+                       insn->offset = offset;
+
+                       ret = arch_decode_instruction(file->elf, sec, offset,
+                                                     sec->len - offset,
+                                                     &insn->len, &insn->type,
+                                                     &insn->immediate,
+                                                     &insn->stack_op);
+                       if (ret)
+                               return ret;
+
+                       if (!insn->type || insn->type > INSN_LAST) {
+                               WARN_FUNC("invalid instruction type %d",
+                                         insn->sec, insn->offset, insn->type);
+                               return -1;
+                       }
+
+                       hash_add(file->insn_hash, &insn->hash, insn->offset);
+                       list_add_tail(&insn->list, &file->insn_list);
+               }
+
+               list_for_each_entry(func, &sec->symbol_list, list) {
+                       if (func->type != STT_FUNC)
+                               continue;
+
+                       if (!find_insn(file, sec, func->offset)) {
+                               WARN("%s(): can't find starting instruction",
+                                    func->name);
+                               return -1;
+                       }
+
+                       func_for_each_insn(file, func, insn)
+                               if (!insn->func)
+                                       insn->func = func;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Find all uses of the unreachable() macro, which are code path dead ends.
+ */
+static int add_dead_ends(struct objtool_file *file)
+{
+       struct section *sec;
+       struct rela *rela;
+       struct instruction *insn;
+       bool found;
+
+       sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
+       if (!sec)
+               return 0;
+
+       list_for_each_entry(rela, &sec->rela_list, list) {
+               if (rela->sym->type != STT_SECTION) {
+                       WARN("unexpected relocation symbol type in %s", sec->name);
+                       return -1;
+               }
+               insn = find_insn(file, rela->sym->sec, rela->addend);
+               if (insn)
+                       insn = list_prev_entry(insn, list);
+               else if (rela->addend == rela->sym->sec->len) {
+                       found = false;
+                       list_for_each_entry_reverse(insn, &file->insn_list, list) {
+                               if (insn->sec == rela->sym->sec) {
+                                       found = true;
+                                       break;
+                               }
+                       }
+
+                       if (!found) {
+                               WARN("can't find unreachable insn at %s+0x%x",
+                                    rela->sym->sec->name, rela->addend);
+                               return -1;
+                       }
+               } else {
+                       WARN("can't find unreachable insn at %s+0x%x",
+                            rela->sym->sec->name, rela->addend);
+                       return -1;
+               }
+
+               insn->dead_end = true;
+       }
+
+       return 0;
+}
+
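+/*
+ * The producer side is the kernel's unreachable() annotation, which (in
+ * objtool-enabled builds) emits roughly:
+ *
+ *   asm(".pushsection .discard.unreachable\n\t"
+ *       ".long <addr>\n\t"
+ *       ".popsection");
+ *
+ * leaving a relocation in .rela.discard.unreachable at the dead-end
+ * location.  (The exact asm is simplified here for illustration.)
+ */
+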
+/*
+ * Warnings shouldn't be reported for ignored functions.
+ */
+static void add_ignores(struct objtool_file *file)
+{
+       struct instruction *insn;
+       struct section *sec;
+       struct symbol *func;
+
+       for_each_sec(file, sec) {
+               list_for_each_entry(func, &sec->symbol_list, list) {
+                       if (func->type != STT_FUNC)
+                               continue;
+
+                       if (!ignore_func(file, func))
+                               continue;
+
+                       func_for_each_insn(file, func, insn)
+                               insn->ignore = true;
+               }
+       }
+}
+
+/*
+ * Find the destination instructions for all jumps.
+ */
+static int add_jump_destinations(struct objtool_file *file)
+{
+       struct instruction *insn;
+       struct rela *rela;
+       struct section *dest_sec;
+       unsigned long dest_off;
+
+       for_each_insn(file, insn) {
+               if (insn->type != INSN_JUMP_CONDITIONAL &&
+                   insn->type != INSN_JUMP_UNCONDITIONAL)
+                       continue;
+
+               if (insn->ignore)
+                       continue;
+
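+               /*
+                * Note on the "+ 4" below: jump relocations are PC-relative
+                * from the end of the 4-byte immediate, so a rela addend is
+                * the destination offset minus 4.
+                */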
+               rela = find_rela_by_dest_range(insn->sec, insn->offset,
+                                              insn->len);
+               if (!rela) {
+                       dest_sec = insn->sec;
+                       dest_off = insn->offset + insn->len + insn->immediate;
+               } else if (rela->sym->type == STT_SECTION) {
+                       dest_sec = rela->sym->sec;
+                       dest_off = rela->addend + 4;
+               } else if (rela->sym->sec->idx) {
+                       dest_sec = rela->sym->sec;
+                       dest_off = rela->sym->sym.st_value + rela->addend + 4;
+               } else {
+                       /* sibling call */
+                       insn->jump_dest = 0;
+                       continue;
+               }
+
+               insn->jump_dest = find_insn(file, dest_sec, dest_off);
+               if (!insn->jump_dest) {
+
+                       /*
+                        * This is a special case where an alt instruction
+                        * jumps past the end of the section.  These are
+                        * handled later in handle_group_alt().
+                        */
+                       if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+                               continue;
+
+                       WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+                                 insn->sec, insn->offset, dest_sec->name,
+                                 dest_off);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Find the destination instructions for all calls.
+ */
+static int add_call_destinations(struct objtool_file *file)
+{
+       struct instruction *insn;
+       unsigned long dest_off;
+       struct rela *rela;
+
+       for_each_insn(file, insn) {
+               if (insn->type != INSN_CALL)
+                       continue;
+
+               rela = find_rela_by_dest_range(insn->sec, insn->offset,
+                                              insn->len);
+               if (!rela) {
+                       dest_off = insn->offset + insn->len + insn->immediate;
+                       insn->call_dest = find_symbol_by_offset(insn->sec,
+                                                               dest_off);
+                       if (!insn->call_dest) {
+                               WARN_FUNC("can't find call dest symbol at offset 0x%lx",
+                                         insn->sec, insn->offset, dest_off);
+                               return -1;
+                       }
+               } else if (rela->sym->type == STT_SECTION) {
+                       insn->call_dest = find_symbol_by_offset(rela->sym->sec,
+                                                               rela->addend+4);
+                       if (!insn->call_dest ||
+                           insn->call_dest->type != STT_FUNC) {
+                               WARN_FUNC("can't find call dest symbol at %s+0x%x",
+                                         insn->sec, insn->offset,
+                                         rela->sym->sec->name,
+                                         rela->addend + 4);
+                               return -1;
+                       }
+               } else
+                       insn->call_dest = rela->sym;
+       }
+
+       return 0;
+}
+
+/*
+ * The .alternatives section requires some extra special care, over and above
+ * what other special sections require:
+ *
+ * 1. Because alternatives are patched in-place, we need to insert a fake jump
+ *    instruction at the end so that validate_branch() skips all the original
+ *    replaced instructions when validating the new instruction path.
+ *
+ * 2. An added wrinkle is that the new instruction length might be zero.  In
+ *    that case the old instructions are replaced with noops.  We simulate that
+ *    by creating a fake jump as the only new instruction.
+ *
+ * 3. In some cases, the alternative section includes an instruction which
+ *    conditionally jumps to the _end_ of the entry.  We have to modify these
+ *    jumps' destinations to point back to .text rather than the end of the
+ *    entry in .altinstr_replacement.
+ *
+ * 4. It has been requested that we don't validate the !POPCNT feature path
+ *    which is a "very very small percentage of machines".
+ */
+static int handle_group_alt(struct objtool_file *file,
+                           struct special_alt *special_alt,
+                           struct instruction *orig_insn,
+                           struct instruction **new_insn)
+{
+       struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
+       unsigned long dest_off;
+
+       last_orig_insn = NULL;
+       insn = orig_insn;
+       sec_for_each_insn_from(file, insn) {
+               if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+                       break;
+
+               if (special_alt->skip_orig)
+                       insn->type = INSN_NOP;
+
+               insn->alt_group = true;
+               last_orig_insn = insn;
+       }
+
+       if (!next_insn_same_sec(file, last_orig_insn)) {
+               WARN("%s: don't know how to handle alternatives at end of section",
+                    special_alt->orig_sec->name);
+               return -1;
+       }
+
+       fake_jump = malloc(sizeof(*fake_jump));
+       if (!fake_jump) {
+               WARN("malloc failed");
+               return -1;
+       }
+       memset(fake_jump, 0, sizeof(*fake_jump));
+       INIT_LIST_HEAD(&fake_jump->alts);
+       clear_insn_state(&fake_jump->state);
+
+       fake_jump->sec = special_alt->new_sec;
+       fake_jump->offset = -1;
+       fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+       fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+       fake_jump->ignore = true;
+
+       if (!special_alt->new_len) {
+               *new_insn = fake_jump;
+               return 0;
+       }
+
+       last_new_insn = NULL;
+       insn = *new_insn;
+       sec_for_each_insn_from(file, insn) {
+               if (insn->offset >= special_alt->new_off + special_alt->new_len)
+                       break;
+
+               last_new_insn = insn;
+
+               if (insn->type != INSN_JUMP_CONDITIONAL &&
+                   insn->type != INSN_JUMP_UNCONDITIONAL)
+                       continue;
+
+               if (!insn->immediate)
+                       continue;
+
+               dest_off = insn->offset + insn->len + insn->immediate;
+               if (dest_off == special_alt->new_off + special_alt->new_len)
+                       insn->jump_dest = fake_jump;
+
+               if (!insn->jump_dest) {
+                       WARN_FUNC("can't find alternative jump destination",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+       }
+
+       if (!last_new_insn) {
+               WARN_FUNC("can't find last new alternative instruction",
+                         special_alt->new_sec, special_alt->new_off);
+               return -1;
+       }
+
+       list_add(&fake_jump->list, &last_new_insn->list);
+
+       return 0;
+}
+
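+/*
+ * These group alternatives come from the kernel's alternative() macros
+ * (arch/x86/include/asm/alternative.h), e.g.:
+ *
+ *   alternative("call func_generic", "call func_fast", X86_FEATURE_FOO);
+ *
+ * where the feature bit and callees are made up for illustration.  Each use
+ * records the orig/new code locations and lengths in .altinstructions,
+ * which special_get_alts() parses into the special_alt entries above.
+ */
+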
+/*
+ * A jump table entry can either convert a nop to a jump or a jump to a nop.
+ * If the original instruction is a jump, make the alt entry an effective nop
+ * by just skipping the original instruction.
+ */
+static int handle_jump_alt(struct objtool_file *file,
+                          struct special_alt *special_alt,
+                          struct instruction *orig_insn,
+                          struct instruction **new_insn)
+{
+       if (orig_insn->type == INSN_NOP)
+               return 0;
+
+       if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
+               WARN_FUNC("unsupported instruction at jump label",
+                         orig_insn->sec, orig_insn->offset);
+               return -1;
+       }
+
+       *new_insn = list_next_entry(orig_insn, list);
+       return 0;
+}
+
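+/*
+ * These jump_or_nop entries come from the jump-label machinery behind
+ * static_key/static_branch (include/linux/jump_label.h): each site is
+ * compiled as either a nop or an unconditional jmp and recorded in the
+ * __jump_table section.
+ */
+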
+/*
+ * Read all the special sections which have alternate instructions which can be
+ * patched in or redirected to at runtime.  Each instruction having alternate
+ * instruction(s) has them added to its insn->alts list, which will be
+ * traversed in validate_branch().
+ */
+static int add_special_section_alts(struct objtool_file *file)
+{
+       struct list_head special_alts;
+       struct instruction *orig_insn, *new_insn;
+       struct special_alt *special_alt, *tmp;
+       struct alternative *alt;
+       int ret;
+
+       ret = special_get_alts(file->elf, &special_alts);
+       if (ret)
+               return ret;
+
+       list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
+               alt = malloc(sizeof(*alt));
+               if (!alt) {
+                       WARN("malloc failed");
+                       ret = -1;
+                       goto out;
+               }
+
+               orig_insn = find_insn(file, special_alt->orig_sec,
+                                     special_alt->orig_off);
+               if (!orig_insn) {
+                       WARN_FUNC("special: can't find orig instruction",
+                                 special_alt->orig_sec, special_alt->orig_off);
+                       ret = -1;
+                       goto out;
+               }
+
+               new_insn = NULL;
+               if (!special_alt->group || special_alt->new_len) {
+                       new_insn = find_insn(file, special_alt->new_sec,
+                                            special_alt->new_off);
+                       if (!new_insn) {
+                               WARN_FUNC("special: can't find new instruction",
+                                         special_alt->new_sec,
+                                         special_alt->new_off);
+                               ret = -1;
+                               goto out;
+                       }
+               }
+
+               if (special_alt->group) {
+                       ret = handle_group_alt(file, special_alt, orig_insn,
+                                              &new_insn);
+                       if (ret)
+                               goto out;
+               } else if (special_alt->jump_or_nop) {
+                       ret = handle_jump_alt(file, special_alt, orig_insn,
+                                             &new_insn);
+                       if (ret)
+                               goto out;
+               }
+
+               alt->insn = new_insn;
+               list_add_tail(&alt->list, &orig_insn->alts);
+
+               list_del(&special_alt->list);
+               free(special_alt);
+       }
+
+out:
+       return ret;
+}
+
+static int add_switch_table(struct objtool_file *file, struct symbol *func,
+                           struct instruction *insn, struct rela *table,
+                           struct rela *next_table)
+{
+       struct rela *rela = table;
+       struct instruction *alt_insn;
+       struct alternative *alt;
+
+       list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
+               if (rela == next_table)
+                       break;
+
+               if (rela->sym->sec != insn->sec ||
+                   rela->addend <= func->offset ||
+                   rela->addend >= func->offset + func->len)
+                       break;
+
+               alt_insn = find_insn(file, insn->sec, rela->addend);
+               if (!alt_insn) {
+                       WARN("%s: can't find instruction at %s+0x%x",
+                            file->rodata->rela->name, insn->sec->name,
+                            rela->addend);
+                       return -1;
+               }
+
+               alt = malloc(sizeof(*alt));
+               if (!alt) {
+                       WARN("malloc failed");
+                       return -1;
+               }
+
+               alt->insn = alt_insn;
+               list_add_tail(&alt->list, &insn->alts);
+       }
+
+       return 0;
+}
+
+/*
+ * find_switch_table() - Given a dynamic jump, find the switch jump table in
+ * .rodata associated with it.
+ *
+ * There are 3 basic patterns:
+ *
+ * 1. jmpq *[rodata addr](,%reg,8)
+ *
+ *    This is the most common case by far.  It jumps to an address in a simple
+ *    jump table which is stored in .rodata.
+ *
+ * 2. jmpq *[rodata addr](%rip)
+ *
+ *    This is caused by a rare GCC quirk, currently only seen in three driver
+ *    functions in the kernel, only with certain obscure non-distro configs.
+ *
+ *    As part of an optimization, GCC makes a copy of an existing switch jump
+ *    table, modifies it, and then hard-codes the jump (albeit with an indirect
+ *    jump) to use a single entry in the table.  The rest of the jump table and
+ *    some of its jump targets remain as dead code.
+ *
+ *    In such a case we can just crudely ignore all unreachable instruction
+ *    warnings for the entire object file.  Ideally we would just ignore them
+ *    for the function, but that would require redesigning the code quite a
+ *    bit.  And honestly that's just not worth doing: unreachable instruction
+ *    warnings are of questionable value anyway, and this is such a rare issue.
+ *
+ * 3. mov [rodata addr],%reg1
+ *    ... some instructions ...
+ *    jmpq *(%reg1,%reg2,8)
+ *
+ *    This is a fairly uncommon pattern which is new for GCC 6.  As of this
+ *    writing, there are 11 occurrences of it in the allmodconfig kernel.
+ *
+ *    TODO: Once we have DWARF CFI and smarter instruction decoding logic,
+ *    ensure the same register is used in the mov and jump instructions.
+ */
+static struct rela *find_switch_table(struct objtool_file *file,
+                                     struct symbol *func,
+                                     struct instruction *insn)
+{
+       struct rela *text_rela, *rodata_rela;
+       struct instruction *orig_insn = insn;
+
+       text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
+       if (text_rela && text_rela->sym == file->rodata->sym) {
+               /* case 1 */
+               rodata_rela = find_rela_by_dest(file->rodata,
+                                               text_rela->addend);
+               if (rodata_rela)
+                       return rodata_rela;
+
+               /* case 2 */
+               rodata_rela = find_rela_by_dest(file->rodata,
+                                               text_rela->addend + 4);
+               if (!rodata_rela)
+                       return NULL;
+               file->ignore_unreachables = true;
+               return rodata_rela;
+       }
+
+       /* case 3 */
+       func_for_each_insn_continue_reverse(file, func, insn) {
+               if (insn->type == INSN_JUMP_DYNAMIC)
+                       break;
+
+               /* allow small jumps within the range */
+               if (insn->type == INSN_JUMP_UNCONDITIONAL &&
+                   insn->jump_dest &&
+                   (insn->jump_dest->offset <= insn->offset ||
+                    insn->jump_dest->offset > orig_insn->offset))
+                       break;
+
+               /* look for a relocation which references .rodata */
+               text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
+                                                   insn->len);
+               if (!text_rela || text_rela->sym != file->rodata->sym)
+                       continue;
+
+               /*
+                * Make sure the .rodata address isn't associated with a
+                * symbol.  gcc jump tables are anonymous data.
+                */
+               if (find_symbol_containing(file->rodata, text_rela->addend))
+                       continue;
+
+               return find_rela_by_dest(file->rodata, text_rela->addend);
+       }
+
+       return NULL;
+}
+
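+/*
+ * For pattern 1 above, the typical source is a plain C switch with a dense
+ * set of cases, e.g. (illustrative only):
+ *
+ *   switch (cmd) {
+ *   case 0: ...  case 1: ...  case 2: ...  case 3: ...
+ *   }
+ *
+ * for which gcc emits "jmpq *table(,%reg,8)" with 'table' an anonymous
+ * array of code addresses in .rodata.
+ */
+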
+static int add_func_switch_tables(struct objtool_file *file,
+                                 struct symbol *func)
+{
+       struct instruction *insn, *prev_jump = NULL;
+       struct rela *rela, *prev_rela = NULL;
+       int ret;
+
+       func_for_each_insn(file, func, insn) {
+               if (insn->type != INSN_JUMP_DYNAMIC)
+                       continue;
+
+               rela = find_switch_table(file, func, insn);
+               if (!rela)
+                       continue;
+
+               /*
+                * We found a switch table, but we don't know yet how big it
+                * is.  Don't add it until we reach the end of the function or
+                * the beginning of another switch table in the same function.
+                */
+               if (prev_jump) {
+                       ret = add_switch_table(file, func, prev_jump, prev_rela,
+                                              rela);
+                       if (ret)
+                               return ret;
+               }
+
+               prev_jump = insn;
+               prev_rela = rela;
+       }
+
+       if (prev_jump) {
+               ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/*
+ * For some switch statements, gcc generates a jump table in the .rodata
+ * section which contains a list of addresses within the function to jump to.
+ * This finds these jump tables and adds them to the insn->alts lists.
+ */
+static int add_switch_table_alts(struct objtool_file *file)
+{
+       struct section *sec;
+       struct symbol *func;
+       int ret;
+
+       if (!file->rodata || !file->rodata->rela)
+               return 0;
+
+       for_each_sec(file, sec) {
+               list_for_each_entry(func, &sec->symbol_list, list) {
+                       if (func->type != STT_FUNC)
+                               continue;
+
+                       ret = add_func_switch_tables(file, func);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
+static int decode_sections(struct objtool_file *file)
+{
+       int ret;
+
+       ret = decode_instructions(file);
+       if (ret)
+               return ret;
+
+       ret = add_dead_ends(file);
+       if (ret)
+               return ret;
+
+       add_ignores(file);
+
+       ret = add_jump_destinations(file);
+       if (ret)
+               return ret;
+
+       ret = add_call_destinations(file);
+       if (ret)
+               return ret;
+
+       ret = add_special_section_alts(file);
+       if (ret)
+               return ret;
+
+       ret = add_switch_table_alts(file);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static bool is_fentry_call(struct instruction *insn)
+{
+       if (insn->type == INSN_CALL &&
+           insn->call_dest->type == STT_NOTYPE &&
+           !strcmp(insn->call_dest->name, "__fentry__"))
+               return true;
+
+       return false;
+}
+
+static bool has_modified_stack_frame(struct insn_state *state)
+{
+       int i;
+
+       if (state->cfa.base != initial_func_cfi.cfa.base ||
+           state->cfa.offset != initial_func_cfi.cfa.offset ||
+           state->stack_size != initial_func_cfi.cfa.offset ||
+           state->drap)
+               return true;
+
+       for (i = 0; i < CFI_NUM_REGS; i++)
+               if (state->regs[i].base != initial_func_cfi.regs[i].base ||
+                   state->regs[i].offset != initial_func_cfi.regs[i].offset)
+                       return true;
+
+       return false;
+}
+
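+/*
+ * A valid frame is the standard prologue state: CFA based at %rbp with the
+ * caller's %rbp saved at CFA-16 (the return address sits at CFA-8, and
+ * "push %rbp" lands immediately below it).
+ */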
+static bool has_valid_stack_frame(struct insn_state *state)
+{
+       if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA &&
+           state->regs[CFI_BP].offset == -16)
+               return true;
+
+       if (state->drap && state->regs[CFI_BP].base == CFI_BP)
+               return true;
+
+       return false;
+}
+
+static void save_reg(struct insn_state *state, unsigned char reg, int base,
+                    int offset)
+{
+       if ((arch_callee_saved_reg(reg) ||
+           (state->drap && reg == state->drap_reg)) &&
+           state->regs[reg].base == CFI_UNDEFINED) {
+               state->regs[reg].base = base;
+               state->regs[reg].offset = offset;
+       }
+}
+
+static void restore_reg(struct insn_state *state, unsigned char reg)
+{
+       state->regs[reg].base = CFI_UNDEFINED;
+       state->regs[reg].offset = 0;
+}
+
+/*
+ * A note about DRAP stack alignment:
+ *
+ * GCC has the concept of a DRAP register, which is used to help keep track of
+ * the stack pointer when aligning the stack.  r10 or r13 is used as the DRAP
+ * register.  The typical DRAP pattern is:
+ *
+ *   4c 8d 54 24 08          lea    0x8(%rsp),%r10
+ *   48 83 e4 c0             and    $0xffffffffffffffc0,%rsp
+ *   41 ff 72 f8             pushq  -0x8(%r10)
+ *   55                      push   %rbp
+ *   48 89 e5                mov    %rsp,%rbp
+ *                           (more pushes)
+ *   41 52                   push   %r10
+ *                           ...
+ *   41 5a                   pop    %r10
+ *                           (more pops)
+ *   5d                      pop    %rbp
+ *   49 8d 62 f8             lea    -0x8(%r10),%rsp
+ *   c3                      retq
+ *
+ * There are some variations in the epilogues, like:
+ *
+ *   5b                      pop    %rbx
+ *   41 5a                   pop    %r10
+ *   41 5c                   pop    %r12
+ *   41 5d                   pop    %r13
+ *   41 5e                   pop    %r14
+ *   c9                      leaveq
+ *   49 8d 62 f8             lea    -0x8(%r10),%rsp
+ *   c3                      retq
+ *
+ * and:
+ *
+ *   4c 8b 55 e8             mov    -0x18(%rbp),%r10
+ *   48 8b 5d e0             mov    -0x20(%rbp),%rbx
+ *   4c 8b 65 f0             mov    -0x10(%rbp),%r12
+ *   4c 8b 6d f8             mov    -0x8(%rbp),%r13
+ *   c9                      leaveq
+ *   49 8d 62 f8             lea    -0x8(%r10),%rsp
+ *   c3                      retq
+ *
+ * Sometimes r13 is used as the DRAP register, in which case it's saved and
+ * restored beforehand:
+ *
+ *   41 55                   push   %r13
+ *   4c 8d 6c 24 10          lea    0x10(%rsp),%r13
+ *   48 83 e4 f0             and    $0xfffffffffffffff0,%rsp
+ *                           ...
+ *   49 8d 65 f0             lea    -0x10(%r13),%rsp
+ *   41 5d                   pop    %r13
+ *   c3                      retq
+ */
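+/*
+ * A C-level construct that commonly triggers DRAP is an over-aligned stack
+ * variable, e.g. (illustrative only):
+ *
+ *   void f(void)
+ *   {
+ *           u8 buf[64] __aligned(64);
+ *           ...
+ *   }
+ *
+ * which forces gcc to realign %rsp and stash the entry stack pointer in the
+ * DRAP register.
+ */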
+static int update_insn_state(struct instruction *insn, struct insn_state *state)
+{
+       struct stack_op *op = &insn->stack_op;
+       struct cfi_reg *cfa = &state->cfa;
+       struct cfi_reg *regs = state->regs;
+
+       /* stack operations don't make sense with an undefined CFA */
+       if (cfa->base == CFI_UNDEFINED) {
+               if (insn->func) {
+                       WARN_FUNC("undefined stack state", insn->sec, insn->offset);
+                       return -1;
+               }
+               return 0;
+       }
+
+       switch (op->dest.type) {
+
+       case OP_DEST_REG:
+               switch (op->src.type) {
+
+               case OP_SRC_REG:
+                       if (cfa->base == op->src.reg && cfa->base == CFI_SP &&
+                           op->dest.reg == CFI_BP && regs[CFI_BP].base == CFI_CFA &&
+                           regs[CFI_BP].offset == -cfa->offset) {
+
+                               /* mov %rsp, %rbp */
+                               cfa->base = op->dest.reg;
+                               state->bp_scratch = false;
+                       } else if (state->drap) {
+
+                               /* drap: mov %rsp, %rbp */
+                               regs[CFI_BP].base = CFI_BP;
+                               regs[CFI_BP].offset = -state->stack_size;
+                               state->bp_scratch = false;
+                       } else if (!nofp) {
+
+                               WARN_FUNC("unknown stack-related register move",
+                                         insn->sec, insn->offset);
+                               return -1;
+                       }
+
+                       break;
+
+               case OP_SRC_ADD:
+                       if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) {
+
+                               /* add imm, %rsp */
+                               state->stack_size -= op->src.offset;
+                               if (cfa->base == CFI_SP)
+                                       cfa->offset -= op->src.offset;
+                               break;
+                       }
+
+                       if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
+
+                               /* lea disp(%rbp), %rsp */
+                               state->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+                               break;
+                       }
+
+                       if (op->dest.reg != CFI_BP && op->src.reg == CFI_SP &&
+                           cfa->base == CFI_SP) {
+
+                               /* drap: lea disp(%rsp), %drap */
+                               state->drap_reg = op->dest.reg;
+                               break;
+                       }
+
+                       if (state->drap && op->dest.reg == CFI_SP &&
+                           op->src.reg == state->drap_reg) {
+
+                                /* drap: lea disp(%drap), %rsp */
+                               cfa->base = CFI_SP;
+                               cfa->offset = state->stack_size = -op->src.offset;
+                               state->drap_reg = CFI_UNDEFINED;
+                               state->drap = false;
+                               break;
+                       }
+
+                       if (op->dest.reg == state->cfa.base) {
+                               WARN_FUNC("unsupported stack register modification",
+                                         insn->sec, insn->offset);
+                               return -1;
+                       }
+
+                       break;
+
+               case OP_SRC_AND:
+                       if (op->dest.reg != CFI_SP ||
+                           (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
+                           (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
+                               WARN_FUNC("unsupported stack pointer realignment",
+                                         insn->sec, insn->offset);
+                               return -1;
+                       }
+
+                       if (state->drap_reg != CFI_UNDEFINED) {
+                               /* drap: and imm, %rsp */
+                               cfa->base = state->drap_reg;
+                               cfa->offset = state->stack_size = 0;
+                               state->drap = true;
+
+                       }
+
+                       /*
+                        * Older versions of GCC (4.8ish) realign the stack
+                        * without DRAP, with a frame pointer.
+                        */
+
+                       break;
+
+               case OP_SRC_POP:
+                       if (!state->drap && op->dest.type == OP_DEST_REG &&
+                           op->dest.reg == cfa->base) {
+
+                               /* pop %rbp */
+                               cfa->base = CFI_SP;
+                       }
+
+                       if (regs[op->dest.reg].offset == -state->stack_size) {
+
+                               if (state->drap && cfa->base == CFI_BP_INDIRECT &&
+                                   op->dest.type == OP_DEST_REG &&
+                                   op->dest.reg == state->drap_reg) {
+
+                                       /* drap: pop %drap */
+                                       cfa->base = state->drap_reg;
+                                       cfa->offset = 0;
+                               }
+
+                               restore_reg(state, op->dest.reg);
+                       }
+
+                       state->stack_size -= 8;
+                       if (cfa->base == CFI_SP)
+                               cfa->offset -= 8;
+
+                       break;
+
+               case OP_SRC_REG_INDIRECT:
+                       if (state->drap && op->src.reg == CFI_BP &&
+                           op->src.offset == regs[op->dest.reg].offset) {
+
+                               /* drap: mov disp(%rbp), %reg */
+                               if (op->dest.reg == state->drap_reg) {
+                                       cfa->base = state->drap_reg;
+                                       cfa->offset = 0;
+                               }
+
+                               restore_reg(state, op->dest.reg);
+
+                       } else if (op->src.reg == cfa->base &&
+                           op->src.offset == regs[op->dest.reg].offset + cfa->offset) {
+
+                               /* mov disp(%rbp), %reg */
+                               /* mov disp(%rsp), %reg */
+                               restore_reg(state, op->dest.reg);
+                       }
+
+                       break;
+
+               default:
+                       WARN_FUNC("unknown stack-related instruction",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+
+               break;
+
+       case OP_DEST_PUSH:
+               state->stack_size += 8;
+               if (cfa->base == CFI_SP)
+                       cfa->offset += 8;
+
+               if (op->src.type != OP_SRC_REG)
+                       break;
+
+               if (state->drap) {
+                       if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+                               /* drap: push %drap */
+                               cfa->base = CFI_BP_INDIRECT;
+                               cfa->offset = -state->stack_size;
+
+                               /* save drap so we know when to undefine it */
+                               save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+
+                       } else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) {
+
+                               /* drap: push %rbp */
+                               state->stack_size = 0;
+
+                       } else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+                               /* drap: push %reg */
+                               save_reg(state, op->src.reg, CFI_BP, -state->stack_size);
+                       }
+
+               } else {
+
+                       /* push %reg */
+                       save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+               }
+
+               /* detect when asm code uses rbp as a scratch register */
+               if (!nofp && insn->func && op->src.reg == CFI_BP &&
+                   cfa->base != CFI_BP)
+                       state->bp_scratch = true;
+               break;
+
+       case OP_DEST_REG_INDIRECT:
+
+               if (state->drap) {
+                       if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+                               /* drap: mov %drap, disp(%rbp) */
+                               cfa->base = CFI_BP_INDIRECT;
+                               cfa->offset = op->dest.offset;
+
+                               /* save drap so we know when to undefine it */
+                               save_reg(state, op->src.reg, CFI_CFA, op->dest.offset);
+                       }
+
+                       else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+                               /* drap: mov reg, disp(%rbp) */
+                               save_reg(state, op->src.reg, CFI_BP, op->dest.offset);
+                       }
+
+               } else if (op->dest.reg == cfa->base) {
+
+                       /* mov reg, disp(%rbp) */
+                       /* mov reg, disp(%rsp) */
+                       save_reg(state, op->src.reg, CFI_CFA,
+                                op->dest.offset - state->cfa.offset);
+               }
+
+               break;
+
+       case OP_DEST_LEAVE:
+               if ((!state->drap && cfa->base != CFI_BP) ||
+                   (state->drap && cfa->base != state->drap_reg)) {
+                       WARN_FUNC("leave instruction with modified stack frame",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+
+               /* leave (mov %rbp, %rsp; pop %rbp) */
+
+               state->stack_size = -state->regs[CFI_BP].offset - 8;
+               restore_reg(state, CFI_BP);
+
+               if (!state->drap) {
+                       cfa->base = CFI_SP;
+                       cfa->offset -= 8;
+               }
+
+               break;
+
+       case OP_DEST_MEM:
+               if (op->src.type != OP_SRC_POP) {
+                       WARN_FUNC("unknown stack-related memory operation",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+
+               /* pop mem */
+               state->stack_size -= 8;
+               if (cfa->base == CFI_SP)
+                       cfa->offset -= 8;
+
+               break;
+
+       default:
+               WARN_FUNC("unknown stack-related instruction",
+                         insn->sec, insn->offset);
+               return -1;
+       }
+
+       return 0;
+}
+
+static bool insn_state_match(struct instruction *insn, struct insn_state *state)
+{
+       struct insn_state *state1 = &insn->state, *state2 = state;
+       int i;
+
+       if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) {
+               WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
+                         insn->sec, insn->offset,
+                         state1->cfa.base, state1->cfa.offset,
+                         state2->cfa.base, state2->cfa.offset);
+
+       } else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) {
+               for (i = 0; i < CFI_NUM_REGS; i++) {
+                       if (!memcmp(&state1->regs[i], &state2->regs[i],
+                                   sizeof(struct cfi_reg)))
+                               continue;
+
+                       WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
+                                 insn->sec, insn->offset,
+                                 i, state1->regs[i].base, state1->regs[i].offset,
+                                 i, state2->regs[i].base, state2->regs[i].offset);
+                       break;
+               }
+
+       } else if (state1->drap != state2->drap ||
+                (state1->drap && state1->drap_reg != state2->drap_reg)) {
+               WARN_FUNC("stack state mismatch: drap1=%d(%d) drap2=%d(%d)",
+                         insn->sec, insn->offset,
+                         state1->drap, state1->drap_reg,
+                         state2->drap, state2->drap_reg);
+
+       } else
+               return true;
+
+       return false;
+}
+
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps).  Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/stack-validation.txt.
+ */
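+/*
+ * Note that 'state' is passed by value: each recursive call gets its own
+ * copy, so following one branch can't corrupt the state seen when a
+ * sibling branch is validated.
+ */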
+static int validate_branch(struct objtool_file *file, struct instruction *first,
+                          struct insn_state state)
+{
+       struct alternative *alt;
+       struct instruction *insn;
+       struct section *sec;
+       struct symbol *func = NULL;
+       int ret;
+
+       insn = first;
+       sec = insn->sec;
+
+       if (insn->alt_group && list_empty(&insn->alts)) {
+               WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
+                         sec, insn->offset);
+               return -1;
+       }
+
+       while (1) {
+               if (file->c_file && insn->func) {
+                       if (func && func != insn->func) {
+                               WARN("%s() falls through to next function %s()",
+                                    func->name, insn->func->name);
+                               return 1;
+                       }
+               }
+
+               func = insn->func;
+
+               if (insn->visited) {
+                       if (!insn_state_match(insn, &state))
+                               return 1;
+
+                       return 0;
+               }
+
+               insn->state = state;
+
+               insn->visited = true;
+
+               list_for_each_entry(alt, &insn->alts, list) {
+                       ret = validate_branch(file, alt->insn, state);
+                       if (ret)
+                               return 1;
+               }
+
+               switch (insn->type) {
+
+               case INSN_RETURN:
+                       if (func && has_modified_stack_frame(&state)) {
+                               WARN_FUNC("return with modified stack frame",
+                                         sec, insn->offset);
+                               return 1;
+                       }
+
+                       if (state.bp_scratch) {
+                               WARN("%s uses BP as a scratch register",
+                                    insn->func->name);
+                               return 1;
+                       }
+
+                       return 0;
+
+               case INSN_CALL:
+                       if (is_fentry_call(insn))
+                               break;
+
+                       ret = dead_end_function(file, insn->call_dest);
+                       if (ret == 1)
+                               return 0;
+                       if (ret == -1)
+                               return 1;
+
+                       /* fallthrough */
+               case INSN_CALL_DYNAMIC:
+                       if (!nofp && func && !has_valid_stack_frame(&state)) {
+                               WARN_FUNC("call without frame pointer save/setup",
+                                         sec, insn->offset);
+                               return 1;
+                       }
+                       break;
+
+               case INSN_JUMP_CONDITIONAL:
+               case INSN_JUMP_UNCONDITIONAL:
+                       if (insn->jump_dest) {
+                               ret = validate_branch(file, insn->jump_dest,
+                                                     state);
+                               if (ret)
+                                       return 1;
+                       } else if (func && has_modified_stack_frame(&state)) {
+                               WARN_FUNC("sibling call from callable instruction with modified stack frame",
+                                         sec, insn->offset);
+                               return 1;
+                       } /* else it's a sibling call */
+
+                       if (insn->type == INSN_JUMP_UNCONDITIONAL)
+                               return 0;
+
+                       break;
+
+               case INSN_JUMP_DYNAMIC:
+                       if (func && list_empty(&insn->alts) &&
+                           has_modified_stack_frame(&state)) {
+                               WARN_FUNC("sibling call from callable instruction with modified stack frame",
+                                         sec, insn->offset);
+                               return 1;
+                       }
+
+                       return 0;
+
+               case INSN_STACK:
+                       if (update_insn_state(insn, &state))
+                               return -1;
+
+                       break;
+
+               default:
+                       break;
+               }
+
+               if (insn->dead_end)
+                       return 0;
+
+               insn = next_insn_same_sec(file, insn);
+               if (!insn) {
+                       WARN("%s: unexpected end of section", sec->name);
+                       return 1;
+               }
+       }
+
+       return 0;
+}
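The fallthrough check at the top of the loop above catches functions that run off their end into the next symbol, which typically happens when the compiler knows a call never returns but objtool does not. A hypothetical C-level illustration, with made-up names; in practice dead_end_function() is what prevents this false walk:

	/* my_abort() never returns, so the compiler emits no ret after
	 * the call; if objtool failed to classify my_abort() as a dead
	 * end, validate_branch() would keep walking past the call site
	 * and warn "f() falls through to next function g()".
	 */
	void my_abort(void) __attribute__((noreturn));

	int f(int x)
	{
		my_abort();
	}

	int g(int x)
	{
		return x;
	}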
+
+static bool is_kasan_insn(struct instruction *insn)
+{
+       return (insn->type == INSN_CALL &&
+               !strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+}
+
+static bool is_ubsan_insn(struct instruction *insn)
+{
+       return (insn->type == INSN_CALL &&
+               !strcmp(insn->call_dest->name,
+                       "__ubsan_handle_builtin_unreachable"));
+}
+
+static bool ignore_unreachable_insn(struct instruction *insn)
+{
+       int i;
+
+       if (insn->ignore || insn->type == INSN_NOP)
+               return true;
+
+       /*
+        * Ignore any unused exceptions.  This can happen when a whitelisted
+        * function has an exception table entry.
+        */
+       if (!strcmp(insn->sec->name, ".fixup"))
+               return true;
+
+       /*
+        * Check if this (or a subsequent) instruction is related to
+        * CONFIG_UBSAN or CONFIG_KASAN.
+        *
+        * End the search at 5 instructions to avoid going into the weeds.
+        */
+       if (!insn->func)
+               return false;
+       for (i = 0; i < 5; i++) {
+
+               if (is_kasan_insn(insn) || is_ubsan_insn(insn))
+                       return true;
+
+               if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
+                       insn = insn->jump_dest;
+                       continue;
+               }
+
+               if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
+                       break;
+               insn = list_next_entry(insn, list);
+       }
+
+       return false;
+}
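To see why the KASAN/UBSAN lookahead above is needed, consider a hypothetical instrumented function (illustrative only):

	/* With CONFIG_UBSAN, the unreachable default: case becomes a
	 * call to __ubsan_handle_builtin_unreachable(); anything the
	 * compiler emits after that call can never run, and
	 * ignore_unreachable_insn() suppresses the warning for it.
	 */
	int pick(int x)
	{
		switch (x & 1) {
		case 0: return 10;
		case 1: return 11;
		default: __builtin_unreachable();
		}
	}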
+
+static int validate_functions(struct objtool_file *file)
+{
+       struct section *sec;
+       struct symbol *func;
+       struct instruction *insn;
+       struct insn_state state;
+       int ret, warnings = 0;
+
+       clear_insn_state(&state);
+
+       state.cfa = initial_func_cfi.cfa;
+       memcpy(&state.regs, &initial_func_cfi.regs,
+              CFI_NUM_REGS * sizeof(struct cfi_reg));
+       state.stack_size = initial_func_cfi.cfa.offset;
+
+       for_each_sec(file, sec) {
+               list_for_each_entry(func, &sec->symbol_list, list) {
+                       if (func->type != STT_FUNC)
+                               continue;
+
+                       insn = find_insn(file, sec, func->offset);
+                       if (!insn || insn->ignore)
+                               continue;
+
+                       ret = validate_branch(file, insn, state);
+                       warnings += ret;
+               }
+       }
+
+       return warnings;
+}
+
+static int validate_reachable_instructions(struct objtool_file *file)
+{
+       struct instruction *insn;
+
+       if (file->ignore_unreachables)
+               return 0;
+
+       for_each_insn(file, insn) {
+               if (insn->visited || ignore_unreachable_insn(insn))
+                       continue;
+
+               /*
+                * gcov produces a lot of unreachable instructions.  If we get
+                * an unreachable warning and the file has gcov enabled, just
+                * ignore it, and all other such warnings for the file.  Do
+                * this here because this is an expensive function.
+                */
+               if (gcov_enabled(file))
+                       return 0;
+
+               WARN_FUNC("unreachable instruction", insn->sec, insn->offset);
+               return 1;
+       }
+
+       return 0;
+}
+
+static void cleanup(struct objtool_file *file)
+{
+       struct instruction *insn, *tmpinsn;
+       struct alternative *alt, *tmpalt;
+
+       list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
+               list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
+                       list_del(&alt->list);
+                       free(alt);
+               }
+               list_del(&insn->list);
+               hash_del(&insn->hash);
+               free(insn);
+       }
+       elf_close(file->elf);
+}
+
+int check(const char *_objname, bool _nofp)
+{
+       struct objtool_file file;
+       int ret, warnings = 0;
+
+       objname = _objname;
+       nofp = _nofp;
+
+       file.elf = elf_open(objname);
+       if (!file.elf)
+               return 1;
+
+       INIT_LIST_HEAD(&file.insn_list);
+       hash_init(file.insn_hash);
+       file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
+       file.rodata = find_section_by_name(file.elf, ".rodata");
+       file.ignore_unreachables = false;
+       file.c_file = find_section_by_name(file.elf, ".comment");
+
+       arch_initial_func_cfi_state(&initial_func_cfi);
+
+       ret = decode_sections(&file);
+       if (ret < 0)
+               goto out;
+       warnings += ret;
+
+       if (list_empty(&file.insn_list))
+               goto out;
+
+       ret = validate_functions(&file);
+       if (ret < 0)
+               goto out;
+       warnings += ret;
+
+       if (!warnings) {
+               ret = validate_reachable_instructions(&file);
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
+       }
+
+out:
+       cleanup(&file);
+
+       /* ignore warnings for now until we get all the code cleaned up */
+       if (ret || warnings)
+               return 0;
+       return 0;
+}
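check() is the entry point that the new check.h below exports; a minimal caller sketch (builtin-check.c in the tree does the equivalent with real option parsing; the code here is illustrative only):

	#include "builtin.h"
	#include "check.h"

	int cmd_check(int argc, const char **argv)
	{
		/* objname and the frame-pointer flag come from the
		 * parsed command line in the real tool */
		const char *objname = argv[argc - 1];

		return check(objname, false);
	}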
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
new file mode 100644 (file)
index 0000000..da85f5b
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _CHECK_H
+#define _CHECK_H
+
+#include <stdbool.h>
+#include "elf.h"
+#include "cfi.h"
+#include "arch.h"
+#include <linux/hashtable.h>
+
+struct insn_state {
+       struct cfi_reg cfa;
+       struct cfi_reg regs[CFI_NUM_REGS];
+       int stack_size;
+       bool bp_scratch;
+       bool drap;
+       int drap_reg;
+};
+
+struct instruction {
+       struct list_head list;
+       struct hlist_node hash;
+       struct section *sec;
+       unsigned long offset;
+       unsigned int len;
+       unsigned char type;
+       unsigned long immediate;
+       bool alt_group, visited, dead_end, ignore;
+       struct symbol *call_dest;
+       struct instruction *jump_dest;
+       struct list_head alts;
+       struct symbol *func;
+       struct stack_op stack_op;
+       struct insn_state state;
+};
+
+struct objtool_file {
+       struct elf *elf;
+       struct list_head insn_list;
+       DECLARE_HASHTABLE(insn_hash, 16);
+       struct section *rodata, *whitelist;
+       bool ignore_unreachables, c_file;
+};
+
+int check(const char *objname, bool nofp);
+
+#define for_each_insn(file, insn)                                      \
+       list_for_each_entry(insn, &file->insn_list, list)
+
+#endif /* _CHECK_H */
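A usage sketch for the for_each_insn() iterator defined above; clear_visited() is a made-up helper, but check.c uses the macro the same way in validate_reachable_instructions():

	static void clear_visited(struct objtool_file *file)
	{
		struct instruction *insn;

		/* walk every decoded instruction in the object file */
		for_each_insn(file, insn)
			insn->visited = false;
	}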
index d897702..1a7e8aa 100644 (file)
@@ -37,6 +37,9 @@
 #define ELF_C_READ_MMAP ELF_C_READ
 #endif
 
+#define WARN_ELF(format, ...)                                  \
+       WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+
 struct section *find_section_by_name(struct elf *elf, const char *name)
 {
        struct section *sec;
@@ -139,12 +142,12 @@ static int read_sections(struct elf *elf)
        int i;
 
        if (elf_getshdrnum(elf->elf, &sections_nr)) {
-               perror("elf_getshdrnum");
+               WARN_ELF("elf_getshdrnum");
                return -1;
        }
 
        if (elf_getshdrstrndx(elf->elf, &shstrndx)) {
-               perror("elf_getshdrstrndx");
+               WARN_ELF("elf_getshdrstrndx");
                return -1;
        }
 
@@ -165,37 +168,36 @@ static int read_sections(struct elf *elf)
 
                s = elf_getscn(elf->elf, i);
                if (!s) {
-                       perror("elf_getscn");
+                       WARN_ELF("elf_getscn");
                        return -1;
                }
 
                sec->idx = elf_ndxscn(s);
 
                if (!gelf_getshdr(s, &sec->sh)) {
-                       perror("gelf_getshdr");
+                       WARN_ELF("gelf_getshdr");
                        return -1;
                }
 
                sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name);
                if (!sec->name) {
-                       perror("elf_strptr");
+                       WARN_ELF("elf_strptr");
                        return -1;
                }
 
-               sec->elf_data = elf_getdata(s, NULL);
-               if (!sec->elf_data) {
-                       perror("elf_getdata");
+               sec->data = elf_getdata(s, NULL);
+               if (!sec->data) {
+                       WARN_ELF("elf_getdata");
                        return -1;
                }
 
-               if (sec->elf_data->d_off != 0 ||
-                   sec->elf_data->d_size != sec->sh.sh_size) {
+               if (sec->data->d_off != 0 ||
+                   sec->data->d_size != sec->sh.sh_size) {
                        WARN("unexpected data attributes for %s", sec->name);
                        return -1;
                }
 
-               sec->data = (unsigned long)sec->elf_data->d_buf;
-               sec->len = sec->elf_data->d_size;
+               sec->len = sec->data->d_size;
        }
 
        /* sanity check, one more call to elf_nextscn() should return NULL */
@@ -232,15 +234,15 @@ static int read_symbols(struct elf *elf)
 
                sym->idx = i;
 
-               if (!gelf_getsym(symtab->elf_data, i, &sym->sym)) {
-                       perror("gelf_getsym");
+               if (!gelf_getsym(symtab->data, i, &sym->sym)) {
+                       WARN_ELF("gelf_getsym");
                        goto err;
                }
 
                sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
                                       sym->sym.st_name);
                if (!sym->name) {
-                       perror("elf_strptr");
+                       WARN_ELF("elf_strptr");
                        goto err;
                }
 
@@ -322,8 +324,8 @@ static int read_relas(struct elf *elf)
                        }
                        memset(rela, 0, sizeof(*rela));
 
-                       if (!gelf_getrela(sec->elf_data, i, &rela->rela)) {
-                               perror("gelf_getrela");
+                       if (!gelf_getrela(sec->data, i, &rela->rela)) {
+                               WARN_ELF("gelf_getrela");
                                return -1;
                        }
 
@@ -362,12 +364,6 @@ struct elf *elf_open(const char *name)
 
        INIT_LIST_HEAD(&elf->sections);
 
-       elf->name = strdup(name);
-       if (!elf->name) {
-               perror("strdup");
-               goto err;
-       }
-
        elf->fd = open(name, O_RDONLY);
        if (elf->fd == -1) {
                perror("open");
@@ -376,12 +372,12 @@ struct elf *elf_open(const char *name)
 
        elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL);
        if (!elf->elf) {
-               perror("elf_begin");
+               WARN_ELF("elf_begin");
                goto err;
        }
 
        if (!gelf_getehdr(elf->elf, &elf->ehdr)) {
-               perror("gelf_getehdr");
+               WARN_ELF("gelf_getehdr");
                goto err;
        }
 
@@ -407,6 +403,12 @@ void elf_close(struct elf *elf)
        struct symbol *sym, *tmpsym;
        struct rela *rela, *tmprela;
 
+       if (elf->elf)
+               elf_end(elf->elf);
+
+       if (elf->fd > 0)
+               close(elf->fd);
+
        list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) {
                list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) {
                        list_del(&sym->list);
@@ -421,11 +423,6 @@ void elf_close(struct elf *elf)
                list_del(&sec->list);
                free(sec);
        }
-       if (elf->name)
-               free(elf->name);
-       if (elf->fd > 0)
-               close(elf->fd);
-       if (elf->elf)
-               elf_end(elf->elf);
+
        free(elf);
 }
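The perror() to WARN_ELF() conversions above matter because libelf reports failures through its own error code rather than (reliably) through errno, so elf_errmsg(-1) is the way to get the real failure string. Since the macro pastes its format string, a call like the ones above expands roughly as:

	WARN_ELF("elf_getscn");
	/* becomes: WARN("elf_getscn" ": %s", elf_errmsg(-1));
	 * i.e. the objtool warning prefix plus libelf's own
	 * description of the last error */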
index 731973e..343968b 100644 (file)
@@ -37,10 +37,9 @@ struct section {
        DECLARE_HASHTABLE(rela_hash, 16);
        struct section *base, *rela;
        struct symbol *sym;
-       Elf_Data *elf_data;
+       Elf_Data *data;
        char *name;
        int idx;
-       unsigned long data;
        unsigned int len;
 };
 
@@ -86,6 +85,7 @@ struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
 struct symbol *find_containing_func(struct section *sec, unsigned long offset);
 void elf_close(struct elf *elf);
 
-
+#define for_each_sec(file, sec)                                                \
+       list_for_each_entry(sec, &file->elf->sections, list)
 
 #endif /* _OBJTOOL_ELF_H */
index bff8abb..84f001d 100644 (file)
@@ -91,16 +91,16 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
        alt->jump_or_nop = entry->jump_or_nop;
 
        if (alt->group) {
-               alt->orig_len = *(unsigned char *)(sec->data + offset +
+               alt->orig_len = *(unsigned char *)(sec->data->d_buf + offset +
                                                   entry->orig_len);
-               alt->new_len = *(unsigned char *)(sec->data + offset +
+               alt->new_len = *(unsigned char *)(sec->data->d_buf + offset +
                                                  entry->new_len);
        }
 
        if (entry->feature) {
                unsigned short feature;
 
-               feature = *(unsigned short *)(sec->data + offset +
+               feature = *(unsigned short *)(sec->data->d_buf + offset +
                                              entry->feature);
 
                /*
index ac7e075..afd9f7a 100644 (file)
 #ifndef _WARN_H
 #define _WARN_H
 
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "elf.h"
+
 extern const char *objname;
 
 static inline char *offstr(struct section *sec, unsigned long offset)
@@ -57,4 +64,7 @@ static inline char *offstr(struct section *sec, unsigned long offset)
        free(_str);                                     \
 })
 
+#define WARN_ELF(format, ...)                          \
+       WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+
 #endif /* _WARN_H */
index 8354d04..1f4fbc9 100644 (file)
@@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
 
 include $(srctree)/tools/scripts/Makefile.arch
 
-$(call detected_var,ARCH)
+$(call detected_var,SRCARCH)
 
 NO_PERF_REGS := 1
 
 # Additional ARCH settings for ppc
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
 endif
 
 # Additional ARCH settings for x86
-ifeq ($(ARCH),x86)
+ifeq ($(SRCARCH),x86)
   $(call detected,CONFIG_X86)
   ifeq (${IS_64_BIT}, 1)
     CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
@@ -43,12 +43,12 @@ ifeq ($(ARCH),x86)
   NO_PERF_REGS := 0
 endif
 
-ifeq ($(ARCH),arm)
+ifeq ($(SRCARCH),arm)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-arm
 endif
 
-ifeq ($(ARCH),arm64)
+ifeq ($(SRCARCH),arm64)
   NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
@@ -61,7 +61,7 @@ endif
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
 # to the check.
-ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm))
   NO_LIBDW_DWARF_UNWIND := 1
 endif
 
@@ -115,9 +115,9 @@ endif
 FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
 FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
 
-FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
 # include ARCH specific config
--include $(src-perf)/arch/$(ARCH)/Makefile
+-include $(src-perf)/arch/$(SRCARCH)/Makefile
 
 ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
   CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
@@ -228,12 +228,12 @@ ifeq ($(DEBUG),0)
 endif
 
 INC_FLAGS += -I$(src-perf)/util/include
-INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include
+INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
 INC_FLAGS += -I$(srctree)/tools/include/uapi
 INC_FLAGS += -I$(srctree)/tools/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/
-INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/
 
 # $(obj-perf)      for generated common-cmds.h
 # $(obj-perf)/util for generated bison/flex headers
@@ -355,7 +355,7 @@ ifndef NO_LIBELF
 
   ifndef NO_DWARF
     ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
-      msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
+      msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
       NO_DWARF := 1
     else
       CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
@@ -380,7 +380,7 @@ ifndef NO_LIBELF
         CFLAGS += -DHAVE_BPF_PROLOGUE
         $(call detected,CONFIG_BPF_PROLOGUE)
       else
-        msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
+        msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset());
       endif
     else
       msg := $(warning DWARF support is off, BPF prologue is disabled);
@@ -406,7 +406,7 @@ ifdef PERF_HAVE_JITDUMP
   endif
 endif
 
-ifeq ($(ARCH),powerpc)
+ifeq ($(SRCARCH),powerpc)
   ifndef NO_DWARF
     CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
   endif
@@ -487,7 +487,7 @@ else
 endif
 
 ifndef NO_LOCAL_LIBUNWIND
-  ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
+  ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
     $(call feature_check,libunwind-debug-frame)
     ifneq ($(feature-libunwind-debug-frame), 1)
       msg := $(warning No debug_frame support found in libunwind);
@@ -740,7 +740,7 @@ ifeq (${IS_64_BIT}, 1)
       NO_PERF_READ_VDSO32 := 1
     endif
   endif
-  ifneq ($(ARCH), x86)
+  ifneq ($(SRCARCH), x86)
     NO_PERF_READ_VDSOX32 := 1
   endif
   ifndef NO_PERF_READ_VDSOX32
@@ -769,7 +769,7 @@ ifdef LIBBABELTRACE
 endif
 
 ifndef NO_AUXTRACE
-  ifeq ($(ARCH),x86)
+  ifeq ($(SRCARCH),x86)
     ifeq ($(feature-get_cpuid), 0)
       msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
       NO_AUXTRACE := 1
@@ -872,7 +872,7 @@ sysconfdir = $(prefix)/etc
 ETC_PERFCONFIG = etc/perfconfig
 endif
 ifndef lib
-ifeq ($(ARCH)$(IS_64_BIT), x861)
+ifeq ($(SRCARCH)$(IS_64_BIT), x861)
 lib = lib64
 else
 lib = lib
index 79fe31f..5008f51 100644 (file)
@@ -226,7 +226,7 @@ endif
 
 ifeq ($(config),0)
 include $(srctree)/tools/scripts/Makefile.arch
--include arch/$(ARCH)/Makefile
+-include arch/$(SRCARCH)/Makefile
 endif
 
 # The FEATURE_DUMP_EXPORT holds location of the actual
index 109eb75..d9b6af8 100644 (file)
@@ -1,2 +1,2 @@
 libperf-y += common.o
-libperf-y += $(ARCH)/
+libperf-y += $(SRCARCH)/
index 9213a12..999a4e8 100644 (file)
@@ -2,7 +2,7 @@ hostprogs := jevents
 
 jevents-y      += json.o jsmn.o jevents.o
 pmu-events-y   += pmu-events.o
-JDIR           =  pmu-events/arch/$(ARCH)
+JDIR           =  pmu-events/arch/$(SRCARCH)
 JSON           =  $(shell [ -d $(JDIR) ] &&                            \
                        find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
 #
@@ -10,4 +10,4 @@ JSON          =  $(shell [ -d $(JDIR) ] &&                            \
 # directory and create tables in pmu-events.c.
 #
 $(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
-       $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
+       $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
index af58ebc..84222bd 100644 (file)
@@ -75,7 +75,7 @@ $(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/B
        $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
        $(Q)echo ';' >> $@
 
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc))
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
 
index 32873ec..cf00eba 100644 (file)
@@ -83,7 +83,7 @@ int test__task_exit(int subtest __maybe_unused)
 
        evsel = perf_evlist__first(evlist);
        evsel->attr.task = 1;
-       evsel->attr.sample_freq = 0;
+       evsel->attr.sample_freq = 1;
        evsel->attr.inherit = 0;
        evsel->attr.watermark = 0;
        evsel->attr.wakeup_events = 1;
index e4f7902..cda44b0 100644 (file)
@@ -273,8 +273,20 @@ struct perf_evsel *perf_evsel__new_cycles(void)
        struct perf_evsel *evsel;
 
        event_attr_init(&attr);
+       /*
+        * sample_period lives in an unnamed union, and older compilers
+        * such as gcc 4.4.7 do not support named initializers for
+        * unnamed union members, so it has to be set here instead.
+        *
+        * It is set just for probing the precise_ip:
+        */
+       attr.sample_period = 1;
 
        perf_event_attr__set_max_precise_ip(&attr);
+       /*
+        * Now reset it, so that the usual logic for setting up the
+        * perf_event_attr defaults kicks in when we return, before
+        * perf_evsel__open() is called.
+        */
+       attr.sample_period = 0;
 
        evsel = perf_evsel__new(&attr);
        if (evsel == NULL)
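The reason sample_period must briefly hold a valid value is the probing helper called in between; its effect is roughly the following (a sketch of perf_event_attr__set_max_precise_ip() from util/evsel.c, error handling elided):

	static void set_max_precise_ip(struct perf_event_attr *attr)
	{
		int fd;

		attr->precise_ip = 3;
		while (attr->precise_ip != 0) {
			/* keep lowering precise_ip until the kernel
			 * accepts the attr */
			fd = sys_perf_event_open(attr, 0, -1, -1, 0);
			if (fd != -1) {
				close(fd);
				break;
			}
			attr->precise_ip--;
		}
	}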
index 5cac8d5..b5baff3 100644 (file)
@@ -841,7 +841,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused,
 
 /*
  * default get_cpuid(): nothing gets recorded
- * actual implementation must be in arch/$(ARCH)/util/header.c
+ * actual implementation must be in arch/$(SRCARCH)/util/header.c
  */
 int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
 {
index d7f31cb..5de2b86 100644 (file)
@@ -1209,10 +1209,12 @@ int machine__create_kernel_maps(struct machine *machine)
         */
        map_groups__fixup_end(&machine->kmaps);
 
-       if (machine__get_running_kernel_start(machine, &name, &addr)) {
-       } else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
-               machine__destroy_kernel_maps(machine);
-               return -1;
+       if (!machine__get_running_kernel_start(machine, &name, &addr)) {
+               if (name &&
+                   maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
+                       machine__destroy_kernel_maps(machine);
+                       return -1;
+               }
        }
 
        return 0;
index 84e7e69..a2670e9 100644 (file)
@@ -619,7 +619,7 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp,
                                           struct map *map, unsigned long offs)
 {
        struct symbol *sym;
-       u64 addr = tp->address + tp->offset - offs;
+       u64 addr = tp->address - offs;
 
        sym = map__find_symbol(map, addr);
        if (!sym)
index da45c4b..7755a5e 100644 (file)
@@ -178,6 +178,14 @@ frame_callback(Dwfl_Frame *state, void *arg)
        Dwarf_Addr pc;
        bool isactivation;
 
+       if (!dwfl_frame_pc(state, &pc, NULL)) {
+               pr_err("%s", dwfl_errmsg(-1));
+               return DWARF_CB_ABORT;
+       }
+
+       /* report the module before we query for isactivation */
+       report_module(pc, ui);
+
        if (!dwfl_frame_pc(state, &pc, &isactivation)) {
                pr_err("%s", dwfl_errmsg(-1));
                return DWARF_CB_ABORT;
index 64cae1a..e1f75a1 100644 (file)
@@ -370,7 +370,7 @@ acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
 }
 EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
 
-union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
+union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
                u64 rev, u64 func, union acpi_object *argv4)
 {
        union acpi_object *obj = ERR_PTR(-ENXIO);
@@ -379,11 +379,11 @@ union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid,
        rcu_read_lock();
        ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
        if (ops)
-               obj = ops->evaluate_dsm(handle, uuid, rev, func, argv4);
+               obj = ops->evaluate_dsm(handle, guid, rev, func, argv4);
        rcu_read_unlock();
 
        if (IS_ERR(obj))
-               return acpi_evaluate_dsm(handle, uuid, rev, func, argv4);
+               return acpi_evaluate_dsm(handle, guid, rev, func, argv4);
        return obj;
 }
 EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm);
index c218717..28859da 100644 (file)
@@ -1559,7 +1559,7 @@ static unsigned long nfit_ctl_handle;
 union acpi_object *result;
 
 static union acpi_object *nfit_test_evaluate_dsm(acpi_handle handle,
-               const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4)
+               const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4)
 {
        if (handle != &nfit_ctl_handle)
                return ERR_PTR(-ENXIO);
index f54c003..d3d63dd 100644 (file)
@@ -13,6 +13,7 @@
 #ifndef __NFIT_TEST_H__
 #define __NFIT_TEST_H__
 #include <linux/list.h>
+#include <linux/uuid.h>
 #include <linux/ioport.h>
 #include <linux/spinlock_types.h>
 
@@ -36,7 +37,8 @@ typedef void *acpi_handle;
 
 typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
 typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
-               const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4);
+                const guid_t *guid, u64 rev, u64 func,
+                union acpi_object *argv4);
 void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
                unsigned long size);
 void __wrap_iounmap(volatile void __iomem *addr);
index 19d0604..487cbfb 100644 (file)
@@ -1,23 +1,42 @@
 #ifndef __BPF_ENDIAN__
 #define __BPF_ENDIAN__
 
-#include <asm/byteorder.h>
+#include <linux/swab.h>
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-# define __bpf_ntohs(x)                __builtin_bswap16(x)
-# define __bpf_htons(x)                __builtin_bswap16(x)
-#elif __BYTE_ORDER == __BIG_ENDIAN
-# define __bpf_ntohs(x)                (x)
-# define __bpf_htons(x)                (x)
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, unless the user overrides it with an
+ * explicit target (bpfel/bpfeb). The resulting
+ * __BYTE_ORDER__ is defined by the compiler, so we cannot
+ * rely on __BYTE_ORDER from libc headers, since it doesn't
+ * reflect the actual requested byte order.
+ *
+ * Note that LLVM's BPF target has different __builtin_bswapX()
+ * semantics: it maps to BPF_ALU | BPF_END | BPF_TO_BE in both
+ * the bpfel and bpfeb case, which means that below we map to
+ * cpu_to_be16(). We could use it unconditionally in the BPF
+ * case, but better not to rely on it, so that this header can
+ * be used from both the application and the BPF program side,
+ * which use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x)                        __builtin_bswap16(x)
+# define __bpf_htons(x)                        __builtin_bswap16(x)
+# define __bpf_constant_ntohs(x)       ___constant_swab16(x)
+# define __bpf_constant_htons(x)       ___constant_swab16(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x)                        (x)
+# define __bpf_htons(x)                        (x)
+# define __bpf_constant_ntohs(x)       (x)
+# define __bpf_constant_htons(x)       (x)
 #else
-# error "Fix your __BYTE_ORDER?!"
+# error "Fix your compiler's __BYTE_ORDER__?!"
 #endif
 
 #define bpf_htons(x)                           \
        (__builtin_constant_p(x) ?              \
-        __constant_htons(x) : __bpf_htons(x))
+        __bpf_constant_htons(x) : __bpf_htons(x))
 #define bpf_ntohs(x)                           \
        (__builtin_constant_p(x) ?              \
-        __constant_ntohs(x) : __bpf_ntohs(x))
+        __bpf_constant_ntohs(x) : __bpf_ntohs(x))
 
-#endif
+#endif /* __BPF_ENDIAN__ */
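A usage sketch for the constant-folding split above; parse_proto() is illustrative, ETH_P_IP comes from <linux/if_ether.h>:

	#include <linux/if_ether.h>

	static int parse_proto(__u16 h_proto)
	{
		/* constant argument: __builtin_constant_p() is true and
		 * ___constant_swab16() folds the swap at compile time */
		if (h_proto == bpf_htons(ETH_P_IP))
			return 1;

		/* a runtime value would take the __builtin_bswap16()
		 * path instead */
		return 0;
	}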
index cabb19b..0ff8c55 100644 (file)
@@ -3749,6 +3749,72 @@ static struct bpf_test tests[] = {
                .errstr = "invalid bpf_context access",
        },
        {
+               "leak pointer into ctx 1",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+                                   offsetof(struct __sk_buff, cb[0])),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
+                                     offsetof(struct __sk_buff, cb[0])),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 2 },
+               .errstr_unpriv = "R2 leaks addr into mem",
+               .result_unpriv = REJECT,
+               .result = ACCEPT,
+       },
+       {
+               "leak pointer into ctx 2",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+                                   offsetof(struct __sk_buff, cb[0])),
+                       BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
+                                     offsetof(struct __sk_buff, cb[0])),
+                       BPF_EXIT_INSN(),
+               },
+               .errstr_unpriv = "R10 leaks addr into mem",
+               .result_unpriv = REJECT,
+               .result = ACCEPT,
+       },
+       {
+               "leak pointer into ctx 3",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
+                                     offsetof(struct __sk_buff, cb[0])),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 1 },
+               .errstr_unpriv = "R2 leaks addr into ctx",
+               .result_unpriv = REJECT,
+               .result = ACCEPT,
+       },
+       {
+               "leak pointer into map val",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+                       BPF_MOV64_IMM(BPF_REG_3, 0),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+                       BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 4 },
+               .errstr_unpriv = "R6 leaks addr into mem",
+               .result_unpriv = REJECT,
+               .result = ACCEPT,
+       },
+       {
                "helper access to map: full range",
                .insns = {
                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
index a676d3e..13f5198 100755 (executable)
@@ -305,7 +305,7 @@ function perf_test()
        echo "Running remote perf test $WITH DMA"
        write_file "" $REMOTE_PERF/run
        echo -n "  "
-       read_file $LOCAL_PERF/run
+       read_file $REMOTE_PERF/run
        echo "  Passed"
 
        _modprobe -r ntb_perf