Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Jul 2013 19:42:40 +0000 (12:42 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Jul 2013 19:42:40 +0000 (12:42 -0700)
Pull ARM SoC fixes from Olof Johansson:
 "This is a largeish batch of fixes, mostly because I missed -rc2 due to
  travel/vacation.  So in number these are a bit more than ideal unless
  you amortize them over two -rcs.

  Quick breakdown:
   - Defconfig updates
     - Making multi_v7_defconfig useful on more hardware to encourage
       single-image usage
     - Davinci and nomadik updates due to new code merged this merge
       window
   - Fixes for UART on Samsung platforms, both PM and clock-related
   - A handful of warning fixes from defconfig builds, including for
     max8925 backlight and pxamci (both with appropriate acks)
   - Exynos5440 fixes for LPAE configuration, PM
   - ...plus a bunch of other smaller changes all over the place

  I expect to switch to regressions-or-severe-bugs-only fixes from here
  on out"

* tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc: (37 commits)
  mfd: max8925: fix dt code for backlight
  ARM: omap5: Only select errata 798181 if SMP
  ARM: EXYNOS: Update CONFIG_ARCH_NR_GPIO for Exynos
  ARM: EXYNOS: Fix low level debug support
  ARM: SAMSUNG: Save/restore only selected uart's registers
  ARM: SAMSUNG: Add SAMSUNG_PM config option to select pm
  ARM: S3C24XX: Add missing clkdev entries for s3c2440 UART
  ARM: multi_v7_defconfig: Select USB chipidea driver
  ARM: pxa: propagate errors from regulator_enable() to pxamci
  ARM: zynq: fix compilation warning
  ARM: keystone: fix compilation warning
  ARM: highbank: Only touch common coherency control register fields
  ARM: footbridge: fix overlapping PCI mappings
  dmaengine: shdma: fix a build failure on platforms with no DMA support
  ARM: STi: Set correct ARM ERRATAs.
  ARM: dts: STi: Fix pinconf setup for STiH416 serial2
  ARM: nomadik: configure for NO_HZ and HRTIMERS
  ARM: nomadik: update defconfig base
  ARM: nomadik: Update MMC defconfigs
  ARM: davinci: defconfig: enable EDMA driver
  ...

236 files changed:
Documentation/ABI/testing/sysfs-driver-xen-blkback [new file with mode: 0644]
Documentation/ABI/testing/sysfs-driver-xen-blkfront [new file with mode: 0644]
Documentation/bcache.txt
Documentation/devicetree/bindings/vendor-prefixes.txt
Documentation/ja_JP/HOWTO
MAINTAINERS
arch/alpha/Kconfig
arch/alpha/include/asm/atomic.h
arch/alpha/include/asm/param.h
arch/alpha/include/asm/spinlock.h
arch/alpha/include/asm/unistd.h
arch/alpha/include/uapi/asm/param.h
arch/alpha/include/uapi/asm/unistd.h
arch/alpha/kernel/entry.S
arch/alpha/kernel/irq_alpha.c
arch/alpha/kernel/smp.c
arch/alpha/kernel/sys_dp264.c
arch/alpha/kernel/sys_marvel.c
arch/alpha/kernel/systbls.S
arch/alpha/kernel/time.c
arch/alpha/kernel/traps.c
arch/powerpc/include/asm/eeh.h
arch/powerpc/include/asm/hw_irq.h
arch/powerpc/include/asm/module.h
arch/powerpc/include/asm/pci-bridge.h
arch/powerpc/include/asm/reg.h
arch/powerpc/kernel/cputable.c
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/eeh_cache.c
arch/powerpc/kernel/eeh_driver.c
arch/powerpc/kernel/eeh_pe.c
arch/powerpc/kernel/eeh_sysfs.c
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/pci-hotplug.c
arch/powerpc/kernel/pci_of_scan.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/mm/hash_native_64.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/power8-pmu.c
arch/powerpc/platforms/powernv/eeh-powernv.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/pseries/Kconfig
arch/powerpc/platforms/pseries/eeh_pseries.c
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/ras.c
arch/x86/crypto/Makefile
arch/x86/crypto/crct10dif-pcl-asm_64.S [deleted file]
arch/x86/crypto/crct10dif-pclmul_glue.c [deleted file]
crypto/Kconfig
crypto/Makefile
crypto/crct10dif.c [deleted file]
crypto/tcrypt.c
crypto/testmgr.c
crypto/testmgr.h
drivers/ata/Kconfig
drivers/ata/Makefile
drivers/ata/ahci.c
drivers/ata/ahci_imx.c [new file with mode: 0644]
drivers/ata/ata_piix.c
drivers/ata/libata-scsi.c
drivers/ata/sata_inic162x.c
drivers/block/Kconfig
drivers/block/drbd/drbd_actlog.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_state.c
drivers/block/rsxx/core.c
drivers/block/rsxx/cregs.c
drivers/block/rsxx/dev.c
drivers/block/rsxx/dma.c
drivers/block/rsxx/rsxx_priv.h
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/common.h
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/crypto/caam/caamhash.c
drivers/edac/edac_mc.c
drivers/edac/edac_mc_sysfs.c
drivers/edac/i5100_edac.c
drivers/gpio/gpio-msm-v2.c
drivers/gpio/gpio-omap.c
drivers/gpu/drm/drm_crtc_helper.c
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_lvds.c
drivers/gpu/drm/i915/intel_panel.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c
drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c
drivers/gpu/drm/nouveau/core/engine/falcon.c
drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c
drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c
drivers/gpu/drm/nouveau/core/engine/vp/nve0.c
drivers/gpu/drm/nouveau/core/include/engine/falcon.h
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_display.c
drivers/gpu/drm/nouveau/nouveau_drm.c
drivers/gpu/drm/nouveau/nouveau_fbcon.c
drivers/gpu/drm/nouveau/nouveau_fence.c
drivers/gpu/drm/nouveau/nouveau_gem.c
drivers/gpu/drm/nouveau/nv50_display.c
drivers/hv/hv_balloon.c
drivers/hv/vmbus_drv.c
drivers/md/bcache/alloc.c
drivers/md/bcache/bcache.h
drivers/md/bcache/bset.c
drivers/md/bcache/bset.h
drivers/md/bcache/btree.c
drivers/md/bcache/btree.h
drivers/md/bcache/closure.c
drivers/md/bcache/debug.c
drivers/md/bcache/debug.h
drivers/md/bcache/io.c
drivers/md/bcache/journal.c
drivers/md/bcache/movinggc.c
drivers/md/bcache/request.c
drivers/md/bcache/request.h
drivers/md/bcache/super.c
drivers/md/bcache/sysfs.c
drivers/md/bcache/trace.c
drivers/md/bcache/util.c
drivers/md/bcache/util.h
drivers/md/bcache/writeback.c
drivers/md/bcache/writeback.h [new file with mode: 0644]
drivers/md/raid10.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/misc/atmel-ssc.c
drivers/misc/mei/hbm.c
drivers/misc/mei/hw-me.c
drivers/misc/mei/init.c
drivers/of/irq.c
drivers/pci/hotplug/rpadlpar_core.c
drivers/staging/android/logger.c
drivers/staging/comedi/TODO
drivers/staging/comedi/comedi_fops.c
drivers/staging/frontier/alphatrack.c
drivers/staging/gdm72xx/gdm_qos.c
drivers/staging/imx-drm/Kconfig
drivers/staging/tidspbridge/pmgr/dbll.c
drivers/staging/zram/zram_drv.c
drivers/thermal/x86_pkg_temp_thermal.c
drivers/tty/serial/8250/8250_early.c
drivers/tty/serial/Kconfig
drivers/tty/synclinkmp.c
drivers/usb/core/hub.c
drivers/usb/core/hub.h
drivers/usb/dwc3/Kconfig
drivers/usb/dwc3/core.c
drivers/usb/dwc3/core.h
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/Kconfig
drivers/usb/gadget/at91_udc.c
drivers/usb/gadget/f_ecm.c
drivers/usb/gadget/f_eem.c
drivers/usb/gadget/f_ncm.c
drivers/usb/gadget/f_phonet.c
drivers/usb/gadget/f_rndis.c
drivers/usb/gadget/f_subset.c
drivers/usb/gadget/fotg210-udc.c
drivers/usb/gadget/mv_u3d_core.c
drivers/usb/gadget/udc-core.c
drivers/usb/host/ehci-hub.c
drivers/usb/host/pci-quirks.h
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.c
drivers/usb/misc/sisusbvga/sisusb.c
drivers/usb/phy/phy-omap-usb3.c
drivers/usb/phy/phy-samsung-usb2.c
drivers/usb/renesas_usbhs/mod_gadget.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/mos7840.c
drivers/usb/serial/option.c
drivers/usb/serial/ti_usb_3410_5052.c
drivers/usb/storage/unusual_devs.h
drivers/vhost/net.c
drivers/vhost/scsi.c
drivers/vhost/test.c
drivers/vhost/vhost.h
fs/fuse/dir.c
fs/nfs/nfs4xdr.c
fs/nfsd/vfs.c
fs/xfs/xfs_dinode.h
fs/xfs/xfs_inode.c
fs/xfs/xfs_log_recover.c
include/linux/cgroup.h
include/linux/cgroup_subsys.h
include/linux/crc-t10dif.h
include/linux/drbd.h
include/linux/drbd_genl.h
include/linux/drbd_limits.h
include/linux/edac.h
include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
include/linux/usb.h
include/trace/events/bcache.h
include/trace/ftrace.h
include/uapi/linux/usb/ch11.h
include/xen/interface/io/blkif.h
include/xen/interface/io/ring.h
kernel/cgroup.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_functions.c
kernel/trace/trace_functions_graph.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_mmiotrace.c
kernel/trace/trace_output.c
kernel/trace/trace_syscalls.c
kernel/trace/trace_uprobe.c
kernel/wait.c
lib/Kconfig
lib/crc-t10dif.c
lib/mpi/longlong.h
sound/pci/hda/patch_sigmatel.c
sound/soc/cirrus/ep93xx-ac97.c
sound/soc/codecs/max98088.c
sound/soc/codecs/sgtl5000.c
sound/soc/soc-core.c
sound/soc/tegra/tegra20_ac97.c
sound/soc/tegra/tegra20_spdif.c
sound/usb/6fire/pcm.c
sound/usb/hiface/pcm.c
tools/hv/hv_kvp_daemon.c

diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback
new file mode 100644 (file)
index 0000000..8bb43b6
--- /dev/null
@@ -0,0 +1,17 @@
+What:           /sys/module/xen_blkback/parameters/max_buffer_pages
+Date:           March 2013
+KernelVersion:  3.11
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of free pages to keep in each block
+                backend buffer.
+
+What:           /sys/module/xen_blkback/parameters/max_persistent_grants
+Date:           March 2013
+KernelVersion:  3.11
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of grants to map persistently in
+                blkback. If the frontend tries to use more than
+                max_persistent_grants, the LRU kicks in and starts
+                removing 5% of max_persistent_grants every 100ms.
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
new file mode 100644 (file)
index 0000000..c0a6cb7
--- /dev/null
@@ -0,0 +1,10 @@
+What:           /sys/module/xen_blkfront/parameters/max
+Date:           June 2013
+KernelVersion:  3.11
+Contact:        Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Maximum number of segments that the frontend will negotiate
+                with the backend for indirect descriptors. The default value
+                is 32 - higher value means more potential throughput but more
+                memory usage. The backend picks the minimum of the frontend
+                and its default backend value.
index c3365f26b2d9f9da85d0ff5245bd6f89555f5256..32b6c3189d9826a53875ae6dc51ce62e9b86778b 100644 (file)
@@ -46,29 +46,33 @@ you format your backing devices and cache device at the same time, you won't
 have to manually attach:
   make-bcache -B /dev/sda /dev/sdb -C /dev/sdc
 
-To make bcache devices known to the kernel, echo them to /sys/fs/bcache/register:
+bcache-tools now ships udev rules, and bcache devices are known to the kernel
+immediately.  Without udev, you can manually register devices like this:
 
   echo /dev/sdb > /sys/fs/bcache/register
   echo /dev/sdc > /sys/fs/bcache/register
 
-To register your bcache devices automatically, you could add something like
-this to an init script:
+Registering the backing device makes the bcache device show up in /dev; you can
+now format it and use it as normal. But the first time using a new bcache
+device, it'll be running in passthrough mode until you attach it to a cache.
+See the section on attaching.
 
-  echo /dev/sd* > /sys/fs/bcache/register_quiet
+The devices show up as:
 
-It'll look for bcache superblocks and ignore everything that doesn't have one.
+  /dev/bcache<N>
 
-Registering the backing device makes the bcache show up in /dev; you can now
-format it and use it as normal. But the first time using a new bcache device,
-it'll be running in passthrough mode until you attach it to a cache. See the
-section on attaching.
+As well as (with udev):
 
-The devices show up at /dev/bcacheN, and can be controlled via sysfs from
-/sys/block/bcacheN/bcache:
+  /dev/bcache/by-uuid/<uuid>
+  /dev/bcache/by-label/<label>
+
+To get started:
 
   mkfs.ext4 /dev/bcache0
   mount /dev/bcache0 /mnt
 
+You can control bcache devices through sysfs at /sys/block/bcache<N>/bcache .
+
 Cache devices are managed as sets; multiple caches per set isn't supported yet
 but will allow for mirroring of metadata and dirty data in the future. Your new
 cache set shows up as /sys/fs/bcache/<UUID>
@@ -80,11 +84,11 @@ must be attached to your cache set to enable caching. Attaching a backing
 device to a cache set is done thusly, with the UUID of the cache set in
 /sys/fs/bcache:
 
-  echo <UUID> > /sys/block/bcache0/bcache/attach
+  echo <CSET-UUID> > /sys/block/bcache0/bcache/attach
 
 This only has to be done once. The next time you reboot, just reregister all
 your bcache devices. If a backing device has data in a cache somewhere, the
-/dev/bcache# device won't be created until the cache shows up - particularly
+/dev/bcache<N> device won't be created until the cache shows up - particularly
 important if you have writeback caching turned on.
 
 If you're booting up and your cache device is gone and never coming back, you
@@ -191,6 +195,9 @@ want for getting the best possible numbers when benchmarking.
 
 SYSFS - BACKING DEVICE:
 
+Available at /sys/block/<bdev>/bcache, /sys/block/bcache*/bcache and
+(if attached) /sys/fs/bcache/<cset-uuid>/bdev*
+
 attach
   Echo the UUID of a cache set to this file to enable caching.
 
@@ -300,6 +307,8 @@ cache_readaheads
 
 SYSFS - CACHE SET:
 
+Available at /sys/fs/bcache/<cset-uuid>
+
 average_key_size
   Average data per key in the btree.
 
@@ -390,6 +399,8 @@ trigger_gc
 
 SYSFS - CACHE DEVICE:
 
+Available at /sys/block/<cdev>/bcache
+
 block_size
   Minimum granularity of writes - should match hardware sector size.
 
index d5a79caec147d36f993c769ddab2a6c15fb0686a..366ce9b872407e1dfb1964079da4fe95d6094df4 100644 (file)
@@ -26,6 +26,7 @@ est   ESTeem Wireless Modems
 fsl    Freescale Semiconductor
 GEFanuc        GE Fanuc Intelligent Platforms Embedded Systems, Inc.
 gef    GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+hisilicon      Hisilicon Limited.
 hp     Hewlett Packard
 ibm    International Business Machines (IBM)
 idt    Integrated Device Technologies, Inc.
@@ -43,6 +44,7 @@ nxp   NXP Semiconductors
 onnn   ON Semiconductor Corp.
 picochip       Picochip Ltd
 powervr        PowerVR (deprecated, use img)
+qca    Qualcomm Atheros, Inc.
 qcom   Qualcomm, Inc.
 ralink Mediatek/Ralink Technology Corp.
 ramtron        Ramtron International
index 050d37fe6d40a566f684891a626e17d519144472..8148a47fc70e17cbf76be1c6d88ad09f16c983b9 100644 (file)
@@ -11,14 +11,14 @@ for non English (read: Japanese) speakers and is not intended as a
 fork. So if you have any comments or updates for this file, please try
 to update the original English file first.
 
-Last Updated: 2011/03/31
+Last Updated: 2013/07/19
 ==================================
 これは、
-linux-2.6.38/Documentation/HOWTO
+linux-3.10/Documentation/HOWTO
 の和訳です。
 
-翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
-翻訳日: 2011/3/28
+翻訳団体: JF プロジェクト < http://linuxjf.sourceforge.jp/ >
+翻訳日: 2013/7/19
 翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
 校正者: 松倉さん <nbh--mats at nifty dot com>
          小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
@@ -245,7 +245,7 @@ Linux カーネルソースツリーの中に含まれる、きれいにし、
 自己参照方式で、索引がついた web 形式で、ソースコードを参照することが
 できます。この最新の素晴しいカーネルコードのリポジトリは以下で見つかり
 ます-
-       http://sosdg.org/~qiyong/lxr/
+       http://lxr.linux.no/+trees
 
 開発プロセス
 -----------------------
@@ -253,24 +253,24 @@ Linux カーネルソースツリーの中に含まれる、きれいにし、
 Linux カーネルの開発プロセスは現在幾つかの異なるメインカーネル「ブラン
 チ」と多数のサブシステム毎のカーネルブランチから構成されます。
 これらのブランチとは-
-  - メインの 2.6.x カーネルツリー
-  - 2.6.x.y -stable カーネルツリー
-  - 2.6.x -git カーネルパッチ
+  - メインの 3.x カーネルツリー
+  - 3.x.y -stable カーネルツリー
+  - 3.x -git カーネルパッチ
   - サブシステム毎のカーネルツリーとパッチ
-  - 統合テストのための 2.6.x -next カーネルツリー
+  - 統合テストのための 3.x -next カーネルツリー
 
-2.6.x カーネルツリー
+3.x カーネルツリー
 -----------------
 
-2.6.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org
-の pub/linux/kernel/v2.6/ ディレクトリに存在します。この開発プロセスは
+3.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org
+の pub/linux/kernel/v3.x/ ディレクトリに存在します。この開発プロセスは
 以下のとおり-
 
   - 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、
     この期間中に、メンテナ達は Linus に大きな差分を送ることができます。
     このような差分は通常 -next カーネルに数週間含まれてきたパッチです。
     大きな変更は git(カーネルのソース管理ツール、詳細は
-    http://git-scm.com/  参照) を使って送るのが好ましいやり方ですが、パッ
+    http://git-scm.com/ 参照) を使って送るのが好ましいやり方ですが、パッ
     チファイルの形式のまま送るのでも十分です。
 
   - 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定
@@ -302,20 +302,20 @@ Andrew Morton が Linux-kernel メーリングリストにカーネルリリー
   実に認識されたバグの状況によりリリースされるのであり、前もって決めら
   れた計画によってリリースされるものではないからです。」
 
-2.6.x.y -stable カーネルツリー
+3.x.y -stable カーネルツリー
 ---------------------------
 
-バージョン番号が4つの数字に分かれているカーネルは -stable カーネルです。
-これには、2.6.x カーネルで見つかったセキュリティ問題や重大な後戻りに対
+バージョン番号が3つの数字に分かれているカーネルは -stable カーネルです。
+これには、3.x カーネルで見つかったセキュリティ問題や重大な後戻りに対
 する比較的小さい重要な修正が含まれます。
 
 これは、開発/実験的バージョンのテストに協力することに興味が無く、
 最新の安定したカーネルを使いたいユーザに推奨するブランチです。
 
-もし、2.6.x.y カーネルが存在しない場合には、番号が一番大きい 2.6.x が
+もし、3.x.y カーネルが存在しない場合には、番号が一番大きい 3.x が
 最新の安定版カーネルです。
 
-2.6.x.y は "stable" チーム <stable@kernel.org> でメンテされており、必
+3.x.y は "stable" チーム <stable@kernel.org> でメンテされており、必
 要に応じてリリースされます。通常のリリース期間は 2週間毎ですが、差し迫っ
 た問題がなければもう少し長くなることもあります。セキュリティ関連の問題
 の場合はこれに対してだいたいの場合、すぐにリリースがされます。
@@ -324,7 +324,7 @@ Andrew Morton が Linux-kernel メーリングリストにカーネルリリー
 イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ
 リースプロセスがどう動くかが記述されています。
 
-2.6.x -git パッチ
+3.x -git パッチ
 ------------------
 
 git リポジトリで管理されているLinus のカーネルツリーの毎日のスナップ
@@ -358,14 +358,14 @@ quilt シリーズとして公開されているパッチキューも使われ
 をつけることができます。大部分のこれらの patchwork のサイトは
 http://patchwork.kernel.org/ でリストされています。
 
-統合テストのための 2.6.x -next カーネルツリー
+統合テストのための 3.x -next カーネルツリー
 ---------------------------------------------
 
-サブシステムツリーの更新内容がメインラインの 2.6.x ツリーにマージされ
+サブシステムツリーの更新内容がメインラインの 3.x ツリーにマージされ
 る前に、それらは統合テストされる必要があります。この目的のため、実質的
 に全サブシステムツリーからほぼ毎日プルされてできる特別なテスト用のリ
 ポジトリが存在します-
-       http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git
+       http://git.kernel.org/?p=linux/kernel/git/next/linux-next.git
        http://linux.f-seidel.de/linux-next/pmwiki/
 
 このやり方によって、-next カーネルは次のマージ機会でどんなものがメイン
index bf61e04291abb2f2200544517ab144c89cb3ab8e..a26b10e52aea18cf39b61095cc563cd940ff1dac 100644 (file)
@@ -1642,7 +1642,7 @@ S:        Maintained
 F:     drivers/net/hamradio/baycom*
 
 BCACHE (BLOCK LAYER CACHE)
-M:     Kent Overstreet <koverstreet@google.com>
+M:     Kent Overstreet <kmo@daterainc.com>
 L:     linux-bcache@vger.kernel.org
 W:     http://bcache.evilpiepirate.org
 S:     Maintained:
@@ -3346,7 +3346,7 @@ F:        Documentation/firmware_class/
 F:     drivers/base/firmware*.c
 F:     include/linux/firmware.h
 
-FLASHSYSTEM DRIVER (IBM FlashSystem 70/80 PCI SSD Flash Card)
+FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card)
 M:     Joshua Morris <josh.h.morris@us.ibm.com>
 M:     Philip Kelleher <pjk1939@linux.vnet.ibm.com>
 S:     Maintained
@@ -3622,11 +3622,9 @@ F:       drivers/isdn/gigaset/
 F:     include/uapi/linux/gigaset_dev.h
 
 GPIO SUBSYSTEM
-M:     Grant Likely <grant.likely@linaro.org>
 M:     Linus Walleij <linus.walleij@linaro.org>
 S:     Maintained
 L:     linux-gpio@vger.kernel.org
-T:     git git://git.secretlab.ca/git/linux-2.6.git
 F:     Documentation/gpio.txt
 F:     drivers/gpio/
 F:     include/linux/gpio*
@@ -4472,8 +4470,6 @@ F:        drivers/irqchip/
 
 IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
 M:     Benjamin Herrenschmidt <benh@kernel.crashing.org>
-M:     Grant Likely <grant.likely@linaro.org>
-T:     git git://git.secretlab.ca/git/linux-2.6.git irqdomain/next
 S:     Maintained
 F:     Documentation/IRQ-domain.txt
 F:     include/linux/irqdomain.h
@@ -4990,7 +4986,7 @@ F:        arch/powerpc/platforms/44x/
 
 LINUX FOR POWERPC EMBEDDED XILINX VIRTEX
 L:     linuxppc-dev@lists.ozlabs.org
-S:     Unmaintained
+S:     Orphan
 F:     arch/powerpc/*/*virtex*
 F:     arch/powerpc/*/*/*virtex*
 
@@ -5886,7 +5882,7 @@ OMAP DEVICE TREE SUPPORT
 M:     Benoît Cousson <b-cousson@ti.com>
 M:     Tony Lindgren <tony@atomide.com>
 L:     linux-omap@vger.kernel.org
-L:     devicetree-discuss@lists.ozlabs.org (moderated for non-subscribers)
+L:     devicetree@vger.kernel.org
 S:     Maintained
 F:     arch/arm/boot/dts/*omap*
 F:     arch/arm/boot/dts/*am3*
@@ -6050,17 +6046,28 @@ F:      drivers/i2c/busses/i2c-ocores.c
 OPEN FIRMWARE AND FLATTENED DEVICE TREE
 M:     Grant Likely <grant.likely@linaro.org>
 M:     Rob Herring <rob.herring@calxeda.com>
-L:     devicetree-discuss@lists.ozlabs.org (moderated for non-subscribers)
+L:     devicetree@vger.kernel.org
 W:     http://fdt.secretlab.ca
 T:     git git://git.secretlab.ca/git/linux-2.6.git
 S:     Maintained
-F:     Documentation/devicetree
-F:     drivers/of
+F:     drivers/of/
 F:     include/linux/of*.h
-F:     scripts/dtc
+F:     scripts/dtc/
 K:     of_get_property
 K:     of_match_table
 
+OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
+M:     Rob Herring <rob.herring@calxeda.com>
+M:     Pawel Moll <pawel.moll@arm.com>
+M:     Mark Rutland <mark.rutland@arm.com>
+M:     Stephen Warren <swarren@wwwdotorg.org>
+M:     Ian Campbell <ian.campbell@citrix.com>
+L:     devicetree@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/
+F:     arch/*/boot/dts/
+F:     include/dt-bindings/
+
 OPENRISC ARCHITECTURE
 M:     Jonas Bonn <jonas@southpole.se>
 W:     http://openrisc.net
@@ -7746,7 +7753,6 @@ F:        drivers/clk/spear/
 
 SPI SUBSYSTEM
 M:     Mark Brown <broonie@kernel.org>
-M:     Grant Likely <grant.likely@linaro.org>
 L:     linux-spi@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git
 Q:     http://patchwork.kernel.org/project/spi-devel-general/list/
@@ -7812,7 +7818,7 @@ F:        drivers/staging/asus_oled/
 
 STAGING - COMEDI
 M:     Ian Abbott <abbotti@mev.co.uk>
-M:     Mori Hess <fmhess@users.sourceforge.net>
+M:     H Hartley Sweeten <hsweeten@visionengravers.com>
 S:     Odd Fixes
 F:     drivers/staging/comedi/
 
@@ -9288,7 +9294,7 @@ S:        Maintained
 F:     drivers/net/ethernet/xilinx/xilinx_axienet*
 
 XILINX SYSTEMACE DRIVER
-S:     Unmaintained
+S:     Orphan
 F:     drivers/block/xsysace.c
 
 XILINX UARTLITE SERIAL DRIVER
index 837a1f2d8b965a60b61252521f21129be9e80afe..082d9b4b54723d0253e7ca3355387886b81f72f4 100644 (file)
@@ -15,6 +15,7 @@ config ALPHA
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select ARCH_WANT_IPC_PARSE_VERSION
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
+       select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
        select GENERIC_SMP_IDLE_THREAD
        select GENERIC_CMOS_UPDATE
        select GENERIC_STRNCPY_FROM_USER
index c2cbe4fc391cd7d77d319cb0cbeabd19f7e5ecbb..78b03ef39f6f0c2960e0b9c4b45eeae47eff07fa 100644 (file)
@@ -186,17 +186,24 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
  */
 static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 {
-       int c, old;
-       c = atomic_read(v);
-       for (;;) {
-               if (unlikely(c == (u)))
-                       break;
-               old = atomic_cmpxchg((v), c, c + (a));
-               if (likely(old == c))
-                       break;
-               c = old;
-       }
-       return c;
+       int c, new, old;
+       smp_mb();
+       __asm__ __volatile__(
+       "1:     ldl_l   %[old],%[mem]\n"
+       "       cmpeq   %[old],%[u],%[c]\n"
+       "       addl    %[old],%[a],%[new]\n"
+       "       bne     %[c],2f\n"
+       "       stl_c   %[new],%[mem]\n"
+       "       beq     %[new],3f\n"
+       "2:\n"
+       ".subsection 2\n"
+       "3:     br      1b\n"
+       ".previous"
+       : [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c)
+       : [mem] "m"(*v), [a] "rI"(a), [u] "rI"((long)u)
+       : "memory");
+       smp_mb();
+       return old;
 }
 
 
@@ -207,21 +214,56 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
  * @u: ...unless v is equal to u.
  *
  * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
+ * Returns true iff @v was not @u.
  */
 static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 {
-       long c, old;
-       c = atomic64_read(v);
-       for (;;) {
-               if (unlikely(c == (u)))
-                       break;
-               old = atomic64_cmpxchg((v), c, c + (a));
-               if (likely(old == c))
-                       break;
-               c = old;
-       }
-       return c != (u);
+       long c, tmp;
+       smp_mb();
+       __asm__ __volatile__(
+       "1:     ldq_l   %[tmp],%[mem]\n"
+       "       cmpeq   %[tmp],%[u],%[c]\n"
+       "       addq    %[tmp],%[a],%[tmp]\n"
+       "       bne     %[c],2f\n"
+       "       stq_c   %[tmp],%[mem]\n"
+       "       beq     %[tmp],3f\n"
+       "2:\n"
+       ".subsection 2\n"
+       "3:     br      1b\n"
+       ".previous"
+       : [tmp] "=&r"(tmp), [c] "=&r"(c)
+       : [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u)
+       : "memory");
+       smp_mb();
+       return !c;
+}
+
+/*
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+static inline long atomic64_dec_if_positive(atomic64_t *v)
+{
+       long old, tmp;
+       smp_mb();
+       __asm__ __volatile__(
+       "1:     ldq_l   %[old],%[mem]\n"
+       "       subq    %[old],1,%[tmp]\n"
+       "       ble     %[old],2f\n"
+       "       stq_c   %[tmp],%[mem]\n"
+       "       beq     %[tmp],3f\n"
+       "2:\n"
+       ".subsection 2\n"
+       "3:     br      1b\n"
+       ".previous"
+       : [old] "=&r"(old), [tmp] "=&r"(tmp)
+       : [mem] "m"(*v)
+       : "memory");
+       smp_mb();
+       return old - 1;
 }
 
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
index bf46af51941b058ad6b0de0df4156f796bbb5897..a5b68b268bcf34835a6376d8b8e2cb349d03feed 100644 (file)
@@ -3,7 +3,9 @@
 
 #include <uapi/asm/param.h>
 
-#define HZ             CONFIG_HZ
-#define USER_HZ                HZ
-# define CLOCKS_PER_SEC        HZ      /* frequency at which times() counts */
+# undef HZ
+# define HZ            CONFIG_HZ
+# define USER_HZ       1024
+# define CLOCKS_PER_SEC        USER_HZ /* frequency at which times() counts */
+
 #endif /* _ASM_ALPHA_PARAM_H */
index 3bba21e41b818e1ff8fb7f346721dc8e95f93950..37b570d01202e4ee62d5dcc11af345f1805e81a0 100644 (file)
@@ -168,8 +168,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock)
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
-#define arch_spin_relax(lock)  cpu_relax()
-#define arch_read_relax(lock)  cpu_relax()
-#define arch_write_relax(lock) cpu_relax()
-
 #endif /* _ALPHA_SPINLOCK_H */
index 43baee17acdf43c065bc6a9bd59b2ffa2c391f80..f2c94402e2c8b95028bace631c3cbf6f2aeca5b1 100644 (file)
@@ -3,8 +3,7 @@
 
 #include <uapi/asm/unistd.h>
 
-
-#define NR_SYSCALLS                    506
+#define NR_SYSCALLS                    508
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
index 29daed819ebd0bd56bf3ca4fda4500cf4aaadfa4..dbcd9834af6d7f58eaad342356eee7a7dd12c5d2 100644 (file)
@@ -1,13 +1,7 @@
 #ifndef _UAPI_ASM_ALPHA_PARAM_H
 #define _UAPI_ASM_ALPHA_PARAM_H
 
-/* ??? Gross.  I don't want to parameterize this, and supposedly the
-   hardware ignores reprogramming.  We also need userland buy-in to the 
-   change in HZ, since this is visible in the wait4 resources etc.  */
-
-#ifndef __KERNEL__
 #define HZ             1024
-#endif
 
 #define EXEC_PAGESIZE  8192
 
@@ -17,5 +11,4 @@
 
 #define MAXHOSTNAMELEN 64      /* max length of hostname */
 
-
 #endif /* _UAPI_ASM_ALPHA_PARAM_H */
index 801d28bcea51f61c3b45a6fcd4504f01bc96b792..53ae7bb1bfd1b3841572ae69587b80dddd9c0657 100644 (file)
 #define __NR_sendmmsg                  503
 #define __NR_process_vm_readv          504
 #define __NR_process_vm_writev         505
+#define __NR_kcmp                      506
+#define __NR_finit_module              507
 
 #endif /* _UAPI_ALPHA_UNISTD_H */
index f62a994ef1263c1d9bea762371f7b797d5225cd3..a969b95ee5ac78a64d79dd4ffa815b4b4a0f5e98 100644 (file)
 
        .text
        .set noat
+       .cfi_sections   .debug_frame
 
 /* Stack offsets.  */
 #define SP_OFF                 184
 #define SWITCH_STACK_SIZE      320
 
+.macro CFI_START_OSF_FRAME     func
+       .align  4
+       .globl  \func
+       .type   \func,@function
+\func:
+       .cfi_startproc simple
+       .cfi_return_column 64
+       .cfi_def_cfa    $sp, 48
+       .cfi_rel_offset 64, 8
+       .cfi_rel_offset $gp, 16
+       .cfi_rel_offset $16, 24
+       .cfi_rel_offset $17, 32
+       .cfi_rel_offset $18, 40
+.endm
+
+.macro CFI_END_OSF_FRAME       func
+       .cfi_endproc
+       .size   \func, . - \func
+.endm
+
 /*
  * This defines the normal kernel pt-regs layout.
  *
  * the palcode-provided values are available to the signal handler.
  */
 
-#define SAVE_ALL                       \
-       subq    $sp, SP_OFF, $sp;       \
-       stq     $0, 0($sp);             \
-       stq     $1, 8($sp);             \
-       stq     $2, 16($sp);            \
-       stq     $3, 24($sp);            \
-       stq     $4, 32($sp);            \
-       stq     $28, 144($sp);          \
-       lda     $2, alpha_mv;           \
-       stq     $5, 40($sp);            \
-       stq     $6, 48($sp);            \
-       stq     $7, 56($sp);            \
-       stq     $8, 64($sp);            \
-       stq     $19, 72($sp);           \
-       stq     $20, 80($sp);           \
-       stq     $21, 88($sp);           \
-       ldq     $2, HAE_CACHE($2);      \
-       stq     $22, 96($sp);           \
-       stq     $23, 104($sp);          \
-       stq     $24, 112($sp);          \
-       stq     $25, 120($sp);          \
-       stq     $26, 128($sp);          \
-       stq     $27, 136($sp);          \
-       stq     $2, 152($sp);           \
-       stq     $16, 160($sp);          \
-       stq     $17, 168($sp);          \
+.macro SAVE_ALL
+       subq    $sp, SP_OFF, $sp
+       .cfi_adjust_cfa_offset  SP_OFF
+       stq     $0, 0($sp)
+       stq     $1, 8($sp)
+       stq     $2, 16($sp)
+       stq     $3, 24($sp)
+       stq     $4, 32($sp)
+       stq     $28, 144($sp)
+       .cfi_rel_offset $0, 0
+       .cfi_rel_offset $1, 8
+       .cfi_rel_offset $2, 16
+       .cfi_rel_offset $3, 24
+       .cfi_rel_offset $4, 32
+       .cfi_rel_offset $28, 144
+       lda     $2, alpha_mv
+       stq     $5, 40($sp)
+       stq     $6, 48($sp)
+       stq     $7, 56($sp)
+       stq     $8, 64($sp)
+       stq     $19, 72($sp)
+       stq     $20, 80($sp)
+       stq     $21, 88($sp)
+       ldq     $2, HAE_CACHE($2)
+       stq     $22, 96($sp)
+       stq     $23, 104($sp)
+       stq     $24, 112($sp)
+       stq     $25, 120($sp)
+       stq     $26, 128($sp)
+       stq     $27, 136($sp)
+       stq     $2, 152($sp)
+       stq     $16, 160($sp)
+       stq     $17, 168($sp)
        stq     $18, 176($sp)
+       .cfi_rel_offset $5, 40
+       .cfi_rel_offset $6, 48
+       .cfi_rel_offset $7, 56
+       .cfi_rel_offset $8, 64
+       .cfi_rel_offset $19, 72
+       .cfi_rel_offset $20, 80
+       .cfi_rel_offset $21, 88
+       .cfi_rel_offset $22, 96
+       .cfi_rel_offset $23, 104
+       .cfi_rel_offset $24, 112
+       .cfi_rel_offset $25, 120
+       .cfi_rel_offset $26, 128
+       .cfi_rel_offset $27, 136
+.endm
 
-#define RESTORE_ALL                    \
-       lda     $19, alpha_mv;          \
-       ldq     $0, 0($sp);             \
-       ldq     $1, 8($sp);             \
-       ldq     $2, 16($sp);            \
-       ldq     $3, 24($sp);            \
-       ldq     $21, 152($sp);          \
-       ldq     $20, HAE_CACHE($19);    \
-       ldq     $4, 32($sp);            \
-       ldq     $5, 40($sp);            \
-       ldq     $6, 48($sp);            \
-       ldq     $7, 56($sp);            \
-       subq    $20, $21, $20;          \
-       ldq     $8, 64($sp);            \
-       beq     $20, 99f;               \
-       ldq     $20, HAE_REG($19);      \
-       stq     $21, HAE_CACHE($19);    \
-       stq     $21, 0($20);            \
-99:;                                   \
-       ldq     $19, 72($sp);           \
-       ldq     $20, 80($sp);           \
-       ldq     $21, 88($sp);           \
-       ldq     $22, 96($sp);           \
-       ldq     $23, 104($sp);          \
-       ldq     $24, 112($sp);          \
-       ldq     $25, 120($sp);          \
-       ldq     $26, 128($sp);          \
-       ldq     $27, 136($sp);          \
-       ldq     $28, 144($sp);          \
+.macro RESTORE_ALL
+       lda     $19, alpha_mv
+       ldq     $0, 0($sp)
+       ldq     $1, 8($sp)
+       ldq     $2, 16($sp)
+       ldq     $3, 24($sp)
+       ldq     $21, 152($sp)
+       ldq     $20, HAE_CACHE($19)
+       ldq     $4, 32($sp)
+       ldq     $5, 40($sp)
+       ldq     $6, 48($sp)
+       ldq     $7, 56($sp)
+       subq    $20, $21, $20
+       ldq     $8, 64($sp)
+       beq     $20, 99f
+       ldq     $20, HAE_REG($19)
+       stq     $21, HAE_CACHE($19)
+       stq     $21, 0($20)
+99:    ldq     $19, 72($sp)
+       ldq     $20, 80($sp)
+       ldq     $21, 88($sp)
+       ldq     $22, 96($sp)
+       ldq     $23, 104($sp)
+       ldq     $24, 112($sp)
+       ldq     $25, 120($sp)
+       ldq     $26, 128($sp)
+       ldq     $27, 136($sp)
+       ldq     $28, 144($sp)
        addq    $sp, SP_OFF, $sp
+       .cfi_restore    $0
+       .cfi_restore    $1
+       .cfi_restore    $2
+       .cfi_restore    $3
+       .cfi_restore    $4
+       .cfi_restore    $5
+       .cfi_restore    $6
+       .cfi_restore    $7
+       .cfi_restore    $8
+       .cfi_restore    $19
+       .cfi_restore    $20
+       .cfi_restore    $21
+       .cfi_restore    $22
+       .cfi_restore    $23
+       .cfi_restore    $24
+       .cfi_restore    $25
+       .cfi_restore    $26
+       .cfi_restore    $27
+       .cfi_restore    $28
+       .cfi_adjust_cfa_offset  -SP_OFF
+.endm
+
+.macro DO_SWITCH_STACK
+       bsr     $1, do_switch_stack
+       .cfi_adjust_cfa_offset  SWITCH_STACK_SIZE
+       .cfi_rel_offset $9, 0
+       .cfi_rel_offset $10, 8
+       .cfi_rel_offset $11, 16
+       .cfi_rel_offset $12, 24
+       .cfi_rel_offset $13, 32
+       .cfi_rel_offset $14, 40
+       .cfi_rel_offset $15, 48
+       /* We don't really care about the FP registers for debugging.  */
+.endm
+
+.macro UNDO_SWITCH_STACK
+       bsr     $1, undo_switch_stack
+       .cfi_restore    $9
+       .cfi_restore    $10
+       .cfi_restore    $11
+       .cfi_restore    $12
+       .cfi_restore    $13
+       .cfi_restore    $14
+       .cfi_restore    $15
+       .cfi_adjust_cfa_offset  -SWITCH_STACK_SIZE
+.endm
 
 /*
  * Non-syscall kernel entry points.
  */
 
-       .align  4
-       .globl  entInt
-       .ent    entInt
-entInt:
+CFI_START_OSF_FRAME entInt
        SAVE_ALL
        lda     $8, 0x3fff
        lda     $26, ret_from_sys_call
        bic     $sp, $8, $8
        mov     $sp, $19
        jsr     $31, do_entInt
-.end entInt
+CFI_END_OSF_FRAME entInt
 
-       .align  4
-       .globl  entArith
-       .ent    entArith
-entArith:
+CFI_START_OSF_FRAME entArith
        SAVE_ALL
        lda     $8, 0x3fff
        lda     $26, ret_from_sys_call
        bic     $sp, $8, $8
        mov     $sp, $18
        jsr     $31, do_entArith
-.end entArith
+CFI_END_OSF_FRAME entArith
 
-       .align  4
-       .globl  entMM
-       .ent    entMM
-entMM:
+CFI_START_OSF_FRAME entMM
        SAVE_ALL
 /* save $9 - $15 so the inline exception code can manipulate them.  */
        subq    $sp, 56, $sp
+       .cfi_adjust_cfa_offset  56
        stq     $9, 0($sp)
        stq     $10, 8($sp)
        stq     $11, 16($sp)
@@ -128,6 +207,13 @@ entMM:
        stq     $13, 32($sp)
        stq     $14, 40($sp)
        stq     $15, 48($sp)
+       .cfi_rel_offset $9, 0
+       .cfi_rel_offset $10, 8
+       .cfi_rel_offset $11, 16
+       .cfi_rel_offset $12, 24
+       .cfi_rel_offset $13, 32
+       .cfi_rel_offset $14, 40
+       .cfi_rel_offset $15, 48
        addq    $sp, 56, $19
 /* handle the fault */
        lda     $8, 0x3fff
@@ -142,28 +228,33 @@ entMM:
        ldq     $14, 40($sp)
        ldq     $15, 48($sp)
        addq    $sp, 56, $sp
+       .cfi_restore    $9
+       .cfi_restore    $10
+       .cfi_restore    $11
+       .cfi_restore    $12
+       .cfi_restore    $13
+       .cfi_restore    $14
+       .cfi_restore    $15
+       .cfi_adjust_cfa_offset  -56
 /* finish up the syscall as normal.  */
        br      ret_from_sys_call
-.end entMM
+CFI_END_OSF_FRAME entMM
 
-       .align  4
-       .globl  entIF
-       .ent    entIF
-entIF:
+CFI_START_OSF_FRAME entIF
        SAVE_ALL
        lda     $8, 0x3fff
        lda     $26, ret_from_sys_call
        bic     $sp, $8, $8
        mov     $sp, $17
        jsr     $31, do_entIF
-.end entIF
+CFI_END_OSF_FRAME entIF
 
-       .align  4
-       .globl  entUna
-       .ent    entUna
-entUna:
+CFI_START_OSF_FRAME entUna
        lda     $sp, -256($sp)
+       .cfi_adjust_cfa_offset  256
        stq     $0, 0($sp)
+       .cfi_rel_offset $0, 0
+       .cfi_remember_state
        ldq     $0, 256($sp)    /* get PS */
        stq     $1, 8($sp)
        stq     $2, 16($sp)
@@ -195,6 +286,32 @@ entUna:
        stq     $28, 224($sp)
        mov     $sp, $19
        stq     $gp, 232($sp)
+       .cfi_rel_offset $1, 1*8
+       .cfi_rel_offset $2, 2*8
+       .cfi_rel_offset $3, 3*8
+       .cfi_rel_offset $4, 4*8
+       .cfi_rel_offset $5, 5*8
+       .cfi_rel_offset $6, 6*8
+       .cfi_rel_offset $7, 7*8
+       .cfi_rel_offset $8, 8*8
+       .cfi_rel_offset $9, 9*8
+       .cfi_rel_offset $10, 10*8
+       .cfi_rel_offset $11, 11*8
+       .cfi_rel_offset $12, 12*8
+       .cfi_rel_offset $13, 13*8
+       .cfi_rel_offset $14, 14*8
+       .cfi_rel_offset $15, 15*8
+       .cfi_rel_offset $19, 19*8
+       .cfi_rel_offset $20, 20*8
+       .cfi_rel_offset $21, 21*8
+       .cfi_rel_offset $22, 22*8
+       .cfi_rel_offset $23, 23*8
+       .cfi_rel_offset $24, 24*8
+       .cfi_rel_offset $25, 25*8
+       .cfi_rel_offset $26, 26*8
+       .cfi_rel_offset $27, 27*8
+       .cfi_rel_offset $28, 28*8
+       .cfi_rel_offset $29, 29*8
        lda     $8, 0x3fff
        stq     $31, 248($sp)
        bic     $sp, $8, $8
@@ -228,16 +345,45 @@ entUna:
        ldq     $28, 224($sp)
        ldq     $gp, 232($sp)
        lda     $sp, 256($sp)
+       .cfi_restore    $1
+       .cfi_restore    $2
+       .cfi_restore    $3
+       .cfi_restore    $4
+       .cfi_restore    $5
+       .cfi_restore    $6
+       .cfi_restore    $7
+       .cfi_restore    $8
+       .cfi_restore    $9
+       .cfi_restore    $10
+       .cfi_restore    $11
+       .cfi_restore    $12
+       .cfi_restore    $13
+       .cfi_restore    $14
+       .cfi_restore    $15
+       .cfi_restore    $19
+       .cfi_restore    $20
+       .cfi_restore    $21
+       .cfi_restore    $22
+       .cfi_restore    $23
+       .cfi_restore    $24
+       .cfi_restore    $25
+       .cfi_restore    $26
+       .cfi_restore    $27
+       .cfi_restore    $28
+       .cfi_restore    $29
+       .cfi_adjust_cfa_offset  -256
        call_pal PAL_rti
-.end entUna
 
        .align  4
-       .ent    entUnaUser
 entUnaUser:
+       .cfi_restore_state
        ldq     $0, 0($sp)      /* restore original $0 */
        lda     $sp, 256($sp)   /* pop entUna's stack frame */
+       .cfi_restore    $0
+       .cfi_adjust_cfa_offset  -256
        SAVE_ALL                /* setup normal kernel stack */
        lda     $sp, -56($sp)
+       .cfi_adjust_cfa_offset  56
        stq     $9, 0($sp)
        stq     $10, 8($sp)
        stq     $11, 16($sp)
@@ -245,6 +391,13 @@ entUnaUser:
        stq     $13, 32($sp)
        stq     $14, 40($sp)
        stq     $15, 48($sp)
+       .cfi_rel_offset $9, 0
+       .cfi_rel_offset $10, 8
+       .cfi_rel_offset $11, 16
+       .cfi_rel_offset $12, 24
+       .cfi_rel_offset $13, 32
+       .cfi_rel_offset $14, 40
+       .cfi_rel_offset $15, 48
        lda     $8, 0x3fff
        addq    $sp, 56, $19
        bic     $sp, $8, $8
@@ -257,20 +410,25 @@ entUnaUser:
        ldq     $14, 40($sp)
        ldq     $15, 48($sp)
        lda     $sp, 56($sp)
+       .cfi_restore    $9
+       .cfi_restore    $10
+       .cfi_restore    $11
+       .cfi_restore    $12
+       .cfi_restore    $13
+       .cfi_restore    $14
+       .cfi_restore    $15
+       .cfi_adjust_cfa_offset  -56
        br      ret_from_sys_call
-.end entUnaUser
+CFI_END_OSF_FRAME entUna
 
-       .align  4
-       .globl  entDbg
-       .ent    entDbg
-entDbg:
+CFI_START_OSF_FRAME entDbg
        SAVE_ALL
        lda     $8, 0x3fff
        lda     $26, ret_from_sys_call
        bic     $sp, $8, $8
        mov     $sp, $16
        jsr     $31, do_entDbg
-.end entDbg
+CFI_END_OSF_FRAME entDbg
 \f
 /*
  * The system call entry point is special.  Most importantly, it looks
@@ -285,8 +443,12 @@ entDbg:
 
        .align  4
        .globl  entSys
-       .globl  ret_from_sys_call
-       .ent    entSys
+       .type   entSys, @function
+       .cfi_startproc simple
+       .cfi_return_column 64
+       .cfi_def_cfa    $sp, 48
+       .cfi_rel_offset 64, 8
+       .cfi_rel_offset $gp, 16
 entSys:
        SAVE_ALL
        lda     $8, 0x3fff
@@ -300,6 +462,9 @@ entSys:
        stq     $17, SP_OFF+32($sp)
        s8addq  $0, $5, $5
        stq     $18, SP_OFF+40($sp)
+       .cfi_rel_offset $16, SP_OFF+24
+       .cfi_rel_offset $17, SP_OFF+32
+       .cfi_rel_offset $18, SP_OFF+40
        blbs    $3, strace
        beq     $4, 1f
        ldq     $27, 0($5)
@@ -310,6 +475,7 @@ entSys:
        stq     $31, 72($sp)            /* a3=0 => no error */
 
        .align  4
+       .globl  ret_from_sys_call
 ret_from_sys_call:
        cmovne  $26, 0, $18             /* $18 = 0 => non-restartable */
        ldq     $0, SP_OFF($sp)
@@ -324,10 +490,12 @@ ret_to_user:
        and     $17, _TIF_WORK_MASK, $2
        bne     $2, work_pending
 restore_all:
+       .cfi_remember_state
        RESTORE_ALL
        call_pal PAL_rti
 
 ret_to_kernel:
+       .cfi_restore_state
        lda     $16, 7
        call_pal PAL_swpipl
        br restore_all
@@ -356,7 +524,6 @@ $ret_success:
        stq     $0, 0($sp)
        stq     $31, 72($sp)    /* a3=0 => no error */
        br      ret_from_sys_call
-.end entSys
 
 /*
  * Do all cleanup when returning from all interrupts and system calls.
@@ -370,7 +537,7 @@ $ret_success:
  */
 
        .align  4
-       .ent    work_pending
+       .type   work_pending, @function
 work_pending:
        and     $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2
        bne     $2, $work_notifysig
@@ -387,23 +554,22 @@ $work_resched:
 
 $work_notifysig:
        mov     $sp, $16
-       bsr     $1, do_switch_stack
+       DO_SWITCH_STACK
        jsr     $26, do_work_pending
-       bsr     $1, undo_switch_stack
+       UNDO_SWITCH_STACK
        br      restore_all
-.end work_pending
 
 /*
  * PTRACE syscall handler
  */
 
        .align  4
-       .ent    strace
+       .type   strace, @function
 strace:
        /* set up signal stack, call syscall_trace */
-       bsr     $1, do_switch_stack
+       DO_SWITCH_STACK
        jsr     $26, syscall_trace_enter /* returns the syscall number */
-       bsr     $1, undo_switch_stack
+       UNDO_SWITCH_STACK
 
        /* get the arguments back.. */
        ldq     $16, SP_OFF+24($sp)
@@ -431,9 +597,9 @@ ret_from_straced:
 $strace_success:
        stq     $0, 0($sp)              /* save return value */
 
-       bsr     $1, do_switch_stack
+       DO_SWITCH_STACK
        jsr     $26, syscall_trace_leave
-       bsr     $1, undo_switch_stack
+       UNDO_SWITCH_STACK
        br      $31, ret_from_sys_call
 
        .align  3
@@ -447,26 +613,31 @@ $strace_error:
        stq     $0, 0($sp)
        stq     $1, 72($sp)     /* a3 for return */
 
-       bsr     $1, do_switch_stack
+       DO_SWITCH_STACK
        mov     $18, $9         /* save old syscall number */
        mov     $19, $10        /* save old a3 */
        jsr     $26, syscall_trace_leave
        mov     $9, $18
        mov     $10, $19
-       bsr     $1, undo_switch_stack
+       UNDO_SWITCH_STACK
 
        mov     $31, $26        /* tell "ret_from_sys_call" we can restart */
        br      ret_from_sys_call
-.end strace
+CFI_END_OSF_FRAME entSys
 \f
 /*
  * Save and restore the switch stack -- aka the balance of the user context.
  */
 
        .align  4
-       .ent    do_switch_stack
+       .type   do_switch_stack, @function
+       .cfi_startproc simple
+       .cfi_return_column 64
+       .cfi_def_cfa $sp, 0
+       .cfi_register 64, $1
 do_switch_stack:
        lda     $sp, -SWITCH_STACK_SIZE($sp)
+       .cfi_adjust_cfa_offset  SWITCH_STACK_SIZE
        stq     $9, 0($sp)
        stq     $10, 8($sp)
        stq     $11, 16($sp)
@@ -510,10 +681,14 @@ do_switch_stack:
        stt     $f0, 312($sp)   # save fpcr in slot of $f31
        ldt     $f0, 64($sp)    # dont let "do_switch_stack" change fp state.
        ret     $31, ($1), 1
-.end do_switch_stack
+       .cfi_endproc
+       .size   do_switch_stack, .-do_switch_stack
 
        .align  4
-       .ent    undo_switch_stack
+       .type   undo_switch_stack, @function
+       .cfi_startproc simple
+       .cfi_def_cfa $sp, 0
+       .cfi_register 64, $1
 undo_switch_stack:
        ldq     $9, 0($sp)
        ldq     $10, 8($sp)
@@ -558,7 +733,8 @@ undo_switch_stack:
        ldt     $f30, 304($sp)
        lda     $sp, SWITCH_STACK_SIZE($sp)
        ret     $31, ($1), 1
-.end undo_switch_stack
+       .cfi_endproc
+       .size   undo_switch_stack, .-undo_switch_stack
 \f
 /*
  * The meat of the context switch code.
@@ -566,17 +742,18 @@ undo_switch_stack:
 
        .align  4
        .globl  alpha_switch_to
-       .ent    alpha_switch_to
+       .type   alpha_switch_to, @function
+       .cfi_startproc
 alpha_switch_to:
-       .prologue 0
-       bsr     $1, do_switch_stack
+       DO_SWITCH_STACK
        call_pal PAL_swpctx
        lda     $8, 0x3fff
-       bsr     $1, undo_switch_stack
+       UNDO_SWITCH_STACK
        bic     $sp, $8, $8
        mov     $17, $0
        ret
-.end alpha_switch_to
+       .cfi_endproc
+       .size   alpha_switch_to, .-alpha_switch_to
 
 /*
  * New processes begin life here.
index f433fc11877a30ae096dd54d263b37b094132fac..28e4429596f3f208c1b868887112bad4faa33987 100644 (file)
@@ -236,7 +236,7 @@ void __init
 init_rtc_irq(void)
 {
        irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip,
-                                     handle_simple_irq, "RTC");
+                                     handle_percpu_irq, "RTC");
        setup_irq(RTC_IRQ, &timer_irqaction);
 }
 
index 53b18a620e1c2a72a91bfa885653d81263a6ac7e..9dbbcb3b914675f80e3e0f097ced7e4545b533f9 100644 (file)
@@ -264,9 +264,10 @@ recv_secondary_console_msg(void)
                if (cnt <= 0 || cnt >= 80)
                        strcpy(buf, "<<< BOGUS MSG >>>");
                else {
-                       cp1 = (char *) &cpu->ipc_buffer[11];
+                       cp1 = (char *) &cpu->ipc_buffer[1];
                        cp2 = buf;
-                       strcpy(cp2, cp1);
+                       memcpy(cp2, cp1, cnt);
+                       cp2[cnt] = '\0';
                        
                        while ((cp2 = strchr(cp2, '\r')) != 0) {
                                *cp2 = ' ';
index 5bf401f7ea97f5379067facc21762c9dfdd1732d..6c35159bc00eb47860516bda8868801a74bbe868 100644 (file)
@@ -190,9 +190,6 @@ static struct irq_chip clipper_irq_type = {
 static void
 dp264_device_interrupt(unsigned long vector)
 {
-#if 1
-       printk("dp264_device_interrupt: NOT IMPLEMENTED YET!!\n");
-#else
        unsigned long pld;
        unsigned int i;
 
@@ -210,12 +207,7 @@ dp264_device_interrupt(unsigned long vector)
                        isa_device_interrupt(vector);
                else
                        handle_irq(16 + i);
-#if 0
-               TSUNAMI_cchip->dir0.csr = 1UL << i; mb();
-               tmp = TSUNAMI_cchip->dir0.csr;
-#endif
        }
-#endif
 }
 
 static void 
index 407accc808777bc4a6f36fa6b912b92686d09a0b..c92e389ff2192973f95f51073a5ce50e5f132a3b 100644 (file)
@@ -317,8 +317,9 @@ marvel_init_irq(void)
 }
 
 static int 
-marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
 {
+       struct pci_dev *dev = (struct pci_dev *)cdev;
        struct pci_controller *hose = dev->sysdata;
        struct io7_port *io7_port = hose->sysdata;
        struct io7 *io7 = io7_port->io7;
index 4284ec798ec94c89ebe0e4b05e1eb2d1c6dbb0ed..dca9b3fb0071d8c24c1c5e1048a0da8461e12d8e 100644 (file)
@@ -524,6 +524,8 @@ sys_call_table:
        .quad sys_sendmmsg
        .quad sys_process_vm_readv
        .quad sys_process_vm_writev             /* 505 */
+       .quad sys_kcmp
+       .quad sys_finit_module
 
        .size sys_call_table, . - sys_call_table
        .type sys_call_table, @object
index e336694ca042127c39a4cef780dee557b2f068cd..ea3395036556ceea61c74c08452d5e7a7061e566 100644 (file)
@@ -105,9 +105,7 @@ void arch_irq_work_raise(void)
 
 static inline __u32 rpcc(void)
 {
-    __u32 result;
-    asm volatile ("rpcc %0" : "=r"(result));
-    return result;
+       return __builtin_alpha_rpcc();
 }
 
 int update_persistent_clock(struct timespec now)
index be1fba334bd07c04d42f3f004725b8d25b26841d..bd0665cdc840d3e9a79b752ec3a88279ede69344 100644 (file)
@@ -66,8 +66,8 @@ dik_show_regs(struct pt_regs *regs, unsigned long *r9_15)
 {
        printk("pc = [<%016lx>]  ra = [<%016lx>]  ps = %04lx    %s\n",
               regs->pc, regs->r26, regs->ps, print_tainted());
-       print_symbol("pc is at %s\n", regs->pc);
-       print_symbol("ra is at %s\n", regs->r26 );
+       printk("pc is at %pSR\n", (void *)regs->pc);
+       printk("ra is at %pSR\n", (void *)regs->r26);
        printk("v0 = %016lx  t0 = %016lx  t1 = %016lx\n",
               regs->r0, regs->r1, regs->r2);
        printk("t2 = %016lx  t3 = %016lx  t4 = %016lx\n",
@@ -132,9 +132,7 @@ dik_show_trace(unsigned long *sp)
                        continue;
                if (tmp >= (unsigned long) &_etext)
                        continue;
-               printk("[<%lx>]", tmp);
-               print_symbol(" %s", tmp);
-               printk("\n");
+               printk("[<%lx>] %pSR\n", tmp, (void *)tmp);
                if (i > 40) {
                        printk(" ...");
                        break;
index 09a8743143f37a046e8d57e3d65f58f9a1bd3cb4..d3e5e9bc8f946fa3fd7158a3ecc931561c71c1a3 100644 (file)
@@ -55,6 +55,8 @@ struct device_node;
 #define EEH_PE_RECOVERING      (1 << 1)        /* Recovering PE        */
 #define EEH_PE_PHB_DEAD                (1 << 2)        /* Dead PHB             */
 
+#define EEH_PE_KEEP            (1 << 8)        /* Keep PE on hotplug   */
+
 struct eeh_pe {
        int type;                       /* PE type: PHB/Bus/Device      */
        int state;                      /* PE EEH dependent mode        */
@@ -72,8 +74,8 @@ struct eeh_pe {
        struct list_head child;         /* Child PEs                    */
 };
 
-#define eeh_pe_for_each_dev(pe, edev) \
-               list_for_each_entry(edev, &pe->edevs, list)
+#define eeh_pe_for_each_dev(pe, edev, tmp) \
+               list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
 
 /*
  * The struct is used to trace EEH state for the associated
@@ -82,7 +84,13 @@ struct eeh_pe {
  * another tree except the currently existing tree of PCI
  * buses and PCI devices
  */
-#define EEH_DEV_IRQ_DISABLED   (1<<0)  /* Interrupt disabled           */
+#define EEH_DEV_BRIDGE         (1 << 0)        /* PCI bridge           */
+#define EEH_DEV_ROOT_PORT      (1 << 1)        /* PCIe root port       */
+#define EEH_DEV_DS_PORT                (1 << 2)        /* Downstream port      */
+#define EEH_DEV_IRQ_DISABLED   (1 << 3)        /* Interrupt disabled   */
+#define EEH_DEV_DISCONNECTED   (1 << 4)        /* Removing from PE     */
+
+#define EEH_DEV_SYSFS          (1 << 8)        /* Sysfs created        */
 
 struct eeh_dev {
        int mode;                       /* EEH mode                     */
@@ -90,11 +98,13 @@ struct eeh_dev {
        int config_addr;                /* Config address               */
        int pe_config_addr;             /* PE config address            */
        u32 config_space[16];           /* Saved PCI config space       */
+       u8 pcie_cap;                    /* Saved PCIe capability        */
        struct eeh_pe *pe;              /* Associated PE                */
        struct list_head list;          /* Form link list in the PE     */
        struct pci_controller *phb;     /* Associated PHB               */
        struct device_node *dn;         /* Associated device node       */
        struct pci_dev *pdev;           /* Associated PCI device        */
+       struct pci_bus *bus;            /* PCI bus for partial hotplug  */
 };
 
 static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
@@ -193,8 +203,10 @@ int eeh_phb_pe_create(struct pci_controller *phb);
 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
 struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe);
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
+void *eeh_pe_traverse(struct eeh_pe *root,
+               eeh_traverse_func fn, void *flag);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
                eeh_traverse_func fn, void *flag);
 void eeh_pe_restore_bars(struct eeh_pe *pe);
@@ -209,10 +221,12 @@ unsigned long eeh_check_failure(const volatile void __iomem *token,
                                unsigned long val);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_build(void);
+void eeh_add_device_early(struct device_node *);
 void eeh_add_device_tree_early(struct device_node *);
+void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
-void eeh_remove_bus_device(struct pci_dev *, int);
+void eeh_remove_device(struct pci_dev *);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
@@ -252,13 +266,17 @@ static inline unsigned long eeh_check_failure(const volatile void __iomem *token
 
 static inline void eeh_addr_cache_build(void) { }
 
+static inline void eeh_add_device_early(struct device_node *dn) { }
+
 static inline void eeh_add_device_tree_early(struct device_node *dn) { }
 
+static inline void eeh_add_device_late(struct pci_dev *dev) { }
+
 static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
 
 static inline void eeh_add_sysfs_files(struct pci_bus *bus) { }
 
-static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { }
+static inline void eeh_remove_device(struct pci_dev *dev) { }
 
 #define EEH_POSSIBLE_ERROR(val, type) (0)
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
index ba713f166fa57fb2f9b08002a8a5cf8c0162d196..10be1dd01c6b4bcd79f88725f9b5067dc8a0db8a 100644 (file)
@@ -96,10 +96,11 @@ static inline bool arch_irqs_disabled(void)
 #endif
 
 #define hard_irq_disable()     do {                    \
-       u8 _was_enabled = get_paca()->soft_enabled;     \
+       u8 _was_enabled;                                \
        __hard_irq_disable();                           \
-       get_paca()->soft_enabled = 0;                   \
-       get_paca()->irq_happened |= PACA_IRQ_HARD_DIS;  \
+       _was_enabled = local_paca->soft_enabled;        \
+       local_paca->soft_enabled = 0;                   \
+       local_paca->irq_happened |= PACA_IRQ_HARD_DIS;  \
        if (_was_enabled)                               \
                trace_hardirqs_off();                   \
 } while(0)
index c1df590ec4440a96f81b97f6048ad79a7f326179..49fa55bfbac4fba38f26368cbbdcdf1513aa590c 100644 (file)
@@ -82,10 +82,9 @@ struct exception_table_entry;
 void sort_ex_table(struct exception_table_entry *start,
                   struct exception_table_entry *finish);
 
-#ifdef CONFIG_MODVERSIONS
+#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64)
 #define ARCH_RELOCATES_KCRCTAB
-
-extern const unsigned long reloc_start[];
+#define reloc_start PHYSICAL_START
 #endif
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MODULE_H */
index 2c1d8cb9b26562a295ab6b6016261b0d692df30b..32d0d2018faf84afe15814f13cb7f1fe4933fa4e 100644 (file)
@@ -209,7 +209,6 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
 extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
 
 /** Remove all of the PCI devices under this bus */
-extern void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe);
 extern void pcibios_remove_pci_devices(struct pci_bus *bus);
 
 /** Discover new pci devices under this bus, and add them */
index 5d7d9c2a547373dbb692ebded8b519df476bb2e4..a6840e4e24f7abe68d918d6dc576bcab18d0183d 100644 (file)
 #define PVR_970MP      0x0044
 #define PVR_970GX      0x0045
 #define PVR_POWER7p    0x004A
-#define PVR_POWER8     0x004B
+#define PVR_POWER8E    0x004B
+#define PVR_POWER8     0x004D
 #define PVR_BE         0x0070
 #define PVR_PA6T       0x0090
 
index 2a45d0f043852a33cc41826c7835c59471b1ed60..22973a74df7342b1146a3dd2881432c3cd8ad2b8 100644 (file)
@@ -494,9 +494,27 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .cpu_restore            = __restore_cpu_power7,
                .platform               = "power7+",
        },
-       {       /* Power8 */
+       {       /* Power8E */
                .pvr_mask               = 0xffff0000,
                .pvr_value              = 0x004b0000,
+               .cpu_name               = "POWER8E (raw)",
+               .cpu_features           = CPU_FTRS_POWER8,
+               .cpu_user_features      = COMMON_USER_POWER8,
+               .cpu_user_features2     = COMMON_USER2_POWER8,
+               .mmu_features           = MMU_FTRS_POWER8,
+               .icache_bsize           = 128,
+               .dcache_bsize           = 128,
+               .num_pmcs               = 6,
+               .pmc_type               = PPC_PMC_IBM,
+               .oprofile_cpu_type      = "ppc64/power8",
+               .oprofile_type          = PPC_OPROFILE_INVALID,
+               .cpu_setup              = __setup_cpu_power8,
+               .cpu_restore            = __restore_cpu_power8,
+               .platform               = "power8",
+       },
+       {       /* Power8 */
+               .pvr_mask               = 0xffff0000,
+               .pvr_value              = 0x004d0000,
                .cpu_name               = "POWER8 (raw)",
                .cpu_features           = CPU_FTRS_POWER8,
                .cpu_user_features      = COMMON_USER_POWER8,
index 39954fe941b87e45a95951f9c39bb2beae6724db..ea9414c8088d01289411eded7d67315c9a2e8661 100644 (file)
@@ -231,7 +231,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 {
        size_t loglen = 0;
-       struct eeh_dev *edev;
+       struct eeh_dev *edev, *tmp;
        bool valid_cfg_log = true;
 
        /*
@@ -251,7 +251,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
                eeh_pe_restore_bars(pe);
 
                pci_regs_buf[0] = 0;
-               eeh_pe_for_each_dev(pe, edev) {
+               eeh_pe_for_each_dev(pe, edev, tmp) {
                        loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
                                                      EEH_PCI_REGS_LOG_LEN - loglen);
                }
@@ -499,8 +499,6 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
        }
 
        eeh_dev_check_failure(edev);
-
-       pci_dev_put(eeh_dev_to_pci_dev(edev));
        return val;
 }
 
@@ -838,7 +836,7 @@ core_initcall_sync(eeh_init);
  * on the CEC architecture, type of the device, on earlier boot
  * command-line arguments & etc.
  */
-static void eeh_add_device_early(struct device_node *dn)
+void eeh_add_device_early(struct device_node *dn)
 {
        struct pci_controller *phb;
 
@@ -886,7 +884,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
  * This routine must be used to complete EEH initialization for PCI
  * devices that were added after system boot (e.g. hotplug, dlpar).
  */
-static void eeh_add_device_late(struct pci_dev *dev)
+void eeh_add_device_late(struct pci_dev *dev)
 {
        struct device_node *dn;
        struct eeh_dev *edev;
@@ -902,9 +900,23 @@ static void eeh_add_device_late(struct pci_dev *dev)
                pr_debug("EEH: Already referenced !\n");
                return;
        }
-       WARN_ON(edev->pdev);
 
-       pci_dev_get(dev);
+       /*
+        * The EEH cache might not be removed correctly because of
+        * unbalanced kref to the device during unplug time, which
+        * relies on pcibios_release_device(). So we have to remove
+        * that here explicitly.
+        */
+       if (edev->pdev) {
+               eeh_rmv_from_parent_pe(edev);
+               eeh_addr_cache_rmv_dev(edev->pdev);
+               eeh_sysfs_remove_device(edev->pdev);
+               edev->mode &= ~EEH_DEV_SYSFS;
+
+               edev->pdev = NULL;
+               dev->dev.archdata.edev = NULL;
+       }
+
        edev->pdev = dev;
        dev->dev.archdata.edev = edev;
 
@@ -967,7 +979,6 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
 /**
  * eeh_remove_device - Undo EEH setup for the indicated pci device
  * @dev: pci device to be removed
- * @purge_pe: remove the PE or not
  *
  * This routine should be called when a device is removed from
  * a running system (e.g. by hotplug or dlpar).  It unregisters
@@ -975,7 +986,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
  * this device will no longer be detected after this call; thus,
  * i/o errors affecting this slot may leave this device unusable.
  */
-static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
+void eeh_remove_device(struct pci_dev *dev)
 {
        struct eeh_dev *edev;
 
@@ -986,42 +997,29 @@ static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
        /* Unregister the device with the EEH/PCI address search system */
        pr_debug("EEH: Removing device %s\n", pci_name(dev));
 
-       if (!edev || !edev->pdev) {
+       if (!edev || !edev->pdev || !edev->pe) {
                pr_debug("EEH: Not referenced !\n");
                return;
        }
+
+       /*
+        * During the hotplug for EEH error recovery, we need the EEH
+        * device attached to the parent PE in order for BAR restore
+        * a bit later. So we keep it for BAR restore and remove it
+        * from the parent PE during the BAR resotre.
+        */
        edev->pdev = NULL;
        dev->dev.archdata.edev = NULL;
-       pci_dev_put(dev);
+       if (!(edev->pe->state & EEH_PE_KEEP))
+               eeh_rmv_from_parent_pe(edev);
+       else
+               edev->mode |= EEH_DEV_DISCONNECTED;
 
-       eeh_rmv_from_parent_pe(edev, purge_pe);
        eeh_addr_cache_rmv_dev(dev);
        eeh_sysfs_remove_device(dev);
+       edev->mode &= ~EEH_DEV_SYSFS;
 }
 
-/**
- * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
- * @dev: PCI device
- * @purge_pe: remove the corresponding PE or not
- *
- * This routine must be called when a device is removed from the
- * running system through hotplug or dlpar. The corresponding
- * PCI address cache will be removed.
- */
-void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe)
-{
-       struct pci_bus *bus = dev->subordinate;
-       struct pci_dev *child, *tmp;
-
-       eeh_remove_device(dev, purge_pe);
-
-       if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
-               list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
-                        eeh_remove_bus_device(child, purge_pe);
-       }
-}
-EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
-
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
        if (0 == eeh_subsystem_enabled) {
index f9ac1232a746c595d696ef7b66469a093070c877..e8c9fd546a5c477b4c7755527ffb19dd64c04eca 100644 (file)
@@ -68,16 +68,12 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
                struct pci_io_addr_range *piar;
                piar = rb_entry(n, struct pci_io_addr_range, rb_node);
 
-               if (addr < piar->addr_lo) {
+               if (addr < piar->addr_lo)
                        n = n->rb_left;
-               } else {
-                       if (addr > piar->addr_hi) {
-                               n = n->rb_right;
-                       } else {
-                               pci_dev_get(piar->pcidev);
-                               return piar->edev;
-                       }
-               }
+               else if (addr > piar->addr_hi)
+                       n = n->rb_right;
+               else
+                       return piar->edev;
        }
 
        return NULL;
@@ -156,7 +152,6 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
        if (!piar)
                return NULL;
 
-       pci_dev_get(dev);
        piar->addr_lo = alo;
        piar->addr_hi = ahi;
        piar->edev = pci_dev_to_eeh_dev(dev);
@@ -250,7 +245,6 @@ restart:
 
                if (piar->pcidev == dev) {
                        rb_erase(n, &pci_io_addr_cache_root.rb_root);
-                       pci_dev_put(piar->pcidev);
                        kfree(piar);
                        goto restart;
                }
@@ -302,12 +296,10 @@ void eeh_addr_cache_build(void)
                if (!edev)
                        continue;
 
-               pci_dev_get(dev);  /* matching put is in eeh_remove_device() */
                dev->dev.archdata.edev = edev;
                edev->pdev = dev;
 
                eeh_addr_cache_insert_dev(dev);
-
                eeh_sysfs_add_device(dev);
        }
 
index 2b1ce17cae504d95a5be5e04e16828a5e3f7516e..36bed5a12750b639e2c305153585869be536ddef 100644 (file)
@@ -143,10 +143,14 @@ static void eeh_disable_irq(struct pci_dev *dev)
 static void eeh_enable_irq(struct pci_dev *dev)
 {
        struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+       struct irq_desc *desc;
 
        if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
                edev->mode &= ~EEH_DEV_IRQ_DISABLED;
-               enable_irq(dev->irq);
+
+               desc = irq_to_desc(dev->irq);
+               if (desc && desc->depth > 0)
+                       enable_irq(dev->irq);
        }
 }
 
@@ -338,6 +342,54 @@ static void *eeh_report_failure(void *data, void *userdata)
        return NULL;
 }
 
+static void *eeh_rmv_device(void *data, void *userdata)
+{
+       struct pci_driver *driver;
+       struct eeh_dev *edev = (struct eeh_dev *)data;
+       struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+       int *removed = (int *)userdata;
+
+       /*
+        * Actually, we should remove the PCI bridges as well.
+        * However, that's lots of complexity to do that,
+        * particularly some of devices under the bridge might
+        * support EEH. So we just care about PCI devices for
+        * simplicity here.
+        */
+       if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
+               return NULL;
+       driver = eeh_pcid_get(dev);
+       if (driver && driver->err_handler)
+               return NULL;
+
+       /* Remove it from PCI subsystem */
+       pr_debug("EEH: Removing %s without EEH sensitive driver\n",
+                pci_name(dev));
+       edev->bus = dev->bus;
+       edev->mode |= EEH_DEV_DISCONNECTED;
+       (*removed)++;
+
+       pci_stop_and_remove_bus_device(dev);
+
+       return NULL;
+}
+
+static void *eeh_pe_detach_dev(void *data, void *userdata)
+{
+       struct eeh_pe *pe = (struct eeh_pe *)data;
+       struct eeh_dev *edev, *tmp;
+
+       eeh_pe_for_each_dev(pe, edev, tmp) {
+               if (!(edev->mode & EEH_DEV_DISCONNECTED))
+                       continue;
+
+               edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
+               eeh_rmv_from_parent_pe(edev);
+       }
+
+       return NULL;
+}
+
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
  * @pe: EEH PE
@@ -349,8 +401,9 @@ static void *eeh_report_failure(void *data, void *userdata)
  */
 static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 {
+       struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
        struct timeval tstamp;
-       int cnt, rc;
+       int cnt, rc, removed = 0;
 
        /* pcibios will clear the counter; save the value */
        cnt = pe->freeze_count;
@@ -362,8 +415,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
         * devices are expected to be attached soon when calling
         * into pcibios_add_pci_devices().
         */
+       eeh_pe_state_mark(pe, EEH_PE_KEEP);
        if (bus)
-               __pcibios_remove_pci_devices(bus, 0);
+               pcibios_remove_pci_devices(bus);
+       else if (frozen_bus)
+               eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
 
        /* Reset the pci controller. (Asserts RST#; resets config space).
         * Reconfigure bridges and devices. Don't try to bring the system
@@ -384,9 +440,24 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
         * potentially weird things happen.
         */
        if (bus) {
+               pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
                ssleep(5);
+
+               /*
+                * The EEH device is still connected with its parent
+                * PE. We should disconnect it so the binding can be
+                * rebuilt when adding PCI devices.
+                */
+               eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
                pcibios_add_pci_devices(bus);
+       } else if (frozen_bus && removed) {
+               pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
+               ssleep(5);
+
+               eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
+               pcibios_add_pci_devices(frozen_bus);
        }
+       eeh_pe_state_clear(pe, EEH_PE_KEEP);
 
        pe->tstamp = tstamp;
        pe->freeze_count = cnt;
index 016588a6f5ede19c9c45b0ab6a3bd44cfce962ae..f9450537e335a350354dec35ff5fdf15876ef0a6 100644 (file)
@@ -149,8 +149,8 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
  * callback returns something other than NULL, or no more PEs
  * to be traversed.
  */
-static void *eeh_pe_traverse(struct eeh_pe *root,
-                       eeh_traverse_func fn, void *flag)
+void *eeh_pe_traverse(struct eeh_pe *root,
+                     eeh_traverse_func fn, void *flag)
 {
        struct eeh_pe *pe;
        void *ret;
@@ -176,7 +176,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
                eeh_traverse_func fn, void *flag)
 {
        struct eeh_pe *pe;
-       struct eeh_dev *edev;
+       struct eeh_dev *edev, *tmp;
        void *ret;
 
        if (!root) {
@@ -186,7 +186,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
 
        /* Traverse root PE */
        for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
-               eeh_pe_for_each_dev(pe, edev) {
+               eeh_pe_for_each_dev(pe, edev, tmp) {
                        ret = fn(edev, flag);
                        if (ret)
                                return ret;
@@ -333,7 +333,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
                while (parent) {
                        if (!(parent->type & EEH_PE_INVALID))
                                break;
-                       parent->type &= ~EEH_PE_INVALID;
+                       parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
                        parent = parent->parent;
                }
                pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
@@ -397,21 +397,20 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 /**
  * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
  * @edev: EEH device
- * @purge_pe: remove PE or not
  *
  * The PE hierarchy tree might be changed when doing PCI hotplug.
  * Also, the PCI devices or buses could be removed from the system
  * during EEH recovery. So we have to call the function remove the
  * corresponding PE accordingly if necessary.
  */
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
 {
        struct eeh_pe *pe, *parent, *child;
        int cnt;
 
        if (!edev->pe) {
-               pr_warning("%s: No PE found for EEH device %s\n",
-                       __func__, edev->dn->full_name);
+               pr_debug("%s: No PE found for EEH device %s\n",
+                        __func__, edev->dn->full_name);
                return -EEXIST;
        }
 
@@ -431,7 +430,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
                if (pe->type & EEH_PE_PHB)
                        break;
 
-               if (purge_pe) {
+               if (!(pe->state & EEH_PE_KEEP)) {
                        if (list_empty(&pe->edevs) &&
                            list_empty(&pe->child_list)) {
                                list_del(&pe->child);
@@ -502,7 +501,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
 {
        struct eeh_pe *pe = (struct eeh_pe *)data;
        int state = *((int *)flag);
-       struct eeh_dev *tmp;
+       struct eeh_dev *edev, *tmp;
        struct pci_dev *pdev;
 
        /*
@@ -512,8 +511,8 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
         * the PCI device driver.
         */
        pe->state |= state;
-       eeh_pe_for_each_dev(pe, tmp) {
-               pdev = eeh_dev_to_pci_dev(tmp);
+       eeh_pe_for_each_dev(pe, edev, tmp) {
+               pdev = eeh_dev_to_pci_dev(edev);
                if (pdev)
                        pdev->error_state = pci_channel_io_frozen;
        }
@@ -579,7 +578,7 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state)
  * blocked on normal path during the stage. So we need utilize
  * eeh operations, which is always permitted.
  */
-static void eeh_bridge_check_link(struct pci_dev *pdev,
+static void eeh_bridge_check_link(struct eeh_dev *edev,
                                  struct device_node *dn)
 {
        int cap;
@@ -590,16 +589,17 @@ static void eeh_bridge_check_link(struct pci_dev *pdev,
         * We only check root port and downstream ports of
         * PCIe switches
         */
-       if (!pci_is_pcie(pdev) ||
-           (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT &&
-            pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM))
+       if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
                return;
 
-       pr_debug("%s: Check PCIe link for %s ...\n",
-                __func__, pci_name(pdev));
+       pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
+                __func__, edev->phb->global_number,
+                edev->config_addr >> 8,
+                PCI_SLOT(edev->config_addr & 0xFF),
+                PCI_FUNC(edev->config_addr & 0xFF));
 
        /* Check slot status */
-       cap = pdev->pcie_cap;
+       cap = edev->pcie_cap;
        eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
        if (!(val & PCI_EXP_SLTSTA_PDS)) {
                pr_debug("  No card in the slot (0x%04x) !\n", val);
@@ -653,8 +653,7 @@ static void eeh_bridge_check_link(struct pci_dev *pdev,
 #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
 #define SAVED_BYTE(OFF)        (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
 
-static void eeh_restore_bridge_bars(struct pci_dev *pdev,
-                                   struct eeh_dev *edev,
+static void eeh_restore_bridge_bars(struct eeh_dev *edev,
                                    struct device_node *dn)
 {
        int i;
@@ -680,7 +679,7 @@ static void eeh_restore_bridge_bars(struct pci_dev *pdev,
        eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
 
        /* Check the PCIe link is ready */
-       eeh_bridge_check_link(pdev, dn);
+       eeh_bridge_check_link(edev, dn);
 }
 
 static void eeh_restore_device_bars(struct eeh_dev *edev,
@@ -729,19 +728,12 @@ static void eeh_restore_device_bars(struct eeh_dev *edev,
  */
 static void *eeh_restore_one_device_bars(void *data, void *flag)
 {
-       struct pci_dev *pdev = NULL;
        struct eeh_dev *edev = (struct eeh_dev *)data;
        struct device_node *dn = eeh_dev_to_of_node(edev);
 
-       /* Trace the PCI bridge */
-       if (eeh_probe_mode_dev()) {
-               pdev = eeh_dev_to_pci_dev(edev);
-               if (pdev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
-                        pdev = NULL;
-        }
-
-       if (pdev)
-               eeh_restore_bridge_bars(pdev, edev, dn);
+       /* Do special restore for bridges */
+       if (edev->mode & EEH_DEV_BRIDGE)
+               eeh_restore_bridge_bars(edev, dn);
        else
                eeh_restore_device_bars(edev, dn);
 
index e7ae3484918c29639ff1a1baa597579a973fe6cd..5d753d4f2c75938e5fdd2c75b821ff98877cd2c9 100644 (file)
@@ -56,19 +56,40 @@ EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
 
 void eeh_sysfs_add_device(struct pci_dev *pdev)
 {
+       struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
        int rc=0;
 
+       if (edev && (edev->mode & EEH_DEV_SYSFS))
+               return;
+
        rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
        rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
        rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
 
        if (rc)
                printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
+       else if (edev)
+               edev->mode |= EEH_DEV_SYSFS;
 }
 
 void eeh_sysfs_remove_device(struct pci_dev *pdev)
 {
+       struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+       /*
+        * The parent directory might have been removed. We needn't
+        * continue for that case.
+        */
+       if (!pdev->dev.kobj.sd) {
+               if (edev)
+                       edev->mode &= ~EEH_DEV_SYSFS;
+               return;
+       }
+
        device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
        device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
        device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+
+       if (edev)
+               edev->mode &= ~EEH_DEV_SYSFS;
 }
index f46914a0f33ea6fd3cacbdddb76288722dfb42cb..7d22a675fe1a3338d5f1a9d64eb44ce70d8f8f7b 100644 (file)
@@ -1462,6 +1462,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
        /* Allocate bus and devices resources */
        pcibios_allocate_bus_resources(bus);
        pcibios_claim_one_bus(bus);
+       if (!pci_has_flag(PCI_PROBE_ONLY))
+               pci_assign_unassigned_bus_resources(bus);
 
        /* Fixup EEH */
        eeh_add_device_tree_late(bus);
index 3f608800c06b59082116c3bcd642d26146b357c2..c1e17ae68a08047a0a60154aed49684d8b4aad2f 100644 (file)
 #include <asm/eeh.h>
 
 /**
- * __pcibios_remove_pci_devices - remove all devices under this bus
+ * pcibios_release_device - release PCI device
+ * @dev: PCI device
+ *
+ * The function is called before releasing the indicated PCI device.
+ */
+void pcibios_release_device(struct pci_dev *dev)
+{
+       eeh_remove_device(dev);
+}
+
+/**
+ * pcibios_remove_pci_devices - remove all devices under this bus
  * @bus: the indicated PCI bus
- * @purge_pe: destroy the PE on removal of PCI devices
  *
  * Remove all of the PCI devices under this bus both from the
  * linux pci device tree, and from the powerpc EEH address cache.
- * By default, the corresponding PE will be destroied during the
- * normal PCI hotplug path. For PCI hotplug during EEH recovery,
- * the corresponding PE won't be destroied and deallocated.
  */
-void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe)
+void pcibios_remove_pci_devices(struct pci_bus *bus)
 {
        struct pci_dev *dev, *tmp;
        struct pci_bus *child_bus;
 
        /* First go down child busses */
        list_for_each_entry(child_bus, &bus->children, node)
-               __pcibios_remove_pci_devices(child_bus, purge_pe);
+               pcibios_remove_pci_devices(child_bus);
 
        pr_debug("PCI: Removing devices on bus %04x:%02x\n",
                 pci_domain_nr(bus),  bus->number);
        list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
-               pr_debug("     * Removing %s...\n", pci_name(dev));
-               eeh_remove_bus_device(dev, purge_pe);
+               pr_debug("   Removing %s...\n", pci_name(dev));
                pci_stop_and_remove_bus_device(dev);
        }
 }
 
-/**
- * pcibios_remove_pci_devices - remove all devices under this bus
- * @bus: the indicated PCI bus
- *
- * Remove all of the PCI devices under this bus both from the
- * linux pci device tree, and from the powerpc EEH address cache.
- */
-void pcibios_remove_pci_devices(struct pci_bus *bus)
-{
-       __pcibios_remove_pci_devices(bus, 1);
-}
 EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
 
 /**
@@ -76,7 +71,7 @@ EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
  */
 void pcibios_add_pci_devices(struct pci_bus * bus)
 {
-       int slotno, num, mode, pass, max;
+       int slotno, mode, pass, max;
        struct pci_dev *dev;
        struct device_node *dn = pci_bus_to_OF_node(bus);
 
@@ -90,11 +85,15 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
                /* use ofdt-based probe */
                of_rescan_bus(dn, bus);
        } else if (mode == PCI_PROBE_NORMAL) {
-               /* use legacy probe */
+               /*
+                * Use legacy probe. In the partial hotplug case, we
+                * probably have grandchildren devices unplugged. So
+                * we don't check the return value from pci_scan_slot() in
+                * order for fully rescan all the way down to pick them up.
+                * They can have been removed during partial hotplug.
+                */
                slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
-               num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
-               if (!num)
-                       return;
+               pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
                pcibios_setup_bus_devices(bus);
                max = bus->busn_res.start;
                for (pass = 0; pass < 2; pass++) {
index 6b0ba5854d9960aff03bbd01b810543c9e369588..15d9105323bf5048009428c4bfc83d06edffb38f 100644 (file)
@@ -230,11 +230,14 @@ void of_scan_pci_bridge(struct pci_dev *dev)
                return;
        }
 
-       bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+       bus = pci_find_bus(pci_domain_nr(dev->bus), busrange[0]);
        if (!bus) {
-               printk(KERN_ERR "Failed to create pci bus for %s\n",
-                      node->full_name);
-               return;
+               bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+               if (!bus) {
+                       printk(KERN_ERR "Failed to create pci bus for %s\n",
+                              node->full_name);
+                       return;
+               }
        }
 
        bus->primary = dev->bus->number;
@@ -292,6 +295,38 @@ void of_scan_pci_bridge(struct pci_dev *dev)
 }
 EXPORT_SYMBOL(of_scan_pci_bridge);
 
+static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
+                           struct device_node *dn)
+{
+       struct pci_dev *dev = NULL;
+       const u32 *reg;
+       int reglen, devfn;
+
+       pr_debug("  * %s\n", dn->full_name);
+       if (!of_device_is_available(dn))
+               return NULL;
+
+       reg = of_get_property(dn, "reg", &reglen);
+       if (reg == NULL || reglen < 20)
+               return NULL;
+       devfn = (reg[0] >> 8) & 0xff;
+
+       /* Check if the PCI device is already there */
+       dev = pci_get_slot(bus, devfn);
+       if (dev) {
+               pci_dev_put(dev);
+               return dev;
+       }
+
+       /* create a new pci_dev for this device */
+       dev = of_create_pci_dev(dn, bus, devfn);
+       if (!dev)
+               return NULL;
+
+       pr_debug("  dev header type: %x\n", dev->hdr_type);
+       return dev;
+}
+
 /**
  * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices
  * @node: device tree node for the PCI bus
@@ -302,8 +337,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
                          int rescan_existing)
 {
        struct device_node *child;
-       const u32 *reg;
-       int reglen, devfn;
        struct pci_dev *dev;
 
        pr_debug("of_scan_bus(%s) bus no %d...\n",
@@ -311,16 +344,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
 
        /* Scan direct children */
        for_each_child_of_node(node, child) {
-               pr_debug("  * %s\n", child->full_name);
-               if (!of_device_is_available(child))
-                       continue;
-               reg = of_get_property(child, "reg", &reglen);
-               if (reg == NULL || reglen < 20)
-                       continue;
-               devfn = (reg[0] >> 8) & 0xff;
-
-               /* create a new pci_dev for this device */
-               dev = of_create_pci_dev(child, bus, devfn);
+               dev = of_scan_pci_dev(bus, child);
                if (!dev)
                        continue;
                pr_debug("    dev header type: %x\n", dev->hdr_type);
index 5eccda9fd33f5396b5a5c231db1c8051bcacf747..607902424e7377046943a14f7a8aeb56471327e4 100644 (file)
@@ -644,7 +644,8 @@ unsigned char ibm_architecture_vec[] = {
        W(0xfffe0000), W(0x003a0000),   /* POWER5/POWER5+ */
        W(0xffff0000), W(0x003e0000),   /* POWER6 */
        W(0xffff0000), W(0x003f0000),   /* POWER7 */
-       W(0xffff0000), W(0x004b0000),   /* POWER8 */
+       W(0xffff0000), W(0x004b0000),   /* POWER8E */
+       W(0xffff0000), W(0x004d0000),   /* POWER8 */
        W(0xffffffff), W(0x0f000004),   /* all 2.07-compliant */
        W(0xffffffff), W(0x0f000003),   /* all 2.06-compliant */
        W(0xffffffff), W(0x0f000002),   /* all 2.05-compliant */
@@ -706,7 +707,7 @@ unsigned char ibm_architecture_vec[] = {
         * must match by the macro below. Update the definition if
         * the structure layout changes.
         */
-#define IBM_ARCH_VEC_NRCORES_OFFSET    117
+#define IBM_ARCH_VEC_NRCORES_OFFSET    125
        W(NR_CPUS),                     /* number of cores supported */
        0,
        0,
index 654e479802f2098afb74c0fea0bb0b68e25707f5..f096e72262f41d121398f8fa27d5286bd8641bf4 100644 (file)
@@ -38,9 +38,6 @@ jiffies = jiffies_64 + 4;
 #endif
 SECTIONS
 {
-       . = 0;
-       reloc_start = .;
-
        . = KERNELBASE;
 
 /*
index 3f0c30ae4791db7597c3fac74740e09e0fb3b7c1..c33d939120c970f3f800b42b5dfa3739e28ac16f 100644 (file)
@@ -43,6 +43,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
 {
        unsigned long va;
        unsigned int penc;
+       unsigned long sllp;
 
        /*
         * We need 14 to 65 bits of va for a tlibe of 4K page
@@ -64,7 +65,9 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
                /* clear out bits after (52) [0....52.....63] */
                va &= ~((1ul << (64 - 52)) - 1);
                va |= ssize << 8;
-               va |= mmu_psize_defs[apsize].sllp << 6;
+               sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
+                       ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+               va |= sllp << 5;
                asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
                             : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
                             : "memory");
@@ -98,6 +101,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
 {
        unsigned long va;
        unsigned int penc;
+       unsigned long sllp;
 
        /* VPN_SHIFT can be atmost 12 */
        va = vpn << VPN_SHIFT;
@@ -113,7 +117,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
                /* clear out bits after(52) [0....52.....63] */
                va &= ~((1ul << (64 - 52)) - 1);
                va |= ssize << 8;
-               va |= mmu_psize_defs[apsize].sllp << 6;
+               sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
+                       ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+               va |= sllp << 5;
                asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
                             : : "r"(va) : "memory");
                break;
@@ -554,6 +560,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
                        seg_off |= vpi << shift;
                }
                *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+               break;
        case MMU_SEGSIZE_1T:
                /* We only have 40 - 23 bits of seg_off in avpn */
                seg_off = (avpn & 0x1ffff) << 23;
@@ -563,6 +570,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
                        seg_off |= vpi << shift;
                }
                *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+               break;
        default:
                *vpn = size = 0;
        }
index a3985aee77fec1ecd2fe9756257e3a46b9643b94..24a45f91c65f69480119055e6d8820f52f07582b 100644 (file)
@@ -1252,8 +1252,11 @@ nocheck:
 
        ret = 0;
  out:
-       if (has_branch_stack(event))
+       if (has_branch_stack(event)) {
                power_pmu_bhrb_enable(event);
+               cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+                                       event->attr.branch_sample_type);
+       }
 
        perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
index 96a64d6a8bdf3dfbba1c5a57f16bcb38d3c8a7ce..7466374d2787cca701ecef97da54b723dee09026 100644 (file)
@@ -561,18 +561,13 @@ static int power8_generic_events[] = {
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
        u64 pmu_bhrb_filter = 0;
-       u64 br_privilege = branch_sample_type & ONLY_PLM;
 
-       /* BHRB and regular PMU events share the same prvillege state
+       /* BHRB and regular PMU events share the same privilege state
         * filter configuration. BHRB is always recorded along with a
-        * regular PMU event. So privilege state filter criteria for BHRB
-        * and the companion PMU events has to be the same. As a default
-        * "perf record" tool sets all privillege bits ON when no filter
-        * criteria is provided in the command line. So as along as all
-        * privillege bits are ON or they are OFF, we are good to go.
+        * regular PMU event. As the privilege state filter is handled
+        * in the basic PMC configuration of the accompanying regular
+        * PMU event, we ignore any separate BHRB specific request.
         */
-       if ((br_privilege != 7) && (br_privilege != 0))
-               return -1;
 
        /* No branch filter requested */
        if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
@@ -621,10 +616,19 @@ static struct power_pmu power8_pmu = {
 
 static int __init init_power8_pmu(void)
 {
+       int rc;
+
        if (!cur_cpu_spec->oprofile_cpu_type ||
            strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
                return -ENODEV;
 
-       return register_power_pmu(&power8_pmu);
+       rc = register_power_pmu(&power8_pmu);
+       if (rc)
+               return rc;
+
+       /* Tell userspace that EBB is supported */
+       cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+       return 0;
 }
 early_initcall(init_power8_pmu);
index 969cce73055af337f3b15c9c12df27dc3aebb4ed..79663d26e6eaa1de8749c6994316737f991106ab 100644 (file)
@@ -114,7 +114,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
         * the root bridge. So it's not reasonable to continue
         * the probing.
         */
-       if (!dn || !edev)
+       if (!dn || !edev || edev->pe)
                return 0;
 
        /* Skip for PCI-ISA bridge */
@@ -122,8 +122,19 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
                return 0;
 
        /* Initialize eeh device */
-       edev->class_code        = dev->class;
-       edev->mode              = 0;
+       edev->class_code = dev->class;
+       edev->mode      &= 0xFFFFFF00;
+       if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+               edev->mode |= EEH_DEV_BRIDGE;
+       if (pci_is_pcie(dev)) {
+               edev->pcie_cap = pci_pcie_cap(dev);
+
+               if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
+                       edev->mode |= EEH_DEV_ROOT_PORT;
+               else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
+                       edev->mode |= EEH_DEV_DS_PORT;
+       }
+
        edev->config_addr       = ((dev->bus->number << 8) | dev->devfn);
        edev->pe_config_addr    = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
 
index 49b57b9f835d787977817db8f093d40c354ac7d5..d8140b125e62508720388d683a5028199d62ff28 100644 (file)
@@ -1266,7 +1266,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
                opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
 }
 
-void pnv_pci_init_ioda2_phb(struct device_node *np)
+void __init pnv_pci_init_ioda2_phb(struct device_node *np)
 {
        pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
 }
index 1bd3399146ed6f945f24308ea95883bffdce8d28..62b4f8025de005ad77bdbfcd42024d25614eb79c 100644 (file)
@@ -19,7 +19,6 @@ config PPC_PSERIES
        select ZLIB_DEFLATE
        select PPC_DOORBELL
        select HAVE_CONTEXT_TRACKING
-       select HOTPLUG if SMP
        select HOTPLUG_CPU if SMP
        default y
 
index b456b157d33d107842a42d3b0c3d0da36e9ea121..7fbc25b1813fe59ea80eceba65cb2a76027c0eec 100644 (file)
@@ -133,6 +133,48 @@ static int pseries_eeh_init(void)
        return 0;
 }
 
+static int pseries_eeh_cap_start(struct device_node *dn)
+{
+       struct pci_dn *pdn = PCI_DN(dn);
+       u32 status;
+
+       if (!pdn)
+               return 0;
+
+       rtas_read_config(pdn, PCI_STATUS, 2, &status);
+       if (!(status & PCI_STATUS_CAP_LIST))
+               return 0;
+
+       return PCI_CAPABILITY_LIST;
+}
+
+
+static int pseries_eeh_find_cap(struct device_node *dn, int cap)
+{
+       struct pci_dn *pdn = PCI_DN(dn);
+       int pos = pseries_eeh_cap_start(dn);
+       int cnt = 48;   /* Maximal number of capabilities */
+       u32 id;
+
+       if (!pos)
+               return 0;
+
+        while (cnt--) {
+               rtas_read_config(pdn, pos, 1, &pos);
+               if (pos < 0x40)
+                       break;
+               pos &= ~3;
+               rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+               if (id == 0xff)
+                       break;
+               if (id == cap)
+                       return pos;
+               pos += PCI_CAP_LIST_NEXT;
+       }
+
+       return 0;
+}
+
 /**
  * pseries_eeh_of_probe - EEH probe on the given device
  * @dn: OF node
@@ -146,14 +188,16 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 {
        struct eeh_dev *edev;
        struct eeh_pe pe;
+       struct pci_dn *pdn = PCI_DN(dn);
        const u32 *class_code, *vendor_id, *device_id;
        const u32 *regs;
+       u32 pcie_flags;
        int enable = 0;
        int ret;
 
        /* Retrieve OF node and eeh device */
        edev = of_node_to_eeh_dev(dn);
-       if (!of_device_is_available(dn))
+       if (edev->pe || !of_device_is_available(dn))
                return NULL;
 
        /* Retrieve class/vendor/device IDs */
@@ -167,9 +211,26 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
        if (dn->type && !strcmp(dn->type, "isa"))
                return NULL;
 
-       /* Update class code and mode of eeh device */
+       /*
+        * Update class code and mode of eeh device. We need
+        * correctly reflects that current device is root port
+        * or PCIe switch downstream port.
+        */
        edev->class_code = *class_code;
-       edev->mode = 0;
+       edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
+       edev->mode &= 0xFFFFFF00;
+       if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
+               edev->mode |= EEH_DEV_BRIDGE;
+               if (edev->pcie_cap) {
+                       rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+                                        2, &pcie_flags);
+                       pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
+                       if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
+                               edev->mode |= EEH_DEV_ROOT_PORT;
+                       else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
+                               edev->mode |= EEH_DEV_DS_PORT;
+               }
+       }
 
        /* Retrieve the device address */
        regs = of_get_property(dn, "reg", NULL);
index 02d6e21619bb6c677771c0dc6ecaabb5e1d417d5..8bad880bd177a2efaf9887b0509593401112aee4 100644 (file)
@@ -146,7 +146,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
        flags = 0;
 
        /* Make pHyp happy */
-       if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
+       if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
                hpte_r &= ~_PAGE_COHERENT;
        if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
                flags |= H_COALESCE_CAND;
index 7b3cbde8c78378e25fb0d82ca60b58e20ffe51f5..721c0586b284d42aec0dfe69bdc178e191bcf222 100644 (file)
@@ -287,6 +287,9 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
        unsigned long *savep;
        struct rtas_error_log *h, *errhdr = NULL;
 
+       /* Mask top two bits */
+       regs->gpr[3] &= ~(0x3UL << 62);
+
        if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
                printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
                return NULL;
index 7d6ba9db1be99696784aeba62343eeebff9c32dd..6c63c358a7e689070881361b003f102c154e9c96 100644 (file)
@@ -27,7 +27,6 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
 obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
-obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
 
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
@@ -82,4 +81,3 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
 sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
-crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
deleted file mode 100644 (file)
index 35e9756..0000000
+++ /dev/null
@@ -1,643 +0,0 @@
-########################################################################
-# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
-#
-# Copyright (c) 2013, Intel Corporation
-#
-# Authors:
-#     Erdinc Ozturk <erdinc.ozturk@intel.com>
-#     Vinodh Gopal <vinodh.gopal@intel.com>
-#     James Guilford <james.guilford@intel.com>
-#     Tim Chen <tim.c.chen@linux.intel.com>
-#
-# This software is available to you under a choice of one of two
-# licenses.  You may choose to be licensed under the terms of the GNU
-# General Public License (GPL) Version 2, available from the file
-# COPYING in the main directory of this source tree, or the
-# OpenIB.org BSD license below:
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in the
-#   documentation and/or other materials provided with the
-#   distribution.
-#
-# * Neither the name of the Intel Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-#
-# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-########################################################################
-#       Function API:
-#       UINT16 crc_t10dif_pcl(
-#               UINT16 init_crc, //initial CRC value, 16 bits
-#               const unsigned char *buf, //buffer pointer to calculate CRC on
-#               UINT64 len //buffer length in bytes (64-bit data)
-#       );
-#
-#       Reference paper titled "Fast CRC Computation for Generic
-#      Polynomials Using PCLMULQDQ Instruction"
-#       URL: http://www.intel.com/content/dam/www/public/us/en/documents
-#  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-#
-#
-
-#include <linux/linkage.h>
-
-.text
-
-#define        arg1 %rdi
-#define        arg2 %rsi
-#define        arg3 %rdx
-
-#define        arg1_low32 %edi
-
-ENTRY(crc_t10dif_pcl)
-.align 16
-
-       # adjust the 16-bit initial_crc value, scale it to 32 bits
-       shl     $16, arg1_low32
-
-       # Allocate Stack Space
-       mov     %rsp, %rcx
-       sub     $16*2, %rsp
-       # align stack to 16 byte boundary
-       and     $~(0x10 - 1), %rsp
-
-       # check if smaller than 256
-       cmp     $256, arg3
-
-       # for sizes less than 128, we can't fold 64B at a time...
-       jl      _less_than_128
-
-
-       # load the initial crc value
-       movd    arg1_low32, %xmm10      # initial crc
-
-       # crc value does not need to be byte-reflected, but it needs
-       # to be moved to the high part of the register.
-       # because data will be byte-reflected and will align with
-       # initial crc at correct place.
-       pslldq  $12, %xmm10
-
-       movdqa  SHUF_MASK(%rip), %xmm11
-       # receive the initial 64B data, xor the initial crc value
-       movdqu  16*0(arg2), %xmm0
-       movdqu  16*1(arg2), %xmm1
-       movdqu  16*2(arg2), %xmm2
-       movdqu  16*3(arg2), %xmm3
-       movdqu  16*4(arg2), %xmm4
-       movdqu  16*5(arg2), %xmm5
-       movdqu  16*6(arg2), %xmm6
-       movdqu  16*7(arg2), %xmm7
-
-       pshufb  %xmm11, %xmm0
-       # XOR the initial_crc value
-       pxor    %xmm10, %xmm0
-       pshufb  %xmm11, %xmm1
-       pshufb  %xmm11, %xmm2
-       pshufb  %xmm11, %xmm3
-       pshufb  %xmm11, %xmm4
-       pshufb  %xmm11, %xmm5
-       pshufb  %xmm11, %xmm6
-       pshufb  %xmm11, %xmm7
-
-       movdqa  rk3(%rip), %xmm10       #xmm10 has rk3 and rk4
-                                       #imm value of pclmulqdq instruction
-                                       #will determine which constant to use
-
-       #################################################################
-       # we subtract 256 instead of 128 to save one instruction from the loop
-       sub     $256, arg3
-
-       # at this section of the code, there is 64*x+y (0<=y<64) bytes of
-       # buffer. The _fold_64_B_loop will fold 64B at a time
-       # until we have 64+y Bytes of buffer
-
-
-       # fold 64B at a time. This section of the code folds 4 xmm
-       # registers in parallel
-_fold_64_B_loop:
-
-       # update the buffer pointer
-       add     $128, arg2              #    buf += 64#
-
-       movdqu  16*0(arg2), %xmm9
-       movdqu  16*1(arg2), %xmm12
-       pshufb  %xmm11, %xmm9
-       pshufb  %xmm11, %xmm12
-       movdqa  %xmm0, %xmm8
-       movdqa  %xmm1, %xmm13
-       pclmulqdq       $0x0 , %xmm10, %xmm0
-       pclmulqdq       $0x11, %xmm10, %xmm8
-       pclmulqdq       $0x0 , %xmm10, %xmm1
-       pclmulqdq       $0x11, %xmm10, %xmm13
-       pxor    %xmm9 , %xmm0
-       xorps   %xmm8 , %xmm0
-       pxor    %xmm12, %xmm1
-       xorps   %xmm13, %xmm1
-
-       movdqu  16*2(arg2), %xmm9
-       movdqu  16*3(arg2), %xmm12
-       pshufb  %xmm11, %xmm9
-       pshufb  %xmm11, %xmm12
-       movdqa  %xmm2, %xmm8
-       movdqa  %xmm3, %xmm13
-       pclmulqdq       $0x0, %xmm10, %xmm2
-       pclmulqdq       $0x11, %xmm10, %xmm8
-       pclmulqdq       $0x0, %xmm10, %xmm3
-       pclmulqdq       $0x11, %xmm10, %xmm13
-       pxor    %xmm9 , %xmm2
-       xorps   %xmm8 , %xmm2
-       pxor    %xmm12, %xmm3
-       xorps   %xmm13, %xmm3
-
-       movdqu  16*4(arg2), %xmm9
-       movdqu  16*5(arg2), %xmm12
-       pshufb  %xmm11, %xmm9
-       pshufb  %xmm11, %xmm12
-       movdqa  %xmm4, %xmm8
-       movdqa  %xmm5, %xmm13
-       pclmulqdq       $0x0,  %xmm10, %xmm4
-       pclmulqdq       $0x11, %xmm10, %xmm8
-       pclmulqdq       $0x0,  %xmm10, %xmm5
-       pclmulqdq       $0x11, %xmm10, %xmm13
-       pxor    %xmm9 ,  %xmm4
-       xorps   %xmm8 ,  %xmm4
-       pxor    %xmm12,  %xmm5
-       xorps   %xmm13,  %xmm5
-
-       movdqu  16*6(arg2), %xmm9
-       movdqu  16*7(arg2), %xmm12
-       pshufb  %xmm11, %xmm9
-       pshufb  %xmm11, %xmm12
-       movdqa  %xmm6 , %xmm8
-       movdqa  %xmm7 , %xmm13
-       pclmulqdq       $0x0 , %xmm10, %xmm6
-       pclmulqdq       $0x11, %xmm10, %xmm8
-       pclmulqdq       $0x0 , %xmm10, %xmm7
-       pclmulqdq       $0x11, %xmm10, %xmm13
-       pxor    %xmm9 , %xmm6
-       xorps   %xmm8 , %xmm6
-       pxor    %xmm12, %xmm7
-       xorps   %xmm13, %xmm7
-
-       sub     $128, arg3
-
-       # check if there is another 64B in the buffer to be able to fold
-       jge     _fold_64_B_loop
-       ##################################################################
-
-
-       add     $128, arg2
-       # at this point, the buffer pointer is pointing at the last y Bytes
-       # of the buffer the 64B of folded data is in 4 of the xmm
-       # registers: xmm0, xmm1, xmm2, xmm3
-
-
-       # fold the 8 xmm registers to 1 xmm register with different constants
-
-       movdqa  rk9(%rip), %xmm10
-       movdqa  %xmm0, %xmm8
-       pclmulqdq       $0x11, %xmm10, %xmm0
-       pclmulqdq       $0x0 , %xmm10, %xmm8
-       pxor    %xmm8, %xmm7
-       xorps   %xmm0, %xmm7
-
-       movdqa  rk11(%rip), %xmm10
-       movdqa  %xmm1, %xmm8
-       pclmulqdq        $0x11, %xmm10, %xmm1
-       pclmulqdq        $0x0 , %xmm10, %xmm8
-       pxor    %xmm8, %xmm7
-       xorps   %xmm1, %xmm7
-
-       movdqa  rk13(%rip), %xmm10
-       movdqa  %xmm2, %xmm8
-       pclmulqdq        $0x11, %xmm10, %xmm2
-       pclmulqdq        $0x0 , %xmm10, %xmm8
-       pxor    %xmm8, %xmm7
-       pxor    %xmm2, %xmm7
-
-       movdqa  rk15(%rip), %xmm10
-       movdqa  %xmm3, %xmm8
-       pclmulqdq       $0x11, %xmm10, %xmm3
-       pclmulqdq       $0x0 , %xmm10, %xmm8
-       pxor    %xmm8, %xmm7
-       xorps   %xmm3, %xmm7
-
-       movdqa  rk17(%rip), %xmm10
-       movdqa  %xmm4, %xmm8
-       pclmulqdq       $0x11, %xmm10, %xmm4
-       pclmulqdq       $0x0 , %xmm10, %xmm8
-       pxor    %xmm8, %xmm7
-       pxor    %xmm4, %xmm7
-
-       movdqa  rk19(%rip), %xmm10
-       movdqa  %xmm5, %xmm8
-       pclmulqdq       $0x11, %xmm10, %xmm5
-       pclmulqdq       $0x0 , %xmm10, %xmm8
-       pxor    %xmm8, %xmm7
-       xorps   %xmm5, %xmm7
-
-       movdqa  rk1(%rip), %xmm10       #xmm10 has rk1 and rk2
-                                       #imm value of pclmulqdq instruction
-                                       #will determine which constant to use
-       movdqa  %xmm6, %xmm8
-       pclmulqdq       $0x11, %xmm10, %xmm6
-       pclmulqdq       $0x0 , %xmm10, %xmm8
-       pxor    %xmm8, %xmm7
-       pxor    %xmm6, %xmm7
-
-
-       # instead of 64, we add 48 to the loop counter to save 1 instruction
-       # from the loop instead of a cmp instruction, we use the negative
-       # flag with the jl instruction
-       add     $128-16, arg3
-       jl      _final_reduction_for_128
-
-       # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
-       # and the rest is in memory. We can fold 16 bytes at a time if y>=16
-       # continue folding 16B at a time
-
-_16B_reduction_loop:
-       movdqa  %xmm7, %xmm8
-       pclmulqdq       $0x11, %xmm10, %xmm7
-       pclmulqdq       $0x0 , %xmm10, %xmm8
-       pxor    %xmm8, %xmm7
-       movdqu  (arg2), %xmm0
-       pshufb  %xmm11, %xmm0
-       pxor    %xmm0 , %xmm7
-       add     $16, arg2
-       sub     $16, arg3
-       # instead of a cmp instruction, we utilize the flags with the
-       # jge instruction equivalent of: cmp arg3, 16-16
-       # check if there is any more 16B in the buffer to be able to fold
-       jge     _16B_reduction_loop
-
-       #now we have 16+z bytes left to reduce, where 0<= z < 16.
-       #first, we reduce the data in the xmm7 register
-
-
-_final_reduction_for_128:
-       # check if any more data to fold. If not, compute the CRC of
-       # the final 128 bits
-       add     $16, arg3
-       je      _128_done
-
-       # here we are getting data that is less than 16 bytes.
-       # since we know that there was data before the pointer, we can
-       # offset the input pointer before the actual point, to receive
-       # exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_xmms:
-       movdqa  %xmm7, %xmm2
-
-       movdqu  -16(arg2, arg3), %xmm1
-       pshufb  %xmm11, %xmm1
-
-       # get rid of the extra data that was loaded before
-       # load the shift constant
-       lea     pshufb_shf_table+16(%rip), %rax
-       sub     arg3, %rax
-       movdqu  (%rax), %xmm0
-
-       # shift xmm2 to the left by arg3 bytes
-       pshufb  %xmm0, %xmm2
-
-       # shift xmm7 to the right by 16-arg3 bytes
-       pxor    mask1(%rip), %xmm0
-       pshufb  %xmm0, %xmm7
-       pblendvb        %xmm2, %xmm1    #xmm0 is implicit
-
-       # fold 16 Bytes
-       movdqa  %xmm1, %xmm2
-       movdqa  %xmm7, %xmm8
-       pclmulqdq       $0x11, %xmm10, %xmm7
-       pclmulqdq       $0x0 , %xmm10, %xmm8
-       pxor    %xmm8, %xmm7
-       pxor    %xmm2, %xmm7
-
-_128_done:
-       # compute crc of a 128-bit value
-       movdqa  rk5(%rip), %xmm10       # rk5 and rk6 in xmm10
-       movdqa  %xmm7, %xmm0
-
-       #64b fold
-       pclmulqdq       $0x1, %xmm10, %xmm7
-       pslldq  $8   ,  %xmm0
-       pxor    %xmm0,  %xmm7
-
-       #32b fold
-       movdqa  %xmm7, %xmm0
-
-       pand    mask2(%rip), %xmm0
-
-       psrldq  $12, %xmm7
-       pclmulqdq       $0x10, %xmm10, %xmm7
-       pxor    %xmm0, %xmm7
-
-       #barrett reduction
-_barrett:
-       movdqa  rk7(%rip), %xmm10       # rk7 and rk8 in xmm10
-       movdqa  %xmm7, %xmm0
-       pclmulqdq       $0x01, %xmm10, %xmm7
-       pslldq  $4, %xmm7
-       pclmulqdq       $0x11, %xmm10, %xmm7
-
-       pslldq  $4, %xmm7
-       pxor    %xmm0, %xmm7
-       pextrd  $1, %xmm7, %eax
-
-_cleanup:
-       # scale the result back to 16 bits
-       shr     $16, %eax
-       mov     %rcx, %rsp
-       ret
-
-########################################################################
-
-.align 16
-_less_than_128:
-
-       # check if there is enough buffer to be able to fold 16B at a time
-       cmp     $32, arg3
-       jl      _less_than_32
-       movdqa  SHUF_MASK(%rip), %xmm11
-
-       # now if there is, load the constants
-       movdqa  rk1(%rip), %xmm10       # rk1 and rk2 in xmm10
-
-       movd    arg1_low32, %xmm0       # get the initial crc value
-       pslldq  $12, %xmm0      # align it to its correct place
-       movdqu  (arg2), %xmm7   # load the plaintext
-       pshufb  %xmm11, %xmm7   # byte-reflect the plaintext
-       pxor    %xmm0, %xmm7
-
-
-       # update the buffer pointer
-       add     $16, arg2
-
-       # update the counter. subtract 32 instead of 16 to save one
-       # instruction from the loop
-       sub     $32, arg3
-
-       jmp     _16B_reduction_loop
-
-
-.align 16
-_less_than_32:
-       # mov initial crc to the return value. this is necessary for
-       # zero-length buffers.
-       mov     arg1_low32, %eax
-       test    arg3, arg3
-       je      _cleanup
-
-       movdqa  SHUF_MASK(%rip), %xmm11
-
-       movd    arg1_low32, %xmm0       # get the initial crc value
-       pslldq  $12, %xmm0      # align it to its correct place
-
-       cmp     $16, arg3
-       je      _exact_16_left
-       jl      _less_than_16_left
-
-       movdqu  (arg2), %xmm7   # load the plaintext
-       pshufb  %xmm11, %xmm7   # byte-reflect the plaintext
-       pxor    %xmm0 , %xmm7   # xor the initial crc value
-       add     $16, arg2
-       sub     $16, arg3
-       movdqa  rk1(%rip), %xmm10       # rk1 and rk2 in xmm10
-       jmp     _get_last_two_xmms
-
-
-.align 16
-_less_than_16_left:
-       # use stack space to load data less than 16 bytes, zero-out
-       # the 16B in memory first.
-
-       pxor    %xmm1, %xmm1
-       mov     %rsp, %r11
-       movdqa  %xmm1, (%r11)
-
-       cmp     $4, arg3
-       jl      _only_less_than_4
-
-       # backup the counter value
-       mov     arg3, %r9
-       cmp     $8, arg3
-       jl      _less_than_8_left
-
-       # load 8 Bytes
-       mov     (arg2), %rax
-       mov     %rax, (%r11)
-       add     $8, %r11
-       sub     $8, arg3
-       add     $8, arg2
-_less_than_8_left:
-
-       cmp     $4, arg3
-       jl      _less_than_4_left
-
-       # load 4 Bytes
-       mov     (arg2), %eax
-       mov     %eax, (%r11)
-       add     $4, %r11
-       sub     $4, arg3
-       add     $4, arg2
-_less_than_4_left:
-
-       cmp     $2, arg3
-       jl      _less_than_2_left
-
-       # load 2 Bytes
-       mov     (arg2), %ax
-       mov     %ax, (%r11)
-       add     $2, %r11
-       sub     $2, arg3
-       add     $2, arg2
-_less_than_2_left:
-       cmp     $1, arg3
-        jl      _zero_left
-
-       # load 1 Byte
-       mov     (arg2), %al
-       mov     %al, (%r11)
-_zero_left:
-       movdqa  (%rsp), %xmm7
-       pshufb  %xmm11, %xmm7
-       pxor    %xmm0 , %xmm7   # xor the initial crc value
-
-       # shl r9, 4
-       lea     pshufb_shf_table+16(%rip), %rax
-       sub     %r9, %rax
-       movdqu  (%rax), %xmm0
-       pxor    mask1(%rip), %xmm0
-
-       pshufb  %xmm0, %xmm7
-       jmp     _128_done
-
-.align 16
-_exact_16_left:
-       movdqu  (arg2), %xmm7
-       pshufb  %xmm11, %xmm7
-       pxor    %xmm0 , %xmm7   # xor the initial crc value
-
-       jmp     _128_done
-
-_only_less_than_4:
-       cmp     $3, arg3
-       jl      _only_less_than_3
-
-       # load 3 Bytes
-       mov     (arg2), %al
-       mov     %al, (%r11)
-
-       mov     1(arg2), %al
-       mov     %al, 1(%r11)
-
-       mov     2(arg2), %al
-       mov     %al, 2(%r11)
-
-       movdqa   (%rsp), %xmm7
-       pshufb   %xmm11, %xmm7
-       pxor     %xmm0 , %xmm7  # xor the initial crc value
-
-       psrldq  $5, %xmm7
-
-       jmp     _barrett
-_only_less_than_3:
-       cmp     $2, arg3
-       jl      _only_less_than_2
-
-       # load 2 Bytes
-       mov     (arg2), %al
-       mov     %al, (%r11)
-
-       mov     1(arg2), %al
-       mov     %al, 1(%r11)
-
-       movdqa  (%rsp), %xmm7
-       pshufb  %xmm11, %xmm7
-       pxor    %xmm0 , %xmm7   # xor the initial crc value
-
-       psrldq  $6, %xmm7
-
-       jmp     _barrett
-_only_less_than_2:
-
-       # load 1 Byte
-       mov     (arg2), %al
-       mov     %al, (%r11)
-
-       movdqa  (%rsp), %xmm7
-       pshufb  %xmm11, %xmm7
-       pxor    %xmm0 , %xmm7   # xor the initial crc value
-
-       psrldq  $7, %xmm7
-
-       jmp     _barrett
-
-ENDPROC(crc_t10dif_pcl)
-
-.data
-
-# precomputed constants
-# these constants are precomputed from the poly:
-# 0x8bb70000 (0x8bb7 scaled to 32 bits)
-.align 16
-# Q = 0x18BB70000
-# rk1 = 2^(32*3) mod Q << 32
-# rk2 = 2^(32*5) mod Q << 32
-# rk3 = 2^(32*15) mod Q << 32
-# rk4 = 2^(32*17) mod Q << 32
-# rk5 = 2^(32*3) mod Q << 32
-# rk6 = 2^(32*2) mod Q << 32
-# rk7 = floor(2^64/Q)
-# rk8 = Q
-rk1:
-.quad 0x2d56000000000000
-rk2:
-.quad 0x06df000000000000
-rk3:
-.quad 0x9d9d000000000000
-rk4:
-.quad 0x7cf5000000000000
-rk5:
-.quad 0x2d56000000000000
-rk6:
-.quad 0x1368000000000000
-rk7:
-.quad 0x00000001f65a57f8
-rk8:
-.quad 0x000000018bb70000
-
-rk9:
-.quad 0xceae000000000000
-rk10:
-.quad 0xbfd6000000000000
-rk11:
-.quad 0x1e16000000000000
-rk12:
-.quad 0x713c000000000000
-rk13:
-.quad 0xf7f9000000000000
-rk14:
-.quad 0x80a6000000000000
-rk15:
-.quad 0x044c000000000000
-rk16:
-.quad 0xe658000000000000
-rk17:
-.quad 0xad18000000000000
-rk18:
-.quad 0xa497000000000000
-rk19:
-.quad 0x6ee3000000000000
-rk20:
-.quad 0xe7b5000000000000
-
-
-
-mask1:
-.octa 0x80808080808080808080808080808080
-mask2:
-.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
-
-SHUF_MASK:
-.octa 0x000102030405060708090A0B0C0D0E0F
-
-pshufb_shf_table:
-# use these values for shift constants for the pshufb instruction
-# different alignments result in values as shown:
-#      DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
-#      DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
-#      DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
-#      DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
-#      DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
-#      DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
-#      DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
-#      DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
-#      DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
-#      DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
-#      DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
-#      DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
-#      DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
-#      DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
-#      DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
-.octa 0x8f8e8d8c8b8a89888786858483828100
-.octa 0x000e0d0c0b0a09080706050403020100
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
deleted file mode 100644 (file)
index 7845d7f..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
- *
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <asm/i387.h>
-#include <asm/cpufeature.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
-                               size_t len);
-
-struct chksum_desc_ctx {
-       __u16 crc;
-};
-
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       ctx->crc = 0;
-
-       return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-                        unsigned int length)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       if (irq_fpu_usable()) {
-               kernel_fpu_begin();
-               ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
-               kernel_fpu_end();
-       } else
-               ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-       return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       *(__u16 *)out = ctx->crc;
-       return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-                       u8 *out)
-{
-       if (irq_fpu_usable()) {
-               kernel_fpu_begin();
-               *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
-               kernel_fpu_end();
-       } else
-               *(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-       return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-                       unsigned int len, u8 *out)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-                        unsigned int length, u8 *out)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-       .digestsize             =       CRC_T10DIF_DIGEST_SIZE,
-       .init           =       chksum_init,
-       .update         =       chksum_update,
-       .final          =       chksum_final,
-       .finup          =       chksum_finup,
-       .digest         =       chksum_digest,
-       .descsize               =       sizeof(struct chksum_desc_ctx),
-       .base                   =       {
-               .cra_name               =       "crct10dif",
-               .cra_driver_name        =       "crct10dif-pclmul",
-               .cra_priority           =       200,
-               .cra_blocksize          =       CRC_T10DIF_BLOCK_SIZE,
-               .cra_module             =       THIS_MODULE,
-       }
-};
-
-static const struct x86_cpu_id crct10dif_cpu_id[] = {
-       X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
-       {}
-};
-MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
-
-static int __init crct10dif_intel_mod_init(void)
-{
-       if (!x86_match_cpu(crct10dif_cpu_id))
-               return -ENODEV;
-
-       return crypto_register_shash(&alg);
-}
-
-static void __exit crct10dif_intel_mod_fini(void)
-{
-       crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_intel_mod_init);
-module_exit(crct10dif_intel_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("crct10dif");
-MODULE_ALIAS("crct10dif-pclmul");
index 69ce573f1224560b4f5c7532e21053b27c651da4..aca01164f00200a8d14efa8a82e4a400b67d0bf1 100644 (file)
@@ -376,25 +376,6 @@ config CRYPTO_CRC32_PCLMUL
          which will enable any routine to use the CRC-32-IEEE 802.3 checksum
          and gain better performance as compared with the table implementation.
 
-config CRYPTO_CRCT10DIF
-       tristate "CRCT10DIF algorithm"
-       select CRYPTO_HASH
-       help
-         CRC T10 Data Integrity Field computation is being cast as
-         a crypto transform.  This allows for faster crc t10 diff
-         transforms to be used if they are available.
-
-config CRYPTO_CRCT10DIF_PCLMUL
-       tristate "CRCT10DIF PCLMULQDQ hardware acceleration"
-       depends on X86 && 64BIT && CRC_T10DIF
-       select CRYPTO_HASH
-       help
-         For x86_64 processors with SSE4.2 and PCLMULQDQ supported,
-         CRC T10 DIF PCLMULQDQ computation can be hardware
-         accelerated PCLMULQDQ instruction. This option will create
-         'crct10dif-plcmul' module, which is faster when computing the
-         crct10dif checksum as compared with the generic table implementation.
-
 config CRYPTO_GHASH
        tristate "GHASH digest algorithm"
        select CRYPTO_GF128MUL
index 2d5ed08a239fa3e79110c9dfd077d30da8954192..2ba0df2f908f5985ef23f0794c1f49e6fd5f3720 100644 (file)
@@ -83,7 +83,6 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
 obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
-obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
diff --git a/crypto/crct10dif.c b/crypto/crct10dif.c
deleted file mode 100644 (file)
index 92aca96..0000000
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform
- *
- * Copyright (c) 2007 Oracle Corporation.  All rights reserved.
- * Written by Martin K. Petersen <martin.petersen@oracle.com>
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-struct chksum_desc_ctx {
-       __u16 crc;
-};
-
-/* Table generated using the following polynomium:
- * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
- * gt: 0x8bb7
- */
-static const __u16 t10_dif_crc_table[256] = {
-       0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
-       0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
-       0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
-       0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
-       0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
-       0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
-       0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
-       0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
-       0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
-       0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
-       0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
-       0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
-       0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
-       0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
-       0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
-       0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
-       0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
-       0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
-       0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
-       0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
-       0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
-       0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
-       0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
-       0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
-       0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
-       0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
-       0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
-       0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
-       0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
-       0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
-       0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
-       0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
-};
-
-__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len)
-{
-       unsigned int i;
-
-       for (i = 0 ; i < len ; i++)
-               crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
-
-       return crc;
-}
-EXPORT_SYMBOL(crc_t10dif_generic);
-
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       ctx->crc = 0;
-
-       return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-                        unsigned int length)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-       return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       *(__u16 *)out = ctx->crc;
-       return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-                       u8 *out)
-{
-       *(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-       return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-                       unsigned int len, u8 *out)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-                        unsigned int length, u8 *out)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-       .digestsize             =       CRC_T10DIF_DIGEST_SIZE,
-       .init           =       chksum_init,
-       .update         =       chksum_update,
-       .final          =       chksum_final,
-       .finup          =       chksum_finup,
-       .digest         =       chksum_digest,
-       .descsize               =       sizeof(struct chksum_desc_ctx),
-       .base                   =       {
-               .cra_name               =       "crct10dif",
-               .cra_driver_name        =       "crct10dif-generic",
-               .cra_priority           =       100,
-               .cra_blocksize          =       CRC_T10DIF_BLOCK_SIZE,
-               .cra_module             =       THIS_MODULE,
-       }
-};
-
-static int __init crct10dif_mod_init(void)
-{
-       int ret;
-
-       ret = crypto_register_shash(&alg);
-       return ret;
-}
-
-static void __exit crct10dif_mod_fini(void)
-{
-       crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_mod_init);
-module_exit(crct10dif_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation.");
-MODULE_LICENSE("GPL");
index 25a5934f0e50a011f73db08a84735ae76b2a7f60..66d254ce0d11b35ca55e07b8b0fd786c0b0f0c10 100644 (file)
@@ -1174,10 +1174,6 @@ static int do_test(int m)
                ret += tcrypt_test("ghash");
                break;
 
-       case 47:
-               ret += tcrypt_test("crct10dif");
-               break;
-
        case 100:
                ret += tcrypt_test("hmac(md5)");
                break;
@@ -1502,10 +1498,6 @@ static int do_test(int m)
                test_hash_speed("crc32c", sec, generic_hash_speed_template);
                if (mode > 300 && mode < 400) break;
 
-       case 320:
-               test_hash_speed("crct10dif", sec, generic_hash_speed_template);
-               if (mode > 300 && mode < 400) break;
-
        case 399:
                break;
 
index 2f00607039e2ad7c808d273f08756a8e463732d4..ecddf921a9db401ee30bea1bf24b30f0ee8dd85d 100644 (file)
@@ -2045,16 +2045,6 @@ static const struct alg_test_desc alg_test_descs[] = {
                                .count = CRC32C_TEST_VECTORS
                        }
                }
-       }, {
-               .alg = "crct10dif",
-               .test = alg_test_hash,
-               .fips_allowed = 1,
-               .suite = {
-                       .hash = {
-                               .vecs = crct10dif_tv_template,
-                               .count = CRCT10DIF_TEST_VECTORS
-                       }
-               }
        }, {
                .alg = "cryptd(__driver-cbc-aes-aesni)",
                .test = alg_test_null,
index 7d44aa3d6b4472646dd9b93a77d97a0a58762eae..1e701bc075b907521c0e0cd4a06e41858cda0a5f 100644 (file)
@@ -450,39 +450,6 @@ static struct hash_testvec rmd320_tv_template[] = {
        }
 };
 
-#define CRCT10DIF_TEST_VECTORS 3
-static struct hash_testvec crct10dif_tv_template[] = {
-       {
-               .plaintext = "abc",
-               .psize  = 3,
-#ifdef __LITTLE_ENDIAN
-               .digest = "\x3b\x44",
-#else
-               .digest = "\x44\x3b",
-#endif
-       }, {
-               .plaintext = "1234567890123456789012345678901234567890"
-                            "123456789012345678901234567890123456789",
-               .psize  = 79,
-#ifdef __LITTLE_ENDIAN
-               .digest = "\x70\x4b",
-#else
-               .digest = "\x4b\x70",
-#endif
-       }, {
-               .plaintext =
-               "abcddddddddddddddddddddddddddddddddddddddddddddddddddddd",
-               .psize  = 56,
-#ifdef __LITTLE_ENDIAN
-               .digest = "\xe3\x9c",
-#else
-               .digest = "\x9c\xe3",
-#endif
-               .np     = 2,
-               .tap    = { 28, 28 }
-       }
-};
-
 /*
  * SHA1 test vectors  from from FIPS PUB 180-1
  * Long vector from CAVS 5.0
index 80dc988f01e4f1eebb7d12311ab98c8ac48dc013..4e737728aee207a5d644d6fa0d39b0e3c402bd47 100644 (file)
@@ -97,6 +97,15 @@ config SATA_AHCI_PLATFORM
 
          If unsure, say N.
 
+config AHCI_IMX
+       tristate "Freescale i.MX AHCI SATA support"
+       depends on SATA_AHCI_PLATFORM && MFD_SYSCON
+       help
+         This option enables support for the Freescale i.MX SoC's
+         onboard AHCI SATA.
+
+         If unsure, say N.
+
 config SATA_FSL
        tristate "Freescale 3.0Gbps SATA support"
        depends on FSL_SOC
@@ -107,7 +116,7 @@ config SATA_FSL
          If unsure, say N.
 
 config SATA_INIC162X
-       tristate "Initio 162x SATA support"
+       tristate "Initio 162x SATA support (Very Experimental)"
        depends on PCI
        help
          This option enables support for Initio 162x Serial ATA.
index c04d0fd038a312485712ed2be8a42d77b32df5c9..46518c622460942cdf8e2971dd95f3b069961ada 100644 (file)
@@ -10,6 +10,7 @@ obj-$(CONFIG_SATA_INIC162X)   += sata_inic162x.o
 obj-$(CONFIG_SATA_SIL24)       += sata_sil24.o
 obj-$(CONFIG_SATA_DWC)         += sata_dwc_460ex.o
 obj-$(CONFIG_SATA_HIGHBANK)    += sata_highbank.o libahci.o
+obj-$(CONFIG_AHCI_IMX)         += ahci_imx.o
 
 # SFF w/ custom DMA
 obj-$(CONFIG_PDC_ADMA)         += pdc_adma.o
index 5064f3ea20f1b6f362928bd7a5b3cce39894b259..db4380d7003115ef8c4d05c1ca3543adce0efecb 100644 (file)
@@ -1146,11 +1146,18 @@ int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis)
                return rc;
 
        for (i = 0; i < host->n_ports; i++) {
+               const char* desc;
                struct ahci_port_priv *pp = host->ports[i]->private_data;
 
+               /* pp is NULL for dummy ports */
+               if (pp)
+                       desc = pp->irq_desc;
+               else
+                       desc = dev_driver_string(host->dev);
+
                rc = devm_request_threaded_irq(host->dev,
                        irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED,
-                       pp->irq_desc, host->ports[i]);
+                       desc, host->ports[i]);
                if (rc)
                        goto out_free_irqs;
        }
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
new file mode 100644 (file)
index 0000000..58debb0
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Freescale IMX AHCI SATA platform driver
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ *
+ * based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/ahci_platform.h>
+#include <linux/of_device.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
+#include "ahci.h"
+
+enum {
+       HOST_TIMER1MS = 0xe0, /* Timer 1-ms */
+};
+
+struct imx_ahci_priv {
+       struct platform_device *ahci_pdev;
+       struct clk *sata_ref_clk;
+       struct clk *ahb_clk;
+       struct regmap *gpr;
+};
+
+static int imx6q_sata_init(struct device *dev, void __iomem *mmio)
+{
+       int ret = 0;
+       unsigned int reg_val;
+       struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent);
+
+       imxpriv->gpr =
+               syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
+       if (IS_ERR(imxpriv->gpr)) {
+               dev_err(dev, "failed to find fsl,imx6q-iomux-gpr regmap\n");
+               return PTR_ERR(imxpriv->gpr);
+       }
+
+       ret = clk_prepare_enable(imxpriv->sata_ref_clk);
+       if (ret < 0) {
+               dev_err(dev, "prepare-enable sata_ref clock err:%d\n", ret);
+               return ret;
+       }
+
+       /*
+        * set PHY Paremeters, two steps to configure the GPR13,
+        * one write for rest of parameters, mask of first write
+        * is 0x07fffffd, and the other one write for setting
+        * the mpll_clk_en.
+        */
+       regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK
+                       | IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK
+                       | IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK
+                       | IMX6Q_GPR13_SATA_SPD_MODE_MASK
+                       | IMX6Q_GPR13_SATA_MPLL_SS_EN
+                       | IMX6Q_GPR13_SATA_TX_ATTEN_MASK
+                       | IMX6Q_GPR13_SATA_TX_BOOST_MASK
+                       | IMX6Q_GPR13_SATA_TX_LVL_MASK
+                       | IMX6Q_GPR13_SATA_TX_EDGE_RATE
+                       , IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB
+                       | IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M
+                       | IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F
+                       | IMX6Q_GPR13_SATA_SPD_MODE_3P0G
+                       | IMX6Q_GPR13_SATA_MPLL_SS_EN
+                       | IMX6Q_GPR13_SATA_TX_ATTEN_9_16
+                       | IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB
+                       | IMX6Q_GPR13_SATA_TX_LVL_1_025_V);
+       regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN,
+                       IMX6Q_GPR13_SATA_MPLL_CLK_EN);
+       usleep_range(100, 200);
+
+       /*
+        * Configure the HWINIT bits of the HOST_CAP and HOST_PORTS_IMPL,
+        * and IP vendor specific register HOST_TIMER1MS.
+        * Configure CAP_SSS (support stagered spin up).
+        * Implement the port0.
+        * Get the ahb clock rate, and configure the TIMER1MS register.
+        */
+       reg_val = readl(mmio + HOST_CAP);
+       if (!(reg_val & HOST_CAP_SSS)) {
+               reg_val |= HOST_CAP_SSS;
+               writel(reg_val, mmio + HOST_CAP);
+       }
+       reg_val = readl(mmio + HOST_PORTS_IMPL);
+       if (!(reg_val & 0x1)) {
+               reg_val |= 0x1;
+               writel(reg_val, mmio + HOST_PORTS_IMPL);
+       }
+
+       reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000;
+       writel(reg_val, mmio + HOST_TIMER1MS);
+
+       return 0;
+}
+
+static void imx6q_sata_exit(struct device *dev)
+{
+       struct imx_ahci_priv *imxpriv =  dev_get_drvdata(dev->parent);
+
+       regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN,
+                       !IMX6Q_GPR13_SATA_MPLL_CLK_EN);
+       clk_disable_unprepare(imxpriv->sata_ref_clk);
+}
+
+static struct ahci_platform_data imx6q_sata_pdata = {
+       .init = imx6q_sata_init,
+       .exit = imx6q_sata_exit,
+};
+
+static const struct of_device_id imx_ahci_of_match[] = {
+       { .compatible = "fsl,imx6q-ahci", .data = &imx6q_sata_pdata},
+       {},
+};
+MODULE_DEVICE_TABLE(of, imx_ahci_of_match);
+
+static int imx_ahci_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct resource *mem, *irq, res[2];
+       const struct of_device_id *of_id;
+       const struct ahci_platform_data *pdata = NULL;
+       struct imx_ahci_priv *imxpriv;
+       struct device *ahci_dev;
+       struct platform_device *ahci_pdev;
+       int ret;
+
+       imxpriv = devm_kzalloc(dev, sizeof(*imxpriv), GFP_KERNEL);
+       if (!imxpriv) {
+               dev_err(dev, "can't alloc ahci_host_priv\n");
+               return -ENOMEM;
+       }
+
+       ahci_pdev = platform_device_alloc("ahci", -1);
+       if (!ahci_pdev)
+               return -ENODEV;
+
+       ahci_dev = &ahci_pdev->dev;
+       ahci_dev->parent = dev;
+
+       imxpriv->ahb_clk = devm_clk_get(dev, "ahb");
+       if (IS_ERR(imxpriv->ahb_clk)) {
+               dev_err(dev, "can't get ahb clock.\n");
+               ret = PTR_ERR(imxpriv->ahb_clk);
+               goto err_out;
+       }
+
+       imxpriv->sata_ref_clk = devm_clk_get(dev, "sata_ref");
+       if (IS_ERR(imxpriv->sata_ref_clk)) {
+               dev_err(dev, "can't get sata_ref clock.\n");
+               ret = PTR_ERR(imxpriv->sata_ref_clk);
+               goto err_out;
+       }
+
+       imxpriv->ahci_pdev = ahci_pdev;
+       platform_set_drvdata(pdev, imxpriv);
+
+       of_id = of_match_device(imx_ahci_of_match, dev);
+       if (of_id) {
+               pdata = of_id->data;
+       } else {
+               ret = -EINVAL;
+               goto err_out;
+       }
+
+       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+       if (!mem || !irq) {
+               dev_err(dev, "no mmio/irq resource\n");
+               ret = -ENOMEM;
+               goto err_out;
+       }
+
+       res[0] = *mem;
+       res[1] = *irq;
+
+       ahci_dev->coherent_dma_mask = DMA_BIT_MASK(32);
+       ahci_dev->dma_mask = &ahci_dev->coherent_dma_mask;
+       ahci_dev->of_node = dev->of_node;
+
+       ret = platform_device_add_resources(ahci_pdev, res, 2);
+       if (ret)
+               goto err_out;
+
+       ret = platform_device_add_data(ahci_pdev, pdata, sizeof(*pdata));
+       if (ret)
+               goto err_out;
+
+       ret = platform_device_add(ahci_pdev);
+       if (ret) {
+err_out:
+               platform_device_put(ahci_pdev);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int imx_ahci_remove(struct platform_device *pdev)
+{
+       struct imx_ahci_priv *imxpriv = platform_get_drvdata(pdev);
+       struct platform_device *ahci_pdev = imxpriv->ahci_pdev;
+
+       platform_device_unregister(ahci_pdev);
+       return 0;
+}
+
+static struct platform_driver imx_ahci_driver = {
+       .probe = imx_ahci_probe,
+       .remove = imx_ahci_remove,
+       .driver = {
+               .name = "ahci-imx",
+               .owner = THIS_MODULE,
+               .of_match_table = imx_ahci_of_match,
+       },
+};
+module_platform_driver(imx_ahci_driver);
+
+MODULE_DESCRIPTION("Freescale i.MX AHCI SATA platform driver");
+MODULE_AUTHOR("Richard Zhu <Hong-Xing.Zhu@freescale.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ahci:imx");
index b52a10c8eeb9b775cd1c7fd438ace85571e2ca27..513ad7ed0c997e19c3b3c4190b09221199c3d16c 100644 (file)
@@ -330,7 +330,7 @@ static const struct pci_device_id piix_pci_tbl[] = {
        /* SATA Controller IDE (Wellsburg) */
        { 0x8086, 0x8d00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb },
        /* SATA Controller IDE (Wellsburg) */
-       { 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
+       { 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb },
        /* SATA Controller IDE (Wellsburg) */
        { 0x8086, 0x8d60, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb },
        /* SATA Controller IDE (Wellsburg) */
index 83c08907e0428bd44c6fad7f1f634357ea5606cd..b1e880a3c3dabb9b5bd253da522c8879632d993c 100644 (file)
@@ -206,8 +206,10 @@ static ssize_t ata_scsi_park_store(struct device *device,
        unsigned long flags;
        int rc;
 
-       rc = strict_strtol(buf, 10, &input);
-       if (rc || input < -2)
+       rc = kstrtol(buf, 10, &input);
+       if (rc)
+               return rc;
+       if (input < -2)
                return -EINVAL;
        if (input > ATA_TMOUT_MAX_PARK) {
                rc = -EOVERFLOW;
index e4513174824830fa67d3e000ec97a449245fb0c8..5c54d957370afcaf9cf6c0d1962d87cc1b98eaac 100644 (file)
@@ -6,6 +6,18 @@
  *
  * This file is released under GPL v2.
  *
+ * **** WARNING ****
+ *
+ * This driver never worked properly and unfortunately data corruption is
+ * relatively common.  There isn't anyone working on the driver and there's
+ * no support from the vendor.  Do not use this driver in any production
+ * environment.
+ *
+ * http://thread.gmane.org/gmane.linux.debian.devel.bugs.rc/378525/focus=54491
+ * https://bugzilla.kernel.org/show_bug.cgi?id=60565
+ *
+ * *****************
+ *
  * This controller is eccentric and easily locks up if something isn't
  * right.  Documentation is available at initio's website but it only
  * documents registers (not programming model).
@@ -807,6 +819,8 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        ata_print_version_once(&pdev->dev, DRV_VERSION);
 
+       dev_alert(&pdev->dev, "inic162x support is broken with common data corruption issues and will be disabled by default, contact linux-ide@vger.kernel.org if in production use\n");
+
        /* alloc host */
        host = ata_host_alloc_pinfo(&pdev->dev, ppi, NR_PORTS);
        hpriv = devm_kzalloc(&pdev->dev, sizeof(*hpriv), GFP_KERNEL);
index b81ddfea1da075ad3c1d0cbeb9ab7bf854593436..e07a5fd58ad7463f8775f7334fd692c7e51df2ad 100644 (file)
@@ -532,11 +532,11 @@ config BLK_DEV_RBD
          If unsure, say N.
 
 config BLK_DEV_RSXX
-       tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver"
+       tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver"
        depends on PCI
        help
          Device driver for IBM's high speed PCIe SSD
-         storage devices: FlashSystem-70 and FlashSystem-80.
+         storage device: Flash Adapter 900GB Full Height.
 
          To compile this driver as a module, choose M here: the
          module will be called rsxx.
index 6608076dc39e73cda5f3aaf705579e3189421e3e..28c73ca320a8f7f9b1741492785fa2f18e042d8c 100644 (file)
@@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev)
        wake_up(&mdev->al_wait);
 }
 
+int drbd_initialize_al(struct drbd_conf *mdev, void *buffer)
+{
+       struct al_transaction_on_disk *al = buffer;
+       struct drbd_md *md = &mdev->ldev->md;
+       sector_t al_base = md->md_offset + md->al_offset;
+       int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
+       int i;
+
+       memset(al, 0, 4096);
+       al->magic = cpu_to_be32(DRBD_AL_MAGIC);
+       al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
+       al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+
+       for (i = 0; i < al_size_4k; i++) {
+               int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
 static int w_update_odbm(struct drbd_work *w, int unused)
 {
        struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
index f943aacfdad8a90aa07416d905fa2a0bb207b665..2d7f608d181ce8cf2eb26ab3859dcccb0ec38474 100644 (file)
@@ -832,6 +832,7 @@ struct drbd_tconn {                 /* is a resource from the config file */
        unsigned susp_nod:1;            /* IO suspended because no data */
        unsigned susp_fen:1;            /* IO suspended because fence peer handler runs */
        struct mutex cstate_mutex;      /* Protects graceful disconnects */
+       unsigned int connect_cnt;       /* Inc each time a connection is established */
 
        unsigned long flags;
        struct net_conf *net_conf;      /* content protected by rcu */
@@ -1132,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
 void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
 extern void conn_md_sync(struct drbd_tconn *tconn);
+extern void drbd_md_write(struct drbd_conf *mdev, void *buffer);
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
@@ -1466,8 +1468,16 @@ extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
-enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+enum determine_dev_size {
+       DS_ERROR_SHRINK = -3,
+       DS_ERROR_SPACE_MD = -2,
+       DS_ERROR = -1,
+       DS_UNCHANGED = 0,
+       DS_SHRUNK = 1,
+       DS_GREW = 2
+};
+extern enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
@@ -1633,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 #define drbd_set_out_of_sync(mdev, sector, size) \
        __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
 extern void drbd_al_shrink(struct drbd_conf *mdev);
+extern int drbd_initialize_al(struct drbd_conf *, void *);
 
 /* drbd_nl.c */
 /* state info broadcast */
index a5dca6affcbb894dc2c5d97c63c84f8972503d4e..55635edf563be741d56cccda715ab70ff23248a4 100644 (file)
@@ -2762,8 +2762,6 @@ int __init drbd_init(void)
        /*
         * allocate all necessary structs
         */
-       err = -ENOMEM;
-
        init_waitqueue_head(&drbd_pp_wait);
 
        drbd_proc = NULL; /* play safe for drbd_cleanup */
@@ -2773,6 +2771,7 @@ int __init drbd_init(void)
        if (err)
                goto fail;
 
+       err = -ENOMEM;
        drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
        if (!drbd_proc) {
                printk(KERN_ERR "drbd: unable to register proc file\n");
@@ -2803,7 +2802,6 @@ int __init drbd_init(void)
 fail:
        drbd_cleanup();
        if (err == -ENOMEM)
-               /* currently always the case */
                printk(KERN_ERR "drbd: ran out of memory\n");
        else
                printk(KERN_ERR "drbd: initialization failure\n");
@@ -2881,34 +2879,14 @@ struct meta_data_on_disk {
        u8 reserved_u8[4096 - (7*8 + 10*4)];
 } __packed;
 
-/**
- * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
- * @mdev:      DRBD device.
- */
-void drbd_md_sync(struct drbd_conf *mdev)
+
+
+void drbd_md_write(struct drbd_conf *mdev, void *b)
 {
-       struct meta_data_on_disk *buffer;
+       struct meta_data_on_disk *buffer = b;
        sector_t sector;
        int i;
 
-       /* Don't accidentally change the DRBD meta data layout. */
-       BUILD_BUG_ON(UI_SIZE != 4);
-       BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
-
-       del_timer(&mdev->md_sync_timer);
-       /* timer may be rearmed by drbd_md_mark_dirty() now. */
-       if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
-               return;
-
-       /* We use here D_FAILED and not D_ATTACHING because we try to write
-        * metadata even if we detach due to a disk failure! */
-       if (!get_ldev_if_state(mdev, D_FAILED))
-               return;
-
-       buffer = drbd_md_get_buffer(mdev);
-       if (!buffer)
-               goto out;
-
        memset(buffer, 0, sizeof(*buffer));
 
        buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -2937,6 +2915,35 @@ void drbd_md_sync(struct drbd_conf *mdev)
                dev_err(DEV, "meta data update failed!\n");
                drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
        }
+}
+
+/**
+ * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
+ * @mdev:      DRBD device.
+ */
+void drbd_md_sync(struct drbd_conf *mdev)
+{
+       struct meta_data_on_disk *buffer;
+
+       /* Don't accidentally change the DRBD meta data layout. */
+       BUILD_BUG_ON(UI_SIZE != 4);
+       BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
+
+       del_timer(&mdev->md_sync_timer);
+       /* timer may be rearmed by drbd_md_mark_dirty() now. */
+       if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
+               return;
+
+       /* We use here D_FAILED and not D_ATTACHING because we try to write
+        * metadata even if we detach due to a disk failure! */
+       if (!get_ldev_if_state(mdev, D_FAILED))
+               return;
+
+       buffer = drbd_md_get_buffer(mdev);
+       if (!buffer)
+               goto out;
+
+       drbd_md_write(mdev, buffer);
 
        /* Update mdev->ldev->md.la_size_sect,
         * since we updated it on metadata. */
index 9e3f441e7e8441e83d61273ed0f34f1c309c518b..8cc1e640f485b4620ff39d8f3de915d36a38487e 100644 (file)
@@ -417,6 +417,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
 
 bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 {
+       unsigned int connect_cnt;
        union drbd_state mask = { };
        union drbd_state val = { };
        enum drbd_fencing_p fp;
@@ -428,6 +429,10 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
                return false;
        }
 
+       spin_lock_irq(&tconn->req_lock);
+       connect_cnt = tconn->connect_cnt;
+       spin_unlock_irq(&tconn->req_lock);
+
        fp = highest_fencing_policy(tconn);
        switch (fp) {
        case FP_NOT_AVAIL:
@@ -492,8 +497,14 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
           here, because we might were able to re-establish the connection in the
           meantime. */
        spin_lock_irq(&tconn->req_lock);
-       if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
-               _conn_request_state(tconn, mask, val, CS_VERBOSE);
+       if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) {
+               if (tconn->connect_cnt != connect_cnt)
+                       /* In case the connection was established and droped
+                          while the fence-peer handler was running, ignore it */
+                       conn_info(tconn, "Ignoring fence-peer exit code\n");
+               else
+                       _conn_request_state(tconn, mask, val, CS_VERBOSE);
+       }
        spin_unlock_irq(&tconn->req_lock);
 
        return conn_highest_pdsk(tconn) <= D_OUTDATED;
@@ -816,15 +827,20 @@ void drbd_resume_io(struct drbd_conf *mdev)
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 {
        sector_t prev_first_sect, prev_size; /* previous meta location */
        sector_t la_size_sect, u_size;
+       struct drbd_md *md = &mdev->ldev->md;
+       u32 prev_al_stripe_size_4k;
+       u32 prev_al_stripes;
        sector_t size;
        char ppb[10];
+       void *buffer;
 
        int md_moved, la_size_changed;
-       enum determine_dev_size rv = unchanged;
+       enum determine_dev_size rv = DS_UNCHANGED;
 
        /* race:
         * application request passes inc_ap_bio,
@@ -836,6 +852,11 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
         * still lock the act_log to not trigger ASSERTs there.
         */
        drbd_suspend_io(mdev);
+       buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */
+       if (!buffer) {
+               drbd_resume_io(mdev);
+               return DS_ERROR;
+       }
 
        /* no wait necessary anymore, actually we could assert that */
        wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
@@ -844,7 +865,17 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
        prev_size = mdev->ldev->md.md_size_sect;
        la_size_sect = mdev->ldev->md.la_size_sect;
 
-       /* TODO: should only be some assert here, not (re)init... */
+       if (rs) {
+               /* rs is non NULL if we should change the AL layout only */
+
+               prev_al_stripes = md->al_stripes;
+               prev_al_stripe_size_4k = md->al_stripe_size_4k;
+
+               md->al_stripes = rs->al_stripes;
+               md->al_stripe_size_4k = rs->al_stripe_size / 4;
+               md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
+       }
+
        drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
        rcu_read_lock();
@@ -852,6 +883,21 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
        rcu_read_unlock();
        size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
 
+       if (size < la_size_sect) {
+               if (rs && u_size == 0) {
+                       /* Remove "rs &&" later. This check should always be active, but
+                          right now the receiver expects the permissive behavior */
+                       dev_warn(DEV, "Implicit shrink not allowed. "
+                                "Use --size=%llus for explicit shrink.\n",
+                                (unsigned long long)size);
+                       rv = DS_ERROR_SHRINK;
+               }
+               if (u_size > size)
+                       rv = DS_ERROR_SPACE_MD;
+               if (rv != DS_UNCHANGED)
+                       goto err_out;
+       }
+
        if (drbd_get_capacity(mdev->this_bdev) != size ||
            drbd_bm_capacity(mdev) != size) {
                int err;
@@ -867,7 +913,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
                                    "Leaving size unchanged at size = %lu KB\n",
                                    (unsigned long)size);
                        }
-                       rv = dev_size_error;
+                       rv = DS_ERROR;
                }
                /* racy, see comments above. */
                drbd_set_my_capacity(mdev, size);
@@ -875,38 +921,57 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
                dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
                     (unsigned long long)size>>1);
        }
-       if (rv == dev_size_error)
-               goto out;
+       if (rv <= DS_ERROR)
+               goto err_out;
 
        la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
 
        md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
                || prev_size       != mdev->ldev->md.md_size_sect;
 
-       if (la_size_changed || md_moved) {
-               int err;
+       if (la_size_changed || md_moved || rs) {
+               u32 prev_flags;
 
                drbd_al_shrink(mdev); /* All extents inactive. */
+
+               prev_flags = md->flags;
+               md->flags &= ~MDF_PRIMARY_IND;
+               drbd_md_write(mdev, buffer);
+
                dev_info(DEV, "Writing the whole bitmap, %s\n",
                         la_size_changed && md_moved ? "size changed and md moved" :
                         la_size_changed ? "size changed" : "md moved");
                /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
-               err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
-                                    "size changed", BM_LOCKED_MASK);
-               if (err) {
-                       rv = dev_size_error;
-                       goto out;
-               }
-               drbd_md_mark_dirty(mdev);
+               drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+                              "size changed", BM_LOCKED_MASK);
+               drbd_initialize_al(mdev, buffer);
+
+               md->flags = prev_flags;
+               drbd_md_write(mdev, buffer);
+
+               if (rs)
+                       dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
+                                md->al_stripes, md->al_stripe_size_4k * 4);
        }
 
        if (size > la_size_sect)
-               rv = grew;
+               rv = DS_GREW;
        if (size < la_size_sect)
-               rv = shrunk;
-out:
+               rv = DS_SHRUNK;
+
+       if (0) {
+       err_out:
+               if (rs) {
+                       md->al_stripes = prev_al_stripes;
+                       md->al_stripe_size_4k = prev_al_stripe_size_4k;
+                       md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
+
+                       drbd_md_set_sector_offsets(mdev, mdev->ldev);
+               }
+       }
        lc_unlock(mdev->act_log);
        wake_up(&mdev->al_wait);
+       drbd_md_put_buffer(mdev);
        drbd_resume_io(mdev);
 
        return rv;
@@ -1607,11 +1672,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
            !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
                set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-       dd = drbd_determine_dev_size(mdev, 0);
-       if (dd == dev_size_error) {
+       dd = drbd_determine_dev_size(mdev, 0, NULL);
+       if (dd <= DS_ERROR) {
                retcode = ERR_NOMEM_BITMAP;
                goto force_diskless_dec;
-       } else if (dd == grew)
+       } else if (dd == DS_GREW)
                set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 
        if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
@@ -2305,6 +2370,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
        struct drbd_conf *mdev;
        enum drbd_ret_code retcode;
        enum determine_dev_size dd;
+       bool change_al_layout = false;
        enum dds_flags ddsf;
        sector_t u_size;
        int err;
@@ -2315,31 +2381,33 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
        if (retcode != NO_ERROR)
                goto fail;
 
+       mdev = adm_ctx.mdev;
+       if (!get_ldev(mdev)) {
+               retcode = ERR_NO_DISK;
+               goto fail;
+       }
+
        memset(&rs, 0, sizeof(struct resize_parms));
+       rs.al_stripes = mdev->ldev->md.al_stripes;
+       rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4;
        if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
                err = resize_parms_from_attrs(&rs, info);
                if (err) {
                        retcode = ERR_MANDATORY_TAG;
                        drbd_msg_put_info(from_attrs_err_to_txt(err));
-                       goto fail;
+                       goto fail_ldev;
                }
        }
 
-       mdev = adm_ctx.mdev;
        if (mdev->state.conn > C_CONNECTED) {
                retcode = ERR_RESIZE_RESYNC;
-               goto fail;
+               goto fail_ldev;
        }
 
        if (mdev->state.role == R_SECONDARY &&
            mdev->state.peer == R_SECONDARY) {
                retcode = ERR_NO_PRIMARY;
-               goto fail;
-       }
-
-       if (!get_ldev(mdev)) {
-               retcode = ERR_NO_DISK;
-               goto fail;
+               goto fail_ldev;
        }
 
        if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
@@ -2358,6 +2426,28 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
                }
        }
 
+       if (mdev->ldev->md.al_stripes != rs.al_stripes ||
+           mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
+               u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
+
+               if (al_size_k > (16 * 1024 * 1024)) {
+                       retcode = ERR_MD_LAYOUT_TOO_BIG;
+                       goto fail_ldev;
+               }
+
+               if (al_size_k < MD_32kB_SECT/2) {
+                       retcode = ERR_MD_LAYOUT_TOO_SMALL;
+                       goto fail_ldev;
+               }
+
+               if (mdev->state.conn != C_CONNECTED) {
+                       retcode = ERR_MD_LAYOUT_CONNECTED;
+                       goto fail_ldev;
+               }
+
+               change_al_layout = true;
+       }
+
        if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
                mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 
@@ -2373,16 +2463,22 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
        }
 
        ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-       dd = drbd_determine_dev_size(mdev, ddsf);
+       dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL);
        drbd_md_sync(mdev);
        put_ldev(mdev);
-       if (dd == dev_size_error) {
+       if (dd == DS_ERROR) {
                retcode = ERR_NOMEM_BITMAP;
                goto fail;
+       } else if (dd == DS_ERROR_SPACE_MD) {
+               retcode = ERR_MD_LAYOUT_NO_FIT;
+               goto fail;
+       } else if (dd == DS_ERROR_SHRINK) {
+               retcode = ERR_IMPLICIT_SHRINK;
+               goto fail;
        }
 
        if (mdev->state.conn == C_CONNECTED) {
-               if (dd == grew)
+               if (dd == DS_GREW)
                        set_bit(RESIZE_PENDING, &mdev->flags);
 
                drbd_send_uuids(mdev);
@@ -2658,7 +2754,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
                const struct sib_info *sib)
 {
        struct state_info *si = NULL; /* for sizeof(si->member); */
-       struct net_conf *nc;
        struct nlattr *nla;
        int got_ldev;
        int err = 0;
@@ -2688,13 +2783,19 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
                goto nla_put_failure;
 
        rcu_read_lock();
-       if (got_ldev)
-               if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
-                       goto nla_put_failure;
+       if (got_ldev) {
+               struct disk_conf *disk_conf;
 
-       nc = rcu_dereference(mdev->tconn->net_conf);
-       if (nc)
-               err = net_conf_to_skb(skb, nc, exclude_sensitive);
+               disk_conf = rcu_dereference(mdev->ldev->disk_conf);
+               err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
+       }
+       if (!err) {
+               struct net_conf *nc;
+
+               nc = rcu_dereference(mdev->tconn->net_conf);
+               if (nc)
+                       err = net_conf_to_skb(skb, nc, exclude_sensitive);
+       }
        rcu_read_unlock();
        if (err)
                goto nla_put_failure;
index 4222affff488bc15c116187e2eb382af5f2d0508..cc29cd3bf78b6a758f85255f56f6cdeca1b2cc4b 100644 (file)
@@ -1039,6 +1039,8 @@ randomize:
        rcu_read_lock();
        idr_for_each_entry(&tconn->volumes, mdev, vnr) {
                kref_get(&mdev->kref);
+               rcu_read_unlock();
+
                /* Prevent a race between resync-handshake and
                 * being promoted to Primary.
                 *
@@ -1049,8 +1051,6 @@ randomize:
                mutex_lock(mdev->state_mutex);
                mutex_unlock(mdev->state_mutex);
 
-               rcu_read_unlock();
-
                if (discard_my_data)
                        set_bit(DISCARD_MY_DATA, &mdev->flags);
                else
@@ -3545,7 +3545,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 {
        struct drbd_conf *mdev;
        struct p_sizes *p = pi->data;
-       enum determine_dev_size dd = unchanged;
+       enum determine_dev_size dd = DS_UNCHANGED;
        sector_t p_size, p_usize, my_usize;
        int ldsc = 0; /* local disk size changed */
        enum dds_flags ddsf;
@@ -3617,9 +3617,9 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 
        ddsf = be16_to_cpu(p->dds_flags);
        if (get_ldev(mdev)) {
-               dd = drbd_determine_dev_size(mdev, ddsf);
+               dd = drbd_determine_dev_size(mdev, ddsf, NULL);
                put_ldev(mdev);
-               if (dd == dev_size_error)
+               if (dd == DS_ERROR)
                        return -EIO;
                drbd_md_sync(mdev);
        } else {
@@ -3647,7 +3647,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
                        drbd_send_sizes(mdev, 0, ddsf);
                }
                if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
-                   (dd == grew && mdev->state.conn == C_CONNECTED)) {
+                   (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) {
                        if (mdev->state.pdsk >= D_INCONSISTENT &&
                            mdev->state.disk >= D_INCONSISTENT) {
                                if (ddsf & DDSF_NO_RESYNC)
index 90c5be2b1d309bf041927891f2f8be24fc491bc1..216d47b7e88b72f59e26ede29a758116adc2f687 100644 (file)
@@ -1115,8 +1115,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
                drbd_thread_restart_nowait(&mdev->tconn->receiver);
 
        /* Resume AL writing if we get a connection */
-       if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+       if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
                drbd_resume_al(mdev);
+               mdev->tconn->connect_cnt++;
+       }
 
        /* remember last attach time so request_timer_fn() won't
         * kill newly established sessions while we are still trying to thaw
index 5af21f2db29cd1246397e3b4eed8627ba314adf2..6e85e21445eb13896d19f16a58a0107b4f9baa4e 100644 (file)
@@ -31,6 +31,8 @@
 #include <linux/slab.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
 #include <linux/genhd.h>
 #include <linux/idr.h>
@@ -39,8 +41,9 @@
 #include "rsxx_cfg.h"
 
 #define NO_LEGACY 0
+#define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */
 
-MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver");
+MODULE_DESCRIPTION("IBM Flash Adapter 900GB Full Height Device Driver");
 MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRIVER_VERSION);
@@ -49,9 +52,282 @@ static unsigned int force_legacy = NO_LEGACY;
 module_param(force_legacy, uint, 0444);
 MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts");
 
+static unsigned int sync_start = 1;
+module_param(sync_start, uint, 0444);
+MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete "
+                            "until the card startup has completed.");
+
 static DEFINE_IDA(rsxx_disk_ida);
 static DEFINE_SPINLOCK(rsxx_ida_lock);
 
+/* --------------------Debugfs Setup ------------------- */
+
+struct rsxx_cram {
+       u32 f_pos;
+       u32 offset;
+       void *i_private;
+};
+
+static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p)
+{
+       struct rsxx_cardinfo *card = m->private;
+
+       seq_printf(m, "HWID             0x%08x\n",
+                                       ioread32(card->regmap + HWID));
+       seq_printf(m, "SCRATCH          0x%08x\n",
+                                       ioread32(card->regmap + SCRATCH));
+       seq_printf(m, "IER              0x%08x\n",
+                                       ioread32(card->regmap + IER));
+       seq_printf(m, "IPR              0x%08x\n",
+                                       ioread32(card->regmap + IPR));
+       seq_printf(m, "CREG_CMD         0x%08x\n",
+                                       ioread32(card->regmap + CREG_CMD));
+       seq_printf(m, "CREG_ADD         0x%08x\n",
+                                       ioread32(card->regmap + CREG_ADD));
+       seq_printf(m, "CREG_CNT         0x%08x\n",
+                                       ioread32(card->regmap + CREG_CNT));
+       seq_printf(m, "CREG_STAT        0x%08x\n",
+                                       ioread32(card->regmap + CREG_STAT));
+       seq_printf(m, "CREG_DATA0       0x%08x\n",
+                                       ioread32(card->regmap + CREG_DATA0));
+       seq_printf(m, "CREG_DATA1       0x%08x\n",
+                                       ioread32(card->regmap + CREG_DATA1));
+       seq_printf(m, "CREG_DATA2       0x%08x\n",
+                                       ioread32(card->regmap + CREG_DATA2));
+       seq_printf(m, "CREG_DATA3       0x%08x\n",
+                                       ioread32(card->regmap + CREG_DATA3));
+       seq_printf(m, "CREG_DATA4       0x%08x\n",
+                                       ioread32(card->regmap + CREG_DATA4));
+       seq_printf(m, "CREG_DATA5       0x%08x\n",
+                                       ioread32(card->regmap + CREG_DATA5));
+       seq_printf(m, "CREG_DATA6       0x%08x\n",
+                                       ioread32(card->regmap + CREG_DATA6));
+       seq_printf(m, "CREG_DATA7       0x%08x\n",
+                                       ioread32(card->regmap + CREG_DATA7));
+       seq_printf(m, "INTR_COAL        0x%08x\n",
+                                       ioread32(card->regmap + INTR_COAL));
+       seq_printf(m, "HW_ERROR         0x%08x\n",
+                                       ioread32(card->regmap + HW_ERROR));
+       seq_printf(m, "DEBUG0           0x%08x\n",
+                                       ioread32(card->regmap + PCI_DEBUG0));
+       seq_printf(m, "DEBUG1           0x%08x\n",
+                                       ioread32(card->regmap + PCI_DEBUG1));
+       seq_printf(m, "DEBUG2           0x%08x\n",
+                                       ioread32(card->regmap + PCI_DEBUG2));
+       seq_printf(m, "DEBUG3           0x%08x\n",
+                                       ioread32(card->regmap + PCI_DEBUG3));
+       seq_printf(m, "DEBUG4           0x%08x\n",
+                                       ioread32(card->regmap + PCI_DEBUG4));
+       seq_printf(m, "DEBUG5           0x%08x\n",
+                                       ioread32(card->regmap + PCI_DEBUG5));
+       seq_printf(m, "DEBUG6           0x%08x\n",
+                                       ioread32(card->regmap + PCI_DEBUG6));
+       seq_printf(m, "DEBUG7           0x%08x\n",
+                                       ioread32(card->regmap + PCI_DEBUG7));
+       seq_printf(m, "RECONFIG         0x%08x\n",
+                                       ioread32(card->regmap + PCI_RECONFIG));
+
+       return 0;
+}
+
+static int rsxx_attr_stats_show(struct seq_file *m, void *p)
+{
+       struct rsxx_cardinfo *card = m->private;
+       int i;
+
+       for (i = 0; i < card->n_targets; i++) {
+               seq_printf(m, "Ctrl %d CRC Errors       = %d\n",
+                               i, card->ctrl[i].stats.crc_errors);
+               seq_printf(m, "Ctrl %d Hard Errors      = %d\n",
+                               i, card->ctrl[i].stats.hard_errors);
+               seq_printf(m, "Ctrl %d Soft Errors      = %d\n",
+                               i, card->ctrl[i].stats.soft_errors);
+               seq_printf(m, "Ctrl %d Writes Issued    = %d\n",
+                               i, card->ctrl[i].stats.writes_issued);
+               seq_printf(m, "Ctrl %d Writes Failed    = %d\n",
+                               i, card->ctrl[i].stats.writes_failed);
+               seq_printf(m, "Ctrl %d Reads Issued     = %d\n",
+                               i, card->ctrl[i].stats.reads_issued);
+               seq_printf(m, "Ctrl %d Reads Failed     = %d\n",
+                               i, card->ctrl[i].stats.reads_failed);
+               seq_printf(m, "Ctrl %d Reads Retried    = %d\n",
+                               i, card->ctrl[i].stats.reads_retried);
+               seq_printf(m, "Ctrl %d Discards Issued  = %d\n",
+                               i, card->ctrl[i].stats.discards_issued);
+               seq_printf(m, "Ctrl %d Discards Failed  = %d\n",
+                               i, card->ctrl[i].stats.discards_failed);
+               seq_printf(m, "Ctrl %d DMA SW Errors    = %d\n",
+                               i, card->ctrl[i].stats.dma_sw_err);
+               seq_printf(m, "Ctrl %d DMA HW Faults    = %d\n",
+                               i, card->ctrl[i].stats.dma_hw_fault);
+               seq_printf(m, "Ctrl %d DMAs Cancelled   = %d\n",
+                               i, card->ctrl[i].stats.dma_cancelled);
+               seq_printf(m, "Ctrl %d SW Queue Depth   = %d\n",
+                               i, card->ctrl[i].stats.sw_q_depth);
+               seq_printf(m, "Ctrl %d HW Queue Depth   = %d\n",
+                       i, atomic_read(&card->ctrl[i].stats.hw_q_depth));
+       }
+
+       return 0;
+}
+
+static int rsxx_attr_stats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, rsxx_attr_stats_show, inode->i_private);
+}
+
+static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, rsxx_attr_pci_regs_show, inode->i_private);
+}
+
+static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
+                             size_t cnt, loff_t *ppos)
+{
+       struct rsxx_cram *info = fp->private_data;
+       struct rsxx_cardinfo *card = info->i_private;
+       char *buf;
+       int st;
+
+       buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       info->f_pos = (u32)*ppos + info->offset;
+
+       st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
+       if (st)
+               return st;
+
+       st = copy_to_user(ubuf, buf, cnt);
+       if (st)
+               return st;
+
+       info->offset += cnt;
+
+       kfree(buf);
+
+       return cnt;
+}
+
+static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf,
+                              size_t cnt, loff_t *ppos)
+{
+       struct rsxx_cram *info = fp->private_data;
+       struct rsxx_cardinfo *card = info->i_private;
+       char *buf;
+       int st;
+
+       buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       st = copy_from_user(buf, ubuf, cnt);
+       if (st)
+               return st;
+
+       info->f_pos = (u32)*ppos + info->offset;
+
+       st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
+       if (st)
+               return st;
+
+       info->offset += cnt;
+
+       kfree(buf);
+
+       return cnt;
+}
+
+static int rsxx_cram_open(struct inode *inode, struct file *file)
+{
+       struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       info->i_private = inode->i_private;
+       info->f_pos = file->f_pos;
+       file->private_data = info;
+
+       return 0;
+}
+
+static int rsxx_cram_release(struct inode *inode, struct file *file)
+{
+       struct rsxx_cram *info = file->private_data;
+
+       if (!info)
+               return 0;
+
+       kfree(info);
+       file->private_data = NULL;
+
+       return 0;
+}
+
+static const struct file_operations debugfs_cram_fops = {
+       .owner          = THIS_MODULE,
+       .open           = rsxx_cram_open,
+       .read           = rsxx_cram_read,
+       .write          = rsxx_cram_write,
+       .release        = rsxx_cram_release,
+};
+
+static const struct file_operations debugfs_stats_fops = {
+       .owner          = THIS_MODULE,
+       .open           = rsxx_attr_stats_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static const struct file_operations debugfs_pci_regs_fops = {
+       .owner          = THIS_MODULE,
+       .open           = rsxx_attr_pci_regs_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
+{
+       struct dentry *debugfs_stats;
+       struct dentry *debugfs_pci_regs;
+       struct dentry *debugfs_cram;
+
+       card->debugfs_dir = debugfs_create_dir(card->gendisk->disk_name, NULL);
+       if (IS_ERR_OR_NULL(card->debugfs_dir))
+               goto failed_debugfs_dir;
+
+       debugfs_stats = debugfs_create_file("stats", S_IRUGO,
+                                           card->debugfs_dir, card,
+                                           &debugfs_stats_fops);
+       if (IS_ERR_OR_NULL(debugfs_stats))
+               goto failed_debugfs_stats;
+
+       debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
+                                              card->debugfs_dir, card,
+                                              &debugfs_pci_regs_fops);
+       if (IS_ERR_OR_NULL(debugfs_pci_regs))
+               goto failed_debugfs_pci_regs;
+
+       debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
+                                          card->debugfs_dir, card,
+                                          &debugfs_cram_fops);
+       if (IS_ERR_OR_NULL(debugfs_cram))
+               goto failed_debugfs_cram;
+
+       return;
+failed_debugfs_cram:
+       debugfs_remove(debugfs_pci_regs);
+failed_debugfs_pci_regs:
+       debugfs_remove(debugfs_stats);
+failed_debugfs_stats:
+       debugfs_remove(card->debugfs_dir);
+failed_debugfs_dir:
+       card->debugfs_dir = NULL;
+}
+
 /*----------------- Interrupt Control & Handling -------------------*/
 
 static void rsxx_mask_interrupts(struct rsxx_cardinfo *card)
@@ -163,12 +439,13 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)
                }
 
                if (isr & CR_INTR_CREG) {
-                       schedule_work(&card->creg_ctrl.done_work);
+                       queue_work(card->creg_ctrl.creg_wq,
+                                  &card->creg_ctrl.done_work);
                        handled++;
                }
 
                if (isr & CR_INTR_EVENT) {
-                       schedule_work(&card->event_work);
+                       queue_work(card->event_wq, &card->event_work);
                        rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
                        handled++;
                }
@@ -329,7 +606,7 @@ static int rsxx_eeh_frozen(struct pci_dev *dev)
        int i;
        int st;
 
-       dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n");
+       dev_warn(&dev->dev, "IBM Flash Adapter PCI: preparing for slot reset.\n");
 
        card->eeh_state = 1;
        rsxx_mask_interrupts(card);
@@ -367,15 +644,26 @@ static void rsxx_eeh_failure(struct pci_dev *dev)
 {
        struct rsxx_cardinfo *card = pci_get_drvdata(dev);
        int i;
+       int cnt = 0;
 
-       dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n");
+       dev_err(&dev->dev, "IBM Flash Adapter PCI: disabling failed card.\n");
 
        card->eeh_state = 1;
+       card->halt = 1;
 
-       for (i = 0; i < card->n_targets; i++)
-               del_timer_sync(&card->ctrl[i].activity_timer);
+       for (i = 0; i < card->n_targets; i++) {
+               spin_lock_bh(&card->ctrl[i].queue_lock);
+               cnt = rsxx_cleanup_dma_queue(&card->ctrl[i],
+                                            &card->ctrl[i].queue);
+               spin_unlock_bh(&card->ctrl[i].queue_lock);
+
+               cnt += rsxx_dma_cancel(&card->ctrl[i]);
 
-       rsxx_eeh_cancel_dmas(card);
+               if (cnt)
+                       dev_info(CARD_TO_DEV(card),
+                               "Freed %d queued DMAs on channel %d\n",
+                               cnt, card->ctrl[i].id);
+       }
 }
 
 static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
@@ -432,7 +720,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
        int st;
 
        dev_warn(&dev->dev,
-               "IBM FlashSystem PCI: recovering from slot reset.\n");
+               "IBM Flash Adapter PCI: recovering from slot reset.\n");
 
        st = pci_enable_device(dev);
        if (st)
@@ -485,7 +773,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
                                &card->ctrl[i].issue_dma_work);
        }
 
-       dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n");
+       dev_info(&dev->dev, "IBM Flash Adapter PCI: recovery complete.\n");
 
        return PCI_ERS_RESULT_RECOVERED;
 
@@ -528,6 +816,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 {
        struct rsxx_cardinfo *card;
        int st;
+       unsigned int sync_timeout;
 
        dev_info(&dev->dev, "PCI-Flash SSD discovered\n");
 
@@ -610,7 +899,11 @@ static int rsxx_pci_probe(struct pci_dev *dev,
        }
 
        /************* Setup Processor Command Interface *************/
-       rsxx_creg_setup(card);
+       st = rsxx_creg_setup(card);
+       if (st) {
+               dev_err(CARD_TO_DEV(card), "Failed to setup creg interface.\n");
+               goto failed_creg_setup;
+       }
 
        spin_lock_irq(&card->irq_lock);
        rsxx_enable_ier_and_isr(card, CR_INTR_CREG);
@@ -650,6 +943,12 @@ static int rsxx_pci_probe(struct pci_dev *dev,
        }
 
        /************* Setup Card Event Handler *************/
+       card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event");
+       if (!card->event_wq) {
+               dev_err(CARD_TO_DEV(card), "Failed card event setup.\n");
+               goto failed_event_handler;
+       }
+
        INIT_WORK(&card->event_work, card_event_handler);
 
        st = rsxx_setup_dev(card);
@@ -676,6 +975,33 @@ static int rsxx_pci_probe(struct pci_dev *dev,
                if (st)
                        dev_crit(CARD_TO_DEV(card),
                                "Failed issuing card startup\n");
+               if (sync_start) {
+                       sync_timeout = SYNC_START_TIMEOUT;
+
+                       dev_info(CARD_TO_DEV(card),
+                                "Waiting for card to startup\n");
+
+                       do {
+                               ssleep(1);
+                               sync_timeout--;
+
+                               rsxx_get_card_state(card, &card->state);
+                       } while (sync_timeout &&
+                               (card->state == CARD_STATE_STARTING));
+
+                       if (card->state == CARD_STATE_STARTING) {
+                               dev_warn(CARD_TO_DEV(card),
+                                        "Card startup timed out\n");
+                               card->size8 = 0;
+                       } else {
+                               dev_info(CARD_TO_DEV(card),
+                                       "card state: %s\n",
+                                       rsxx_card_state_to_str(card->state));
+                               st = rsxx_get_card_size8(card, &card->size8);
+                               if (st)
+                                       card->size8 = 0;
+                       }
+               }
        } else if (card->state == CARD_STATE_GOOD ||
                   card->state == CARD_STATE_RD_ONLY_FAULT) {
                st = rsxx_get_card_size8(card, &card->size8);
@@ -685,12 +1011,21 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 
        rsxx_attach_dev(card);
 
+       /************* Setup Debugfs *************/
+       rsxx_debugfs_dev_new(card);
+
        return 0;
 
 failed_create_dev:
+       destroy_workqueue(card->event_wq);
+       card->event_wq = NULL;
+failed_event_handler:
        rsxx_dma_destroy(card);
 failed_dma_setup:
 failed_compatiblity_check:
+       destroy_workqueue(card->creg_ctrl.creg_wq);
+       card->creg_ctrl.creg_wq = NULL;
+failed_creg_setup:
        spin_lock_irq(&card->irq_lock);
        rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
        spin_unlock_irq(&card->irq_lock);
@@ -756,6 +1091,8 @@ static void rsxx_pci_remove(struct pci_dev *dev)
        /* Prevent work_structs from re-queuing themselves. */
        card->halt = 1;
 
+       debugfs_remove_recursive(card->debugfs_dir);
+
        free_irq(dev->irq, card);
 
        if (!force_legacy)
index 4b5c020a0a65ad8831d75bf3407dd81b2d82b306..926dce9c452faf927d79fb0b99fd1a4e473aea6e 100644 (file)
@@ -431,6 +431,15 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card,
        *hw_stat = completion.creg_status;
 
        if (completion.st) {
+               /*
+               * This read is needed to verify that there has not been any
+               * extreme errors that might have occurred, i.e. EEH. The
+               * function iowrite32 will not detect EEH errors, so it is
+               * necessary that we recover if such an error is the reason
+               * for the timeout. This is a dummy read.
+               */
+               ioread32(card->regmap + SCRATCH);
+
                dev_warn(CARD_TO_DEV(card),
                        "creg command failed(%d x%08x)\n",
                        completion.st, addr);
@@ -727,6 +736,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card)
 {
        card->creg_ctrl.active_cmd = NULL;
 
+       card->creg_ctrl.creg_wq =
+                       create_singlethread_workqueue(DRIVER_NAME"_creg");
+       if (!card->creg_ctrl.creg_wq)
+               return -ENOMEM;
+
        INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done);
        mutex_init(&card->creg_ctrl.reset_lock);
        INIT_LIST_HEAD(&card->creg_ctrl.queue);
index 4346d17d29496b65316db8fc2f1133b9e0d116cd..d7af441880befc1c6aeb7cdd4dbfc128a8014bb8 100644 (file)
@@ -155,7 +155,8 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
                atomic_set(&meta->error, 1);
 
        if (atomic_dec_and_test(&meta->pending_dmas)) {
-               disk_stats_complete(card, meta->bio, meta->start_time);
+               if (!card->eeh_state && card->gendisk)
+                       disk_stats_complete(card, meta->bio, meta->start_time);
 
                bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0);
                kmem_cache_free(bio_meta_pool, meta);
@@ -170,6 +171,12 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
 
        might_sleep();
 
+       if (!card)
+               goto req_err;
+
+       if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk))
+               goto req_err;
+
        if (unlikely(card->halt)) {
                st = -EFAULT;
                goto req_err;
@@ -196,7 +203,8 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
        atomic_set(&bio_meta->pending_dmas, 0);
        bio_meta->start_time = jiffies;
 
-       disk_stats_start(card, bio);
+       if (!unlikely(card->halt))
+               disk_stats_start(card, bio);
 
        dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
                 bio_data_dir(bio) ? 'W' : 'R', bio_meta,
@@ -225,24 +233,6 @@ static bool rsxx_discard_supported(struct rsxx_cardinfo *card)
        return (pci_rev >= RSXX_DISCARD_SUPPORT);
 }
 
-static unsigned short rsxx_get_logical_block_size(
-                                       struct rsxx_cardinfo *card)
-{
-       u32 capabilities = 0;
-       int st;
-
-       st = rsxx_get_card_capabilities(card, &capabilities);
-       if (st)
-               dev_warn(CARD_TO_DEV(card),
-                       "Failed reading card capabilities register\n");
-
-       /* Earlier firmware did not have support for 512 byte accesses */
-       if (capabilities & CARD_CAP_SUBPAGE_WRITES)
-               return 512;
-       else
-               return RSXX_HW_BLK_SIZE;
-}
-
 int rsxx_attach_dev(struct rsxx_cardinfo *card)
 {
        mutex_lock(&card->dev_lock);
@@ -305,7 +295,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
                return -ENOMEM;
        }
 
-       blk_size = rsxx_get_logical_block_size(card);
+       blk_size = card->config.data.block_size;
 
        blk_queue_make_request(card->queue, rsxx_make_request);
        blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
@@ -347,6 +337,7 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card)
        card->gendisk = NULL;
 
        blk_cleanup_queue(card->queue);
+       card->queue->queuedata = NULL;
        unregister_blkdev(card->major, DRIVER_NAME);
 }
 
index 0607513cfb41fa4b2ff88bac32be0665c645a2bb..bed32f16b084bdd46f7b881b32f95775effaa499 100644 (file)
@@ -245,6 +245,22 @@ static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
        kmem_cache_free(rsxx_dma_pool, dma);
 }
 
+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
+                          struct list_head *q)
+{
+       struct rsxx_dma *dma;
+       struct rsxx_dma *tmp;
+       int cnt = 0;
+
+       list_for_each_entry_safe(dma, tmp, q, list) {
+               list_del(&dma->list);
+               rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+               cnt++;
+       }
+
+       return cnt;
+}
+
 static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
                                 struct rsxx_dma *dma)
 {
@@ -252,9 +268,10 @@ static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
         * Requeued DMAs go to the front of the queue so they are issued
         * first.
         */
-       spin_lock(&ctrl->queue_lock);
+       spin_lock_bh(&ctrl->queue_lock);
+       ctrl->stats.sw_q_depth++;
        list_add(&dma->list, &ctrl->queue);
-       spin_unlock(&ctrl->queue_lock);
+       spin_unlock_bh(&ctrl->queue_lock);
 }
 
 static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
@@ -329,6 +346,7 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
 static void dma_engine_stalled(unsigned long data)
 {
        struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data;
+       int cnt;
 
        if (atomic_read(&ctrl->stats.hw_q_depth) == 0 ||
            unlikely(ctrl->card->eeh_state))
@@ -349,18 +367,28 @@ static void dma_engine_stalled(unsigned long data)
                        "DMA channel %d has stalled, faulting interface.\n",
                        ctrl->id);
                ctrl->card->dma_fault = 1;
+
+               /* Clean up the DMA queue */
+               spin_lock(&ctrl->queue_lock);
+               cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+               spin_unlock(&ctrl->queue_lock);
+
+               cnt += rsxx_dma_cancel(ctrl);
+
+               if (cnt)
+                       dev_info(CARD_TO_DEV(ctrl->card),
+                               "Freed %d queued DMAs on channel %d\n",
+                               cnt, ctrl->id);
        }
 }
 
-static void rsxx_issue_dmas(struct work_struct *work)
+static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
 {
-       struct rsxx_dma_ctrl *ctrl;
        struct rsxx_dma *dma;
        int tag;
        int cmds_pending = 0;
        struct hw_cmd *hw_cmd_buf;
 
-       ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
        hw_cmd_buf = ctrl->cmd.buf;
 
        if (unlikely(ctrl->card->halt) ||
@@ -368,22 +396,22 @@ static void rsxx_issue_dmas(struct work_struct *work)
                return;
 
        while (1) {
-               spin_lock(&ctrl->queue_lock);
+               spin_lock_bh(&ctrl->queue_lock);
                if (list_empty(&ctrl->queue)) {
-                       spin_unlock(&ctrl->queue_lock);
+                       spin_unlock_bh(&ctrl->queue_lock);
                        break;
                }
-               spin_unlock(&ctrl->queue_lock);
+               spin_unlock_bh(&ctrl->queue_lock);
 
                tag = pop_tracker(ctrl->trackers);
                if (tag == -1)
                        break;
 
-               spin_lock(&ctrl->queue_lock);
+               spin_lock_bh(&ctrl->queue_lock);
                dma = list_entry(ctrl->queue.next, struct rsxx_dma, list);
                list_del(&dma->list);
                ctrl->stats.sw_q_depth--;
-               spin_unlock(&ctrl->queue_lock);
+               spin_unlock_bh(&ctrl->queue_lock);
 
                /*
                 * This will catch any DMAs that slipped in right before the
@@ -440,9 +468,8 @@ static void rsxx_issue_dmas(struct work_struct *work)
        }
 }
 
-static void rsxx_dma_done(struct work_struct *work)
+static void rsxx_dma_done(struct rsxx_dma_ctrl *ctrl)
 {
-       struct rsxx_dma_ctrl *ctrl;
        struct rsxx_dma *dma;
        unsigned long flags;
        u16 count;
@@ -450,7 +477,6 @@ static void rsxx_dma_done(struct work_struct *work)
        u8 tag;
        struct hw_status *hw_st_buf;
 
-       ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
        hw_st_buf = ctrl->status.buf;
 
        if (unlikely(ctrl->card->halt) ||
@@ -520,33 +546,32 @@ static void rsxx_dma_done(struct work_struct *work)
        rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id));
        spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
 
-       spin_lock(&ctrl->queue_lock);
+       spin_lock_bh(&ctrl->queue_lock);
        if (ctrl->stats.sw_q_depth)
                queue_work(ctrl->issue_wq, &ctrl->issue_dma_work);
-       spin_unlock(&ctrl->queue_lock);
+       spin_unlock_bh(&ctrl->queue_lock);
 }
 
-static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card,
-                                     struct list_head *q)
+static void rsxx_schedule_issue(struct work_struct *work)
 {
-       struct rsxx_dma *dma;
-       struct rsxx_dma *tmp;
-       int cnt = 0;
+       struct rsxx_dma_ctrl *ctrl;
 
-       list_for_each_entry_safe(dma, tmp, q, list) {
-               list_del(&dma->list);
+       ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
 
-               if (dma->dma_addr)
-                       pci_unmap_page(card->dev, dma->dma_addr,
-                                      get_dma_size(dma),
-                                      (dma->cmd == HW_CMD_BLK_WRITE) ?
-                                      PCI_DMA_TODEVICE :
-                                      PCI_DMA_FROMDEVICE);
-               kmem_cache_free(rsxx_dma_pool, dma);
-               cnt++;
-       }
+       mutex_lock(&ctrl->work_lock);
+       rsxx_issue_dmas(ctrl);
+       mutex_unlock(&ctrl->work_lock);
+}
 
-       return cnt;
+static void rsxx_schedule_done(struct work_struct *work)
+{
+       struct rsxx_dma_ctrl *ctrl;
+
+       ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
+
+       mutex_lock(&ctrl->work_lock);
+       rsxx_dma_done(ctrl);
+       mutex_unlock(&ctrl->work_lock);
 }
 
 static int rsxx_queue_discard(struct rsxx_cardinfo *card,
@@ -698,10 +723,10 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 
        for (i = 0; i < card->n_targets; i++) {
                if (!list_empty(&dma_list[i])) {
-                       spin_lock(&card->ctrl[i].queue_lock);
+                       spin_lock_bh(&card->ctrl[i].queue_lock);
                        card->ctrl[i].stats.sw_q_depth += dma_cnt[i];
                        list_splice_tail(&dma_list[i], &card->ctrl[i].queue);
-                       spin_unlock(&card->ctrl[i].queue_lock);
+                       spin_unlock_bh(&card->ctrl[i].queue_lock);
 
                        queue_work(card->ctrl[i].issue_wq,
                                   &card->ctrl[i].issue_dma_work);
@@ -711,8 +736,11 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
        return 0;
 
 bvec_err:
-       for (i = 0; i < card->n_targets; i++)
-               rsxx_cleanup_dma_queue(card, &dma_list[i]);
+       for (i = 0; i < card->n_targets; i++) {
+               spin_lock_bh(&card->ctrl[i].queue_lock);
+               rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]);
+               spin_unlock_bh(&card->ctrl[i].queue_lock);
+       }
 
        return st;
 }
@@ -780,6 +808,7 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
        spin_lock_init(&ctrl->trackers->lock);
 
        spin_lock_init(&ctrl->queue_lock);
+       mutex_init(&ctrl->work_lock);
        INIT_LIST_HEAD(&ctrl->queue);
 
        setup_timer(&ctrl->activity_timer, dma_engine_stalled,
@@ -793,8 +822,8 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
        if (!ctrl->done_wq)
                return -ENOMEM;
 
-       INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas);
-       INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done);
+       INIT_WORK(&ctrl->issue_dma_work, rsxx_schedule_issue);
+       INIT_WORK(&ctrl->dma_done_work, rsxx_schedule_done);
 
        st = rsxx_hw_buffers_init(dev, ctrl);
        if (st)
@@ -918,13 +947,30 @@ failed_dma_setup:
        return st;
 }
 
+int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl)
+{
+       struct rsxx_dma *dma;
+       int i;
+       int cnt = 0;
+
+       /* Clean up issued DMAs */
+       for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) {
+               dma = get_tracker_dma(ctrl->trackers, i);
+               if (dma) {
+                       atomic_dec(&ctrl->stats.hw_q_depth);
+                       rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+                       push_tracker(ctrl->trackers, i);
+                       cnt++;
+               }
+       }
+
+       return cnt;
+}
 
 void rsxx_dma_destroy(struct rsxx_cardinfo *card)
 {
        struct rsxx_dma_ctrl *ctrl;
-       struct rsxx_dma *dma;
-       int i, j;
-       int cnt = 0;
+       int i;
 
        for (i = 0; i < card->n_targets; i++) {
                ctrl = &card->ctrl[i];
@@ -943,33 +989,11 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)
                        del_timer_sync(&ctrl->activity_timer);
 
                /* Clean up the DMA queue */
-               spin_lock(&ctrl->queue_lock);
-               cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue);
-               spin_unlock(&ctrl->queue_lock);
-
-               if (cnt)
-                       dev_info(CARD_TO_DEV(card),
-                               "Freed %d queued DMAs on channel %d\n",
-                               cnt, i);
-
-               /* Clean up issued DMAs */
-               for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) {
-                       dma = get_tracker_dma(ctrl->trackers, j);
-                       if (dma) {
-                               pci_unmap_page(card->dev, dma->dma_addr,
-                                              get_dma_size(dma),
-                                              (dma->cmd == HW_CMD_BLK_WRITE) ?
-                                              PCI_DMA_TODEVICE :
-                                              PCI_DMA_FROMDEVICE);
-                               kmem_cache_free(rsxx_dma_pool, dma);
-                               cnt++;
-                       }
-               }
+               spin_lock_bh(&ctrl->queue_lock);
+               rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+               spin_unlock_bh(&ctrl->queue_lock);
 
-               if (cnt)
-                       dev_info(CARD_TO_DEV(card),
-                               "Freed %d pending DMAs on channel %d\n",
-                               cnt, i);
+               rsxx_dma_cancel(ctrl);
 
                vfree(ctrl->trackers);
 
@@ -1013,7 +1037,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
                        cnt++;
                }
 
-               spin_lock(&card->ctrl[i].queue_lock);
+               spin_lock_bh(&card->ctrl[i].queue_lock);
                list_splice(&issued_dmas[i], &card->ctrl[i].queue);
 
                atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
@@ -1028,7 +1052,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
                                               PCI_DMA_TODEVICE :
                                               PCI_DMA_FROMDEVICE);
                }
-               spin_unlock(&card->ctrl[i].queue_lock);
+               spin_unlock_bh(&card->ctrl[i].queue_lock);
        }
 
        kfree(issued_dmas);
@@ -1036,30 +1060,13 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
        return 0;
 }
 
-void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card)
-{
-       struct rsxx_dma *dma;
-       struct rsxx_dma *tmp;
-       int i;
-
-       for (i = 0; i < card->n_targets; i++) {
-               spin_lock(&card->ctrl[i].queue_lock);
-               list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) {
-                       list_del(&dma->list);
-
-                       rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED);
-               }
-               spin_unlock(&card->ctrl[i].queue_lock);
-       }
-}
-
 int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
 {
        struct rsxx_dma *dma;
        int i;
 
        for (i = 0; i < card->n_targets; i++) {
-               spin_lock(&card->ctrl[i].queue_lock);
+               spin_lock_bh(&card->ctrl[i].queue_lock);
                list_for_each_entry(dma, &card->ctrl[i].queue, list) {
                        dma->dma_addr = pci_map_page(card->dev, dma->page,
                                        dma->pg_off, get_dma_size(dma),
@@ -1067,12 +1074,12 @@ int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
                                        PCI_DMA_TODEVICE :
                                        PCI_DMA_FROMDEVICE);
                        if (!dma->dma_addr) {
-                               spin_unlock(&card->ctrl[i].queue_lock);
+                               spin_unlock_bh(&card->ctrl[i].queue_lock);
                                kmem_cache_free(rsxx_dma_pool, dma);
                                return -ENOMEM;
                        }
                }
-               spin_unlock(&card->ctrl[i].queue_lock);
+               spin_unlock_bh(&card->ctrl[i].queue_lock);
        }
 
        return 0;
index 382e8bf5c03b73dae35fd1ae8865110cc1a85168..5ad5055a4104d9341fc1f55054d1c872c58467d1 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/vmalloc.h>
 #include <linux/timer.h>
 #include <linux/ioctl.h>
+#include <linux/delay.h>
 
 #include "rsxx.h"
 #include "rsxx_cfg.h"
@@ -114,6 +115,7 @@ struct rsxx_dma_ctrl {
        struct timer_list               activity_timer;
        struct dma_tracker_list         *trackers;
        struct rsxx_dma_stats           stats;
+       struct mutex                    work_lock;
 };
 
 struct rsxx_cardinfo {
@@ -134,6 +136,7 @@ struct rsxx_cardinfo {
                spinlock_t              lock;
                bool                    active;
                struct creg_cmd         *active_cmd;
+               struct workqueue_struct *creg_wq;
                struct work_struct      done_work;
                struct list_head        queue;
                unsigned int            q_depth;
@@ -154,6 +157,7 @@ struct rsxx_cardinfo {
                int buf_len;
        } log;
 
+       struct workqueue_struct *event_wq;
        struct work_struct      event_work;
        unsigned int            state;
        u64                     size8;
@@ -181,6 +185,8 @@ struct rsxx_cardinfo {
 
        int                     n_targets;
        struct rsxx_dma_ctrl    *ctrl;
+
+       struct dentry           *debugfs_dir;
 };
 
 enum rsxx_pci_regmap {
@@ -283,6 +289,7 @@ enum rsxx_creg_addr {
        CREG_ADD_CAPABILITIES           = 0x80001050,
        CREG_ADD_LOG                    = 0x80002000,
        CREG_ADD_NUM_TARGETS            = 0x80003000,
+       CREG_ADD_CRAM                   = 0xA0000000,
        CREG_ADD_CONFIG                 = 0xB0000000,
 };
 
@@ -372,6 +379,8 @@ typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
 int rsxx_dma_setup(struct rsxx_cardinfo *card);
 void rsxx_dma_destroy(struct rsxx_cardinfo *card);
 int rsxx_dma_init(void);
+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q);
+int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
 void rsxx_dma_cleanup(void);
 void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
 int rsxx_dma_configure(struct rsxx_cardinfo *card);
@@ -382,7 +391,6 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
                           void *cb_data);
 int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl);
 int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card);
-void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card);
 int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card);
 
 /***** cregs.c *****/
index dd5b2fed97e9ae2b7665f13ec3c42db6ee6fb462..bf4b9d282c042ec4d5b3c31a9d3192bd059afcc5 100644 (file)
 #include "common.h"
 
 /*
- * These are rather arbitrary. They are fairly large because adjacent requests
- * pulled from a communication ring are quite likely to end up being part of
- * the same scatter/gather request at the disc.
+ * Maximum number of unused free pages to keep in the internal buffer.
+ * Setting this to a value too low will reduce memory used in each backend,
+ * but can have a performance penalty.
  *
- * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
- *
- * This will increase the chances of being able to write whole tracks.
- * 64 should be enough to keep us competitive with Linux.
+ * A sane value is xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST, but can
+ * be set to a lower value that might degrade performance on some intensive
+ * IO workloads.
  */
-static int xen_blkif_reqs = 64;
-module_param_named(reqs, xen_blkif_reqs, int, 0);
-MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
 
-/* Run-time switchable: /sys/module/blkback/parameters/ */
-static unsigned int log_stats;
-module_param(log_stats, int, 0644);
+static int xen_blkif_max_buffer_pages = 1024;
+module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
+MODULE_PARM_DESC(max_buffer_pages,
+"Maximum number of free pages to keep in each block backend buffer");
 
 /*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it. Each buffer_head that completes decrements
- * the pendcnt towards zero. When it hits zero, the specified domain has a
- * response queued for it, with the saved 'id' passed back.
+ * Maximum number of grants to map persistently in blkback. For maximum
+ * performance this should be the total numbers of grants that can be used
+ * to fill the ring, but since this might become too high, specially with
+ * the use of indirect descriptors, we set it to a value that provides good
+ * performance without using too much memory.
+ *
+ * When the list of persistent grants is full we clean it up using a LRU
+ * algorithm.
  */
-struct pending_req {
-       struct xen_blkif        *blkif;
-       u64                     id;
-       int                     nr_pages;
-       atomic_t                pendcnt;
-       unsigned short          operation;
-       int                     status;
-       struct list_head        free_list;
-       DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-};
 
-#define BLKBACK_INVALID_HANDLE (~0)
+static int xen_blkif_max_pgrants = 1056;
+module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
+MODULE_PARM_DESC(max_persistent_grants,
+                 "Maximum number of grants to map persistently");
 
-struct xen_blkbk {
-       struct pending_req      *pending_reqs;
-       /* List of all 'pending_req' available */
-       struct list_head        pending_free;
-       /* And its spinlock. */
-       spinlock_t              pending_free_lock;
-       wait_queue_head_t       pending_free_wq;
-       /* The list of all pages that are available. */
-       struct page             **pending_pages;
-       /* And the grant handles that are available. */
-       grant_handle_t          *pending_grant_handles;
-};
-
-static struct xen_blkbk *blkbk;
+/*
+ * The LRU mechanism to clean the lists of persistent grants needs to
+ * be executed periodically. The time interval between consecutive executions
+ * of the purge mechanism is set in ms.
+ */
+#define LRU_INTERVAL 100
 
 /*
- * Maximum number of grant pages that can be mapped in blkback.
- * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
- * pages that blkback will persistently map.
- * Currently, this is:
- * RING_SIZE = 32 (for all known ring types)
- * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
- * sizeof(struct persistent_gnt) = 48
- * So the maximum memory used to store the grants is:
- * 32 * 11 * 48 = 16896 bytes
+ * When the persistent grants list is full we will remove unused grants
+ * from the list. The percent number of grants to be removed at each LRU
+ * execution.
  */
-static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+#define LRU_PERCENT_CLEAN 5
+
+/* Run-time switchable: /sys/module/blkback/parameters/ */
+static unsigned int log_stats;
+module_param(log_stats, int, 0644);
+
+#define BLKBACK_INVALID_HANDLE (~0)
+
+/* Number of free pages to remove on each call to free_xenballooned_pages */
+#define NUM_BATCH_FREE_PAGES 10
+
+static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
 {
-       switch (protocol) {
-       case BLKIF_PROTOCOL_NATIVE:
-               return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
-                          BLKIF_MAX_SEGMENTS_PER_REQUEST;
-       case BLKIF_PROTOCOL_X86_32:
-               return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
-                          BLKIF_MAX_SEGMENTS_PER_REQUEST;
-       case BLKIF_PROTOCOL_X86_64:
-               return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
-                          BLKIF_MAX_SEGMENTS_PER_REQUEST;
-       default:
-               BUG();
+       unsigned long flags;
+
+       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+       if (list_empty(&blkif->free_pages)) {
+               BUG_ON(blkif->free_pages_num != 0);
+               spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+               return alloc_xenballooned_pages(1, page, false);
        }
+       BUG_ON(blkif->free_pages_num == 0);
+       page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+       list_del(&page[0]->lru);
+       blkif->free_pages_num--;
+       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+
        return 0;
 }
 
-
-/*
- * Little helpful macro to figure out the index and virtual address of the
- * pending_pages[..]. For each 'pending_req' we have have up to
- * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
- * 10 and would index in the pending_pages[..].
- */
-static inline int vaddr_pagenr(struct pending_req *req, int seg)
+static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+                                  int num)
 {
-       return (req - blkbk->pending_reqs) *
-               BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
-}
+       unsigned long flags;
+       int i;
 
-#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
+       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+       for (i = 0; i < num; i++)
+               list_add(&page[i]->lru, &blkif->free_pages);
+       blkif->free_pages_num += num;
+       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+}
 
-static inline unsigned long vaddr(struct pending_req *req, int seg)
+static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
 {
-       unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
-       return (unsigned long)pfn_to_kaddr(pfn);
-}
+       /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
+       struct page *page[NUM_BATCH_FREE_PAGES];
+       unsigned int num_pages = 0;
+       unsigned long flags;
 
-#define pending_handle(_req, _seg) \
-       (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
+       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+       while (blkif->free_pages_num > num) {
+               BUG_ON(list_empty(&blkif->free_pages));
+               page[num_pages] = list_first_entry(&blkif->free_pages,
+                                                  struct page, lru);
+               list_del(&page[num_pages]->lru);
+               blkif->free_pages_num--;
+               if (++num_pages == NUM_BATCH_FREE_PAGES) {
+                       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+                       free_xenballooned_pages(num_pages, page);
+                       spin_lock_irqsave(&blkif->free_pages_lock, flags);
+                       num_pages = 0;
+               }
+       }
+       spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+       if (num_pages != 0)
+               free_xenballooned_pages(num_pages, page);
+}
 
+#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
 
 static int do_block_io_op(struct xen_blkif *blkif);
 static int dispatch_rw_block_io(struct xen_blkif *blkif,
@@ -170,13 +178,29 @@ static void make_response(struct xen_blkif *blkif, u64 id,
             (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
 
 
-static void add_persistent_gnt(struct rb_root *root,
+/*
+ * We don't need locking around the persistent grant helpers
+ * because blkback uses a single-thread for each backed, so we
+ * can be sure that this functions will never be called recursively.
+ *
+ * The only exception to that is put_persistent_grant, that can be called
+ * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
+ * bit operations to modify the flags of a persistent grant and to count
+ * the number of used grants.
+ */
+static int add_persistent_gnt(struct xen_blkif *blkif,
                               struct persistent_gnt *persistent_gnt)
 {
-       struct rb_node **new = &(root->rb_node), *parent = NULL;
+       struct rb_node **new = NULL, *parent = NULL;
        struct persistent_gnt *this;
 
+       if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+               if (!blkif->vbd.overflow_max_grants)
+                       blkif->vbd.overflow_max_grants = 1;
+               return -EBUSY;
+       }
        /* Figure out where to put new node */
+       new = &blkif->persistent_gnts.rb_node;
        while (*new) {
                this = container_of(*new, struct persistent_gnt, node);
 
@@ -186,22 +210,28 @@ static void add_persistent_gnt(struct rb_root *root,
                else if (persistent_gnt->gnt > this->gnt)
                        new = &((*new)->rb_right);
                else {
-                       pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
-                       BUG();
+                       pr_alert_ratelimited(DRV_PFX " trying to add a gref that's already in the tree\n");
+                       return -EINVAL;
                }
        }
 
+       bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
+       set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
        /* Add new node and rebalance tree. */
        rb_link_node(&(persistent_gnt->node), parent, new);
-       rb_insert_color(&(persistent_gnt->node), root);
+       rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
+       blkif->persistent_gnt_c++;
+       atomic_inc(&blkif->persistent_gnt_in_use);
+       return 0;
 }
 
-static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
                                                 grant_ref_t gref)
 {
        struct persistent_gnt *data;
-       struct rb_node *node = root->rb_node;
+       struct rb_node *node = NULL;
 
+       node = blkif->persistent_gnts.rb_node;
        while (node) {
                data = container_of(node, struct persistent_gnt, node);
 
@@ -209,13 +239,31 @@ static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
                        node = node->rb_left;
                else if (gref > data->gnt)
                        node = node->rb_right;
-               else
+               else {
+                       if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+                               pr_alert_ratelimited(DRV_PFX " requesting a grant already in use\n");
+                               return NULL;
+                       }
+                       set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+                       atomic_inc(&blkif->persistent_gnt_in_use);
                        return data;
+               }
        }
        return NULL;
 }
 
-static void free_persistent_gnts(struct rb_root *root, unsigned int num)
+static void put_persistent_gnt(struct xen_blkif *blkif,
+                               struct persistent_gnt *persistent_gnt)
+{
+       if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+                 pr_alert_ratelimited(DRV_PFX " freeing a grant already unused");
+       set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+       clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+       atomic_dec(&blkif->persistent_gnt_in_use);
+}
+
+static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+                                 unsigned int num)
 {
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -240,7 +288,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
                        ret = gnttab_unmap_refs(unmap, NULL, pages,
                                segs_to_unmap);
                        BUG_ON(ret);
-                       free_xenballooned_pages(segs_to_unmap, pages);
+                       put_free_pages(blkif, pages, segs_to_unmap);
                        segs_to_unmap = 0;
                }
 
@@ -251,21 +299,148 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
        BUG_ON(num != 0);
 }
 
+static void unmap_purged_grants(struct work_struct *work)
+{
+       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct persistent_gnt *persistent_gnt;
+       int ret, segs_to_unmap = 0;
+       struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+
+       while(!list_empty(&blkif->persistent_purge_list)) {
+               persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+                                                 struct persistent_gnt,
+                                                 remove_node);
+               list_del(&persistent_gnt->remove_node);
+
+               gnttab_set_unmap_op(&unmap[segs_to_unmap],
+                       vaddr(persistent_gnt->page),
+                       GNTMAP_host_map,
+                       persistent_gnt->handle);
+
+               pages[segs_to_unmap] = persistent_gnt->page;
+
+               if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+                       ret = gnttab_unmap_refs(unmap, NULL, pages,
+                               segs_to_unmap);
+                       BUG_ON(ret);
+                       put_free_pages(blkif, pages, segs_to_unmap);
+                       segs_to_unmap = 0;
+               }
+               kfree(persistent_gnt);
+       }
+       if (segs_to_unmap > 0) {
+               ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
+               BUG_ON(ret);
+               put_free_pages(blkif, pages, segs_to_unmap);
+       }
+}
+
+static void purge_persistent_gnt(struct xen_blkif *blkif)
+{
+       struct persistent_gnt *persistent_gnt;
+       struct rb_node *n;
+       unsigned int num_clean, total;
+       bool scan_used = false, clean_used = false;
+       struct rb_root *root;
+
+       if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
+           (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
+           !blkif->vbd.overflow_max_grants)) {
+               return;
+       }
+
+       if (work_pending(&blkif->persistent_purge_work)) {
+               pr_alert_ratelimited(DRV_PFX "Scheduled work from previous purge is still pending, cannot purge list\n");
+               return;
+       }
+
+       num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
+       num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+       num_clean = min(blkif->persistent_gnt_c, num_clean);
+       if ((num_clean == 0) ||
+           (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
+               return;
+
+       /*
+        * At this point, we can assure that there will be no calls
+         * to get_persistent_grant (because we are executing this code from
+         * xen_blkif_schedule), there can only be calls to put_persistent_gnt,
+         * which means that the number of currently used grants will go down,
+         * but never up, so we will always be able to remove the requested
+         * number of grants.
+        */
+
+       total = num_clean;
+
+       pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
+
+       INIT_LIST_HEAD(&blkif->persistent_purge_list);
+       root = &blkif->persistent_gnts;
+purge_list:
+       foreach_grant_safe(persistent_gnt, n, root, node) {
+               BUG_ON(persistent_gnt->handle ==
+                       BLKBACK_INVALID_HANDLE);
+
+               if (clean_used) {
+                       clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+                       continue;
+               }
+
+               if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+                       continue;
+               if (!scan_used &&
+                   (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+                       continue;
+
+               rb_erase(&persistent_gnt->node, root);
+               list_add(&persistent_gnt->remove_node,
+                        &blkif->persistent_purge_list);
+               if (--num_clean == 0)
+                       goto finished;
+       }
+       /*
+        * If we get here it means we also need to start cleaning
+        * grants that were used since last purge in order to cope
+        * with the requested num
+        */
+       if (!scan_used && !clean_used) {
+               pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean);
+               scan_used = true;
+               goto purge_list;
+       }
+finished:
+       if (!clean_used) {
+               pr_debug(DRV_PFX "Finished scanning for grants to clean, removing used flag\n");
+               clean_used = true;
+               goto purge_list;
+       }
+
+       blkif->persistent_gnt_c -= (total - num_clean);
+       blkif->vbd.overflow_max_grants = 0;
+
+       /* We can defer this work */
+       INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants);
+       schedule_work(&blkif->persistent_purge_work);
+       pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total);
+       return;
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
-static struct pending_req *alloc_req(void)
+static struct pending_req *alloc_req(struct xen_blkif *blkif)
 {
        struct pending_req *req = NULL;
        unsigned long flags;
 
-       spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-       if (!list_empty(&blkbk->pending_free)) {
-               req = list_entry(blkbk->pending_free.next, struct pending_req,
+       spin_lock_irqsave(&blkif->pending_free_lock, flags);
+       if (!list_empty(&blkif->pending_free)) {
+               req = list_entry(blkif->pending_free.next, struct pending_req,
                                 free_list);
                list_del(&req->free_list);
        }
-       spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+       spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
        return req;
 }
 
@@ -273,17 +448,17 @@ static struct pending_req *alloc_req(void)
  * Return the 'pending_req' structure back to the freepool. We also
  * wake up the thread if it was waiting for a free page.
  */
-static void free_req(struct pending_req *req)
+static void free_req(struct xen_blkif *blkif, struct pending_req *req)
 {
        unsigned long flags;
        int was_empty;
 
-       spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-       was_empty = list_empty(&blkbk->pending_free);
-       list_add(&req->free_list, &blkbk->pending_free);
-       spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+       spin_lock_irqsave(&blkif->pending_free_lock, flags);
+       was_empty = list_empty(&blkif->pending_free);
+       list_add(&req->free_list, &blkif->pending_free);
+       spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
        if (was_empty)
-               wake_up(&blkbk->pending_free_wq);
+               wake_up(&blkif->pending_free_wq);
 }
 
 /*
@@ -382,10 +557,12 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 static void print_stats(struct xen_blkif *blkif)
 {
        pr_info("xen-blkback (%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu"
-                "  |  ds %4llu\n",
+                "  |  ds %4llu | pg: %4u/%4d\n",
                 current->comm, blkif->st_oo_req,
                 blkif->st_rd_req, blkif->st_wr_req,
-                blkif->st_f_req, blkif->st_ds_req);
+                blkif->st_f_req, blkif->st_ds_req,
+                blkif->persistent_gnt_c,
+                xen_blkif_max_pgrants);
        blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
        blkif->st_rd_req = 0;
        blkif->st_wr_req = 0;
@@ -397,6 +574,8 @@ int xen_blkif_schedule(void *arg)
 {
        struct xen_blkif *blkif = arg;
        struct xen_vbd *vbd = &blkif->vbd;
+       unsigned long timeout;
+       int ret;
 
        xen_blkif_get(blkif);
 
@@ -406,27 +585,52 @@ int xen_blkif_schedule(void *arg)
                if (unlikely(vbd->size != vbd_sz(vbd)))
                        xen_vbd_resize(blkif);
 
-               wait_event_interruptible(
+               timeout = msecs_to_jiffies(LRU_INTERVAL);
+
+               timeout = wait_event_interruptible_timeout(
                        blkif->wq,
-                       blkif->waiting_reqs || kthread_should_stop());
-               wait_event_interruptible(
-                       blkbk->pending_free_wq,
-                       !list_empty(&blkbk->pending_free) ||
-                       kthread_should_stop());
+                       blkif->waiting_reqs || kthread_should_stop(),
+                       timeout);
+               if (timeout == 0)
+                       goto purge_gnt_list;
+               timeout = wait_event_interruptible_timeout(
+                       blkif->pending_free_wq,
+                       !list_empty(&blkif->pending_free) ||
+                       kthread_should_stop(),
+                       timeout);
+               if (timeout == 0)
+                       goto purge_gnt_list;
 
                blkif->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */
 
-               if (do_block_io_op(blkif))
+               ret = do_block_io_op(blkif);
+               if (ret > 0)
                        blkif->waiting_reqs = 1;
+               if (ret == -EACCES)
+                       wait_event_interruptible(blkif->shutdown_wq,
+                                                kthread_should_stop());
+
+purge_gnt_list:
+               if (blkif->vbd.feature_gnt_persistent &&
+                   time_after(jiffies, blkif->next_lru)) {
+                       purge_persistent_gnt(blkif);
+                       blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+               }
+
+               /* Shrink if we have more than xen_blkif_max_buffer_pages */
+               shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
 
                if (log_stats && time_after(jiffies, blkif->st_print))
                        print_stats(blkif);
        }
 
+       /* Since we are shutting down remove all pages from the buffer */
+       shrink_free_pagepool(blkif, 0 /* All */);
+
        /* Free all persistent grant pages */
        if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
-               free_persistent_gnts(&blkif->persistent_gnts,
+               free_persistent_gnts(blkif, &blkif->persistent_gnts,
                        blkif->persistent_gnt_c);
 
        BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
@@ -441,148 +645,98 @@ int xen_blkif_schedule(void *arg)
        return 0;
 }
 
-struct seg_buf {
-       unsigned int offset;
-       unsigned int nsec;
-};
 /*
  * Unmap the grant references, and also remove the M2P over-rides
  * used in the 'pending_req'.
  */
-static void xen_blkbk_unmap(struct pending_req *req)
+static void xen_blkbk_unmap(struct xen_blkif *blkif,
+                            struct grant_page *pages[],
+                            int num)
 {
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-       struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int i, invcount = 0;
-       grant_handle_t handle;
        int ret;
 
-       for (i = 0; i < req->nr_pages; i++) {
-               if (!test_bit(i, req->unmap_seg))
+       for (i = 0; i < num; i++) {
+               if (pages[i]->persistent_gnt != NULL) {
+                       put_persistent_gnt(blkif, pages[i]->persistent_gnt);
                        continue;
-               handle = pending_handle(req, i);
-               if (handle == BLKBACK_INVALID_HANDLE)
+               }
+               if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
                        continue;
-               gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
-                                   GNTMAP_host_map, handle);
-               pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
-               pages[invcount] = virt_to_page(vaddr(req, i));
-               invcount++;
+               unmap_pages[invcount] = pages[i]->page;
+               gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page),
+                                   GNTMAP_host_map, pages[i]->handle);
+               pages[i]->handle = BLKBACK_INVALID_HANDLE;
+               if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+                       ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
+                                               invcount);
+                       BUG_ON(ret);
+                       put_free_pages(blkif, unmap_pages, invcount);
+                       invcount = 0;
+               }
+       }
+       if (invcount) {
+               ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
+               BUG_ON(ret);
+               put_free_pages(blkif, unmap_pages, invcount);
        }
-
-       ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
-       BUG_ON(ret);
 }
 
-static int xen_blkbk_map(struct blkif_request *req,
-                        struct pending_req *pending_req,
-                        struct seg_buf seg[],
-                        struct page *pages[])
+static int xen_blkbk_map(struct xen_blkif *blkif,
+                        struct grant_page *pages[],
+                        int num, bool ro)
 {
        struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-       struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct persistent_gnt *persistent_gnt = NULL;
-       struct xen_blkif *blkif = pending_req->blkif;
        phys_addr_t addr = 0;
-       int i, j;
-       bool new_map;
-       int nseg = req->u.rw.nr_segments;
+       int i, seg_idx, new_map_idx;
        int segs_to_map = 0;
        int ret = 0;
+       int last_map = 0, map_until = 0;
        int use_persistent_gnts;
 
        use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
 
-       BUG_ON(blkif->persistent_gnt_c >
-                  max_mapped_grant_pages(pending_req->blkif->blk_protocol));
-
        /*
         * Fill out preq.nr_sects with proper amount of sectors, and setup
         * assign map[..] with the PFN of the page in our domain with the
         * corresponding grant reference for each page.
         */
-       for (i = 0; i < nseg; i++) {
+again:
+       for (i = map_until; i < num; i++) {
                uint32_t flags;
 
                if (use_persistent_gnts)
                        persistent_gnt = get_persistent_gnt(
-                               &blkif->persistent_gnts,
-                               req->u.rw.seg[i].gref);
+                               blkif,
+                               pages[i]->gref);
 
                if (persistent_gnt) {
                        /*
                         * We are using persistent grants and
                         * the grant is already mapped
                         */
-                       new_map = false;
-               } else if (use_persistent_gnts &&
-                          blkif->persistent_gnt_c <
-                          max_mapped_grant_pages(blkif->blk_protocol)) {
-                       /*
-                        * We are using persistent grants, the grant is
-                        * not mapped but we have room for it
-                        */
-                       new_map = true;
-                       persistent_gnt = kmalloc(
-                               sizeof(struct persistent_gnt),
-                               GFP_KERNEL);
-                       if (!persistent_gnt)
-                               return -ENOMEM;
-                       if (alloc_xenballooned_pages(1, &persistent_gnt->page,
-                           false)) {
-                               kfree(persistent_gnt);
-                               return -ENOMEM;
-                       }
-                       persistent_gnt->gnt = req->u.rw.seg[i].gref;
-                       persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
-
-                       pages_to_gnt[segs_to_map] =
-                               persistent_gnt->page;
-                       addr = (unsigned long) pfn_to_kaddr(
-                               page_to_pfn(persistent_gnt->page));
-
-                       add_persistent_gnt(&blkif->persistent_gnts,
-                               persistent_gnt);
-                       blkif->persistent_gnt_c++;
-                       pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
-                                persistent_gnt->gnt, blkif->persistent_gnt_c,
-                                max_mapped_grant_pages(blkif->blk_protocol));
+                       pages[i]->page = persistent_gnt->page;
+                       pages[i]->persistent_gnt = persistent_gnt;
                } else {
-                       /*
-                        * We are either using persistent grants and
-                        * hit the maximum limit of grants mapped,
-                        * or we are not using persistent grants.
-                        */
-                       if (use_persistent_gnts &&
-                               !blkif->vbd.overflow_max_grants) {
-                               blkif->vbd.overflow_max_grants = 1;
-                               pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
-                                        blkif->domid, blkif->vbd.handle);
-                       }
-                       new_map = true;
-                       pages[i] = blkbk->pending_page(pending_req, i);
-                       addr = vaddr(pending_req, i);
-                       pages_to_gnt[segs_to_map] =
-                               blkbk->pending_page(pending_req, i);
-               }
-
-               if (persistent_gnt) {
-                       pages[i] = persistent_gnt->page;
-                       persistent_gnts[i] = persistent_gnt;
-               } else {
-                       persistent_gnts[i] = NULL;
-               }
-
-               if (new_map) {
+                       if (get_free_page(blkif, &pages[i]->page))
+                               goto out_of_memory;
+                       addr = vaddr(pages[i]->page);
+                       pages_to_gnt[segs_to_map] = pages[i]->page;
+                       pages[i]->persistent_gnt = NULL;
                        flags = GNTMAP_host_map;
-                       if (!persistent_gnt &&
-                           (pending_req->operation != BLKIF_OP_READ))
+                       if (!use_persistent_gnts && ro)
                                flags |= GNTMAP_readonly;
                        gnttab_set_map_op(&map[segs_to_map++], addr,
-                                         flags, req->u.rw.seg[i].gref,
+                                         flags, pages[i]->gref,
                                          blkif->domid);
                }
+               map_until = i + 1;
+               if (segs_to_map == BLKIF_MAX_SEGMENTS_PER_REQUEST)
+                       break;
        }
 
        if (segs_to_map) {
@@ -595,49 +749,133 @@ static int xen_blkbk_map(struct blkif_request *req,
         * so that when we access vaddr(pending_req,i) it has the contents of
         * the page from the other domain.
         */
-       bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-       for (i = 0, j = 0; i < nseg; i++) {
-               if (!persistent_gnts[i] ||
-                   persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
+       for (seg_idx = last_map, new_map_idx = 0; seg_idx < map_until; seg_idx++) {
+               if (!pages[seg_idx]->persistent_gnt) {
                        /* This is a newly mapped grant */
-                       BUG_ON(j >= segs_to_map);
-                       if (unlikely(map[j].status != 0)) {
+                       BUG_ON(new_map_idx >= segs_to_map);
+                       if (unlikely(map[new_map_idx].status != 0)) {
                                pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-                               map[j].handle = BLKBACK_INVALID_HANDLE;
+                               pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
                                ret |= 1;
-                               if (persistent_gnts[i]) {
-                                       rb_erase(&persistent_gnts[i]->node,
-                                                &blkif->persistent_gnts);
-                                       blkif->persistent_gnt_c--;
-                                       kfree(persistent_gnts[i]);
-                                       persistent_gnts[i] = NULL;
-                               }
+                               goto next;
                        }
+                       pages[seg_idx]->handle = map[new_map_idx].handle;
+               } else {
+                       continue;
                }
-               if (persistent_gnts[i]) {
-                       if (persistent_gnts[i]->handle ==
-                           BLKBACK_INVALID_HANDLE) {
+               if (use_persistent_gnts &&
+                   blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
+                       /*
+                        * We are using persistent grants, the grant is
+                        * not mapped but we might have room for it.
+                        */
+                       persistent_gnt = kmalloc(sizeof(struct persistent_gnt),
+                                                GFP_KERNEL);
+                       if (!persistent_gnt) {
                                /*
-                                * If this is a new persistent grant
-                                * save the handler
+                                * If we don't have enough memory to
+                                * allocate the persistent_gnt struct
+                                * map this grant non-persistenly
                                 */
-                               persistent_gnts[i]->handle = map[j++].handle;
+                               goto next;
                        }
-                       pending_handle(pending_req, i) =
-                               persistent_gnts[i]->handle;
+                       persistent_gnt->gnt = map[new_map_idx].ref;
+                       persistent_gnt->handle = map[new_map_idx].handle;
+                       persistent_gnt->page = pages[seg_idx]->page;
+                       if (add_persistent_gnt(blkif,
+                                              persistent_gnt)) {
+                               kfree(persistent_gnt);
+                               persistent_gnt = NULL;
+                               goto next;
+                       }
+                       pages[seg_idx]->persistent_gnt = persistent_gnt;
+                       pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+                                persistent_gnt->gnt, blkif->persistent_gnt_c,
+                                xen_blkif_max_pgrants);
+                       goto next;
+               }
+               if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
+                       blkif->vbd.overflow_max_grants = 1;
+                       pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+                                blkif->domid, blkif->vbd.handle);
+               }
+               /*
+                * We could not map this grant persistently, so use it as
+                * a non-persistent grant.
+                */
+next:
+               new_map_idx++;
+       }
+       segs_to_map = 0;
+       last_map = map_until;
+       if (map_until != num)
+               goto again;
 
-                       if (ret)
-                               continue;
-               } else {
-                       pending_handle(pending_req, i) = map[j++].handle;
-                       bitmap_set(pending_req->unmap_seg, i, 1);
+       return ret;
+
+out_of_memory:
+       pr_alert(DRV_PFX "%s: out of memory\n", __func__);
+       put_free_pages(blkif, pages_to_gnt, segs_to_map);
+       return -ENOMEM;
+}
+
+static int xen_blkbk_map_seg(struct pending_req *pending_req)
+{
+       int rc;
+
+       rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
+                          pending_req->nr_pages,
+                          (pending_req->operation != BLKIF_OP_READ));
+
+       return rc;
+}
 
-                       if (ret)
-                               continue;
+static int xen_blkbk_parse_indirect(struct blkif_request *req,
+                                   struct pending_req *pending_req,
+                                   struct seg_buf seg[],
+                                   struct phys_req *preq)
+{
+       struct grant_page **pages = pending_req->indirect_pages;
+       struct xen_blkif *blkif = pending_req->blkif;
+       int indirect_grefs, rc, n, nseg, i;
+       struct blkif_request_segment_aligned *segments = NULL;
+
+       nseg = pending_req->nr_pages;
+       indirect_grefs = INDIRECT_PAGES(nseg);
+       BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
+
+       for (i = 0; i < indirect_grefs; i++)
+               pages[i]->gref = req->u.indirect.indirect_grefs[i];
+
+       rc = xen_blkbk_map(blkif, pages, indirect_grefs, true);
+       if (rc)
+               goto unmap;
+
+       for (n = 0, i = 0; n < nseg; n++) {
+               if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
+                       /* Map indirect segments */
+                       if (segments)
+                               kunmap_atomic(segments);
+                       segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
+               }
+               i = n % SEGS_PER_INDIRECT_FRAME;
+               pending_req->segments[n]->gref = segments[i].gref;
+               seg[n].nsec = segments[i].last_sect -
+                       segments[i].first_sect + 1;
+               seg[n].offset = (segments[i].first_sect << 9);
+               if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) ||
+                   (segments[i].last_sect < segments[i].first_sect)) {
+                       rc = -EINVAL;
+                       goto unmap;
                }
-               seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+               preq->nr_sects += seg[n].nsec;
        }
-       return ret;
+
+unmap:
+       if (segments)
+               kunmap_atomic(segments);
+       xen_blkbk_unmap(blkif, pages, indirect_grefs);
+       return rc;
 }
 
 static int dispatch_discard_io(struct xen_blkif *blkif,
@@ -647,7 +885,18 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
        int status = BLKIF_RSP_OKAY;
        struct block_device *bdev = blkif->vbd.bdev;
        unsigned long secure;
+       struct phys_req preq;
+
+       preq.sector_number = req->u.discard.sector_number;
+       preq.nr_sects      = req->u.discard.nr_sectors;
 
+       err = xen_vbd_translate(&preq, blkif, WRITE);
+       if (err) {
+               pr_warn(DRV_PFX "access denied: DISCARD [%llu->%llu] on dev=%04x\n",
+                       preq.sector_number,
+                       preq.sector_number + preq.nr_sects, blkif->vbd.pdevice);
+               goto fail_response;
+       }
        blkif->st_ds_req++;
 
        xen_blkif_get(blkif);
@@ -658,7 +907,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
        err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
                                   req->u.discard.nr_sectors,
                                   GFP_KERNEL, secure);
-
+fail_response:
        if (err == -EOPNOTSUPP) {
                pr_debug(DRV_PFX "discard op failed, not supported\n");
                status = BLKIF_RSP_EOPNOTSUPP;
@@ -674,7 +923,7 @@ static int dispatch_other_io(struct xen_blkif *blkif,
                             struct blkif_request *req,
                             struct pending_req *pending_req)
 {
-       free_req(pending_req);
+       free_req(blkif, pending_req);
        make_response(blkif, req->u.other.id, req->operation,
                      BLKIF_RSP_EOPNOTSUPP);
        return -EIO;
@@ -726,7 +975,9 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
         * the proper response on the ring.
         */
        if (atomic_dec_and_test(&pending_req->pendcnt)) {
-               xen_blkbk_unmap(pending_req);
+               xen_blkbk_unmap(pending_req->blkif,
+                               pending_req->segments,
+                               pending_req->nr_pages);
                make_response(pending_req->blkif, pending_req->id,
                              pending_req->operation, pending_req->status);
                xen_blkif_put(pending_req->blkif);
@@ -734,7 +985,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
                        if (atomic_read(&pending_req->blkif->drain))
                                complete(&pending_req->blkif->drain_complete);
                }
-               free_req(pending_req);
+               free_req(pending_req->blkif, pending_req);
        }
 }
 
@@ -767,6 +1018,12 @@ __do_block_io_op(struct xen_blkif *blkif)
        rp = blk_rings->common.sring->req_prod;
        rmb(); /* Ensure we see queued requests up to 'rp'. */
 
+       if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
+               rc = blk_rings->common.rsp_prod_pvt;
+               pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
+                       rp, rc, rp - rc, blkif->vbd.pdevice);
+               return -EACCES;
+       }
        while (rc != rp) {
 
                if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
@@ -777,7 +1034,7 @@ __do_block_io_op(struct xen_blkif *blkif)
                        break;
                }
 
-               pending_req = alloc_req();
+               pending_req = alloc_req(blkif);
                if (NULL == pending_req) {
                        blkif->st_oo_req++;
                        more_to_do = 1;
@@ -807,11 +1064,12 @@ __do_block_io_op(struct xen_blkif *blkif)
                case BLKIF_OP_WRITE:
                case BLKIF_OP_WRITE_BARRIER:
                case BLKIF_OP_FLUSH_DISKCACHE:
+               case BLKIF_OP_INDIRECT:
                        if (dispatch_rw_block_io(blkif, &req, pending_req))
                                goto done;
                        break;
                case BLKIF_OP_DISCARD:
-                       free_req(pending_req);
+                       free_req(blkif, pending_req);
                        if (dispatch_discard_io(blkif, &req))
                                goto done;
                        break;
@@ -853,17 +1111,28 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                                struct pending_req *pending_req)
 {
        struct phys_req preq;
-       struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct seg_buf *seg = pending_req->seg;
        unsigned int nseg;
        struct bio *bio = NULL;
-       struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct bio **biolist = pending_req->biolist;
        int i, nbio = 0;
        int operation;
        struct blk_plug plug;
        bool drain = false;
-       struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct grant_page **pages = pending_req->segments;
+       unsigned short req_operation;
+
+       req_operation = req->operation == BLKIF_OP_INDIRECT ?
+                       req->u.indirect.indirect_op : req->operation;
+       if ((req->operation == BLKIF_OP_INDIRECT) &&
+           (req_operation != BLKIF_OP_READ) &&
+           (req_operation != BLKIF_OP_WRITE)) {
+               pr_debug(DRV_PFX "Invalid indirect operation (%u)\n",
+                        req_operation);
+               goto fail_response;
+       }
 
-       switch (req->operation) {
+       switch (req_operation) {
        case BLKIF_OP_READ:
                blkif->st_rd_req++;
                operation = READ;
@@ -885,33 +1154,47 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        }
 
        /* Check that the number of segments is sane. */
-       nseg = req->u.rw.nr_segments;
+       nseg = req->operation == BLKIF_OP_INDIRECT ?
+              req->u.indirect.nr_segments : req->u.rw.nr_segments;
 
        if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
-           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+           unlikely((req->operation != BLKIF_OP_INDIRECT) &&
+                    (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) ||
+           unlikely((req->operation == BLKIF_OP_INDIRECT) &&
+                    (nseg > MAX_INDIRECT_SEGMENTS))) {
                pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
                         nseg);
                /* Haven't submitted any bio's yet. */
                goto fail_response;
        }
 
-       preq.sector_number = req->u.rw.sector_number;
        preq.nr_sects      = 0;
 
        pending_req->blkif     = blkif;
        pending_req->id        = req->u.rw.id;
-       pending_req->operation = req->operation;
+       pending_req->operation = req_operation;
        pending_req->status    = BLKIF_RSP_OKAY;
        pending_req->nr_pages  = nseg;
 
-       for (i = 0; i < nseg; i++) {
-               seg[i].nsec = req->u.rw.seg[i].last_sect -
-                       req->u.rw.seg[i].first_sect + 1;
-               if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
-                   (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
+       if (req->operation != BLKIF_OP_INDIRECT) {
+               preq.dev               = req->u.rw.handle;
+               preq.sector_number     = req->u.rw.sector_number;
+               for (i = 0; i < nseg; i++) {
+                       pages[i]->gref = req->u.rw.seg[i].gref;
+                       seg[i].nsec = req->u.rw.seg[i].last_sect -
+                               req->u.rw.seg[i].first_sect + 1;
+                       seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+                       if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+                           (req->u.rw.seg[i].last_sect <
+                            req->u.rw.seg[i].first_sect))
+                               goto fail_response;
+                       preq.nr_sects += seg[i].nsec;
+               }
+       } else {
+               preq.dev               = req->u.indirect.handle;
+               preq.sector_number     = req->u.indirect.sector_number;
+               if (xen_blkbk_parse_indirect(req, pending_req, seg, &preq))
                        goto fail_response;
-               preq.nr_sects += seg[i].nsec;
-
        }
 
        if (xen_vbd_translate(&preq, blkif, operation) != 0) {
@@ -948,7 +1231,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
         * the hypercall to unmap the grants - that is all done in
         * xen_blkbk_unmap.
         */
-       if (xen_blkbk_map(req, pending_req, seg, pages))
+       if (xen_blkbk_map_seg(pending_req))
                goto fail_flush;
 
        /*
@@ -960,11 +1243,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        for (i = 0; i < nseg; i++) {
                while ((bio == NULL) ||
                       (bio_add_page(bio,
-                                    pages[i],
+                                    pages[i]->page,
                                     seg[i].nsec << 9,
                                     seg[i].offset) == 0)) {
 
-                       bio = bio_alloc(GFP_KERNEL, nseg-i);
+                       int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES);
+                       bio = bio_alloc(GFP_KERNEL, nr_iovecs);
                        if (unlikely(bio == NULL))
                                goto fail_put_bio;
 
@@ -1009,11 +1293,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
        return 0;
 
  fail_flush:
-       xen_blkbk_unmap(pending_req);
+       xen_blkbk_unmap(blkif, pending_req->segments,
+                       pending_req->nr_pages);
  fail_response:
        /* Haven't submitted any bio's yet. */
-       make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
-       free_req(pending_req);
+       make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
+       free_req(blkif, pending_req);
        msleep(1); /* back off a bit */
        return -EIO;
 
@@ -1070,73 +1355,20 @@ static void make_response(struct xen_blkif *blkif, u64 id,
 
 static int __init xen_blkif_init(void)
 {
-       int i, mmap_pages;
        int rc = 0;
 
        if (!xen_domain())
                return -ENODEV;
 
-       blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
-       if (!blkbk) {
-               pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
-               return -ENOMEM;
-       }
-
-       mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
-       blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
-                                       xen_blkif_reqs, GFP_KERNEL);
-       blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
-                                       mmap_pages, GFP_KERNEL);
-       blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
-                                       mmap_pages, GFP_KERNEL);
-
-       if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
-           !blkbk->pending_pages) {
-               rc = -ENOMEM;
-               goto out_of_memory;
-       }
-
-       for (i = 0; i < mmap_pages; i++) {
-               blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-               blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
-               if (blkbk->pending_pages[i] == NULL) {
-                       rc = -ENOMEM;
-                       goto out_of_memory;
-               }
-       }
        rc = xen_blkif_interface_init();
        if (rc)
                goto failed_init;
 
-       INIT_LIST_HEAD(&blkbk->pending_free);
-       spin_lock_init(&blkbk->pending_free_lock);
-       init_waitqueue_head(&blkbk->pending_free_wq);
-
-       for (i = 0; i < xen_blkif_reqs; i++)
-               list_add_tail(&blkbk->pending_reqs[i].free_list,
-                             &blkbk->pending_free);
-
        rc = xen_blkif_xenbus_init();
        if (rc)
                goto failed_init;
 
-       return 0;
-
- out_of_memory:
-       pr_alert(DRV_PFX "%s: out of memory\n", __func__);
  failed_init:
-       kfree(blkbk->pending_reqs);
-       kfree(blkbk->pending_grant_handles);
-       if (blkbk->pending_pages) {
-               for (i = 0; i < mmap_pages; i++) {
-                       if (blkbk->pending_pages[i])
-                               __free_page(blkbk->pending_pages[i]);
-               }
-               kfree(blkbk->pending_pages);
-       }
-       kfree(blkbk);
-       blkbk = NULL;
        return rc;
 }
 
index 60103e2517ba28ac5c946f32e1bd9e0e6eb7f668..8d8807563d9967afd28b70dcb0cd072bed35e8ec 100644 (file)
                 __func__, __LINE__, ##args)
 
 
+/*
+ * This is the maximum number of segments that would be allowed in indirect
+ * requests. This value will also be passed to the frontend.
+ */
+#define MAX_INDIRECT_SEGMENTS 256
+
+#define SEGS_PER_INDIRECT_FRAME \
+       (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+#define MAX_INDIRECT_PAGES \
+       ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) \
+       ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
 /* Not a real protocol.  Used to generate ring structs which contain
  * the elements common to all protocols only.  This way we get a
  * compiler-checkable way to use common struct elements, so we can
@@ -83,12 +96,31 @@ struct blkif_x86_32_request_other {
        uint64_t       id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_x86_32_request_indirect {
+       uint8_t        indirect_op;
+       uint16_t       nr_segments;
+       uint64_t       id;
+       blkif_sector_t sector_number;
+       blkif_vdev_t   handle;
+       uint16_t       _pad1;
+       grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+       /*
+        * The maximum number of indirect segments (and pages) that will
+        * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+        * is also exported to the guest (via xenstore
+        * feature-max-indirect-segments entry), so the frontend knows how
+        * many indirect segments the backend supports.
+        */
+       uint64_t       _pad2;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_32_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_x86_32_request_rw rw;
                struct blkif_x86_32_request_discard discard;
                struct blkif_x86_32_request_other other;
+               struct blkif_x86_32_request_indirect indirect;
        } u;
 } __attribute__((__packed__));
 
@@ -127,12 +159,32 @@ struct blkif_x86_64_request_other {
        uint64_t       id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_x86_64_request_indirect {
+       uint8_t        indirect_op;
+       uint16_t       nr_segments;
+       uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8   */
+       uint64_t       id;
+       blkif_sector_t sector_number;
+       blkif_vdev_t   handle;
+       uint16_t       _pad2;
+       grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+       /*
+        * The maximum number of indirect segments (and pages) that will
+        * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+        * is also exported to the guest (via xenstore
+        * feature-max-indirect-segments entry), so the frontend knows how
+        * many indirect segments the backend supports.
+        */
+       uint32_t       _pad3;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_64_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_x86_64_request_rw rw;
                struct blkif_x86_64_request_discard discard;
                struct blkif_x86_64_request_other other;
+               struct blkif_x86_64_request_indirect indirect;
        } u;
 } __attribute__((__packed__));
 
@@ -182,12 +234,26 @@ struct xen_vbd {
 
 struct backend_info;
 
+/* Number of available flags */
+#define PERSISTENT_GNT_FLAGS_SIZE      2
+/* This persistent grant is currently in use */
+#define PERSISTENT_GNT_ACTIVE          0
+/*
+ * This persistent grant has been used, this flag is set when we remove the
+ * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
+ */
+#define PERSISTENT_GNT_WAS_ACTIVE      1
+
+/* Number of requests that we can fit in a ring */
+#define XEN_BLKIF_REQS                 32
 
 struct persistent_gnt {
        struct page *page;
        grant_ref_t gnt;
        grant_handle_t handle;
+       DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
        struct rb_node node;
+       struct list_head remove_node;
 };
 
 struct xen_blkif {
@@ -219,6 +285,23 @@ struct xen_blkif {
        /* tree to store persistent grants */
        struct rb_root          persistent_gnts;
        unsigned int            persistent_gnt_c;
+       atomic_t                persistent_gnt_in_use;
+       unsigned long           next_lru;
+
+       /* used by the kworker that offload work from the persistent purge */
+       struct list_head        persistent_purge_list;
+       struct work_struct      persistent_purge_work;
+
+       /* buffer of free pages to map grant refs */
+       spinlock_t              free_pages_lock;
+       int                     free_pages_num;
+       struct list_head        free_pages;
+
+       /* List of all 'pending_req' available */
+       struct list_head        pending_free;
+       /* And its spinlock. */
+       spinlock_t              pending_free_lock;
+       wait_queue_head_t       pending_free_wq;
 
        /* statistics */
        unsigned long           st_print;
@@ -231,6 +314,41 @@ struct xen_blkif {
        unsigned long long                      st_wr_sect;
 
        wait_queue_head_t       waiting_to_free;
+       /* Thread shutdown wait queue. */
+       wait_queue_head_t       shutdown_wq;
+};
+
+struct seg_buf {
+       unsigned long offset;
+       unsigned int nsec;
+};
+
+struct grant_page {
+       struct page             *page;
+       struct persistent_gnt   *persistent_gnt;
+       grant_handle_t          handle;
+       grant_ref_t             gref;
+};
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+struct pending_req {
+       struct xen_blkif        *blkif;
+       u64                     id;
+       int                     nr_pages;
+       atomic_t                pendcnt;
+       unsigned short          operation;
+       int                     status;
+       struct list_head        free_list;
+       struct grant_page       *segments[MAX_INDIRECT_SEGMENTS];
+       /* Indirect descriptors */
+       struct grant_page       *indirect_pages[MAX_INDIRECT_PAGES];
+       struct seg_buf          seg[MAX_INDIRECT_SEGMENTS];
+       struct bio              *biolist[MAX_INDIRECT_SEGMENTS];
 };
 
 
@@ -257,6 +375,7 @@ int xen_blkif_xenbus_init(void);
 
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
+int xen_blkif_purge_persistent(void *arg);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                              struct backend_info *be, int state);
@@ -268,7 +387,7 @@ struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
                                        struct blkif_x86_32_request *src)
 {
-       int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
        dst->operation = src->operation;
        switch (src->operation) {
        case BLKIF_OP_READ:
@@ -291,6 +410,18 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
                dst->u.discard.sector_number = src->u.discard.sector_number;
                dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
                break;
+       case BLKIF_OP_INDIRECT:
+               dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+               dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+               dst->u.indirect.handle = src->u.indirect.handle;
+               dst->u.indirect.id = src->u.indirect.id;
+               dst->u.indirect.sector_number = src->u.indirect.sector_number;
+               barrier();
+               j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+               for (i = 0; i < j; i++)
+                       dst->u.indirect.indirect_grefs[i] =
+                               src->u.indirect.indirect_grefs[i];
+               break;
        default:
                /*
                 * Don't know how to translate this op. Only get the
@@ -304,7 +435,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
                                        struct blkif_x86_64_request *src)
 {
-       int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
        dst->operation = src->operation;
        switch (src->operation) {
        case BLKIF_OP_READ:
@@ -327,6 +458,18 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
                dst->u.discard.sector_number = src->u.discard.sector_number;
                dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
                break;
+       case BLKIF_OP_INDIRECT:
+               dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+               dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+               dst->u.indirect.handle = src->u.indirect.handle;
+               dst->u.indirect.id = src->u.indirect.id;
+               dst->u.indirect.sector_number = src->u.indirect.sector_number;
+               barrier();
+               j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+               for (i = 0; i < j; i++)
+                       dst->u.indirect.indirect_grefs[i] =
+                               src->u.indirect.indirect_grefs[i];
+               break;
        default:
                /*
                 * Don't know how to translate this op. Only get the
index 04608a6502d7b25bec73a51da69e1e2748e960c4..fe5c3cd10c341045ca63aeacb15edc93c185a88b 100644 (file)
@@ -98,12 +98,17 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
                err = PTR_ERR(blkif->xenblkd);
                blkif->xenblkd = NULL;
                xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
+               return;
        }
 }
 
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 {
        struct xen_blkif *blkif;
+       struct pending_req *req, *n;
+       int i, j;
+
+       BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
 
        blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
        if (!blkif)
@@ -118,8 +123,57 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
        blkif->st_print = jiffies;
        init_waitqueue_head(&blkif->waiting_to_free);
        blkif->persistent_gnts.rb_node = NULL;
+       spin_lock_init(&blkif->free_pages_lock);
+       INIT_LIST_HEAD(&blkif->free_pages);
+       blkif->free_pages_num = 0;
+       atomic_set(&blkif->persistent_gnt_in_use, 0);
+
+       INIT_LIST_HEAD(&blkif->pending_free);
+
+       for (i = 0; i < XEN_BLKIF_REQS; i++) {
+               req = kzalloc(sizeof(*req), GFP_KERNEL);
+               if (!req)
+                       goto fail;
+               list_add_tail(&req->free_list,
+                             &blkif->pending_free);
+               for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+                       req->segments[j] = kzalloc(sizeof(*req->segments[0]),
+                                                  GFP_KERNEL);
+                       if (!req->segments[j])
+                               goto fail;
+               }
+               for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+                       req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+                                                        GFP_KERNEL);
+                       if (!req->indirect_pages[j])
+                               goto fail;
+               }
+       }
+       spin_lock_init(&blkif->pending_free_lock);
+       init_waitqueue_head(&blkif->pending_free_wq);
+       init_waitqueue_head(&blkif->shutdown_wq);
 
        return blkif;
+
+fail:
+       list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+               list_del(&req->free_list);
+               for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+                       if (!req->segments[j])
+                               break;
+                       kfree(req->segments[j]);
+               }
+               for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+                       if (!req->indirect_pages[j])
+                               break;
+                       kfree(req->indirect_pages[j]);
+               }
+               kfree(req);
+       }
+
+       kmem_cache_free(xen_blkif_cachep, blkif);
+
+       return ERR_PTR(-ENOMEM);
 }
 
 static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
@@ -178,6 +232,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 {
        if (blkif->xenblkd) {
                kthread_stop(blkif->xenblkd);
+               wake_up(&blkif->shutdown_wq);
                blkif->xenblkd = NULL;
        }
 
@@ -198,8 +253,28 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
+       struct pending_req *req, *n;
+       int i = 0, j;
+
        if (!atomic_dec_and_test(&blkif->refcnt))
                BUG();
+
+       /* Check that there is no request in use */
+       list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+               list_del(&req->free_list);
+
+               for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+                       kfree(req->segments[j]);
+
+               for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+                       kfree(req->indirect_pages[j]);
+
+               kfree(req);
+               i++;
+       }
+
+       WARN_ON(i != XEN_BLKIF_REQS);
+
        kmem_cache_free(xen_blkif_cachep, blkif);
 }
 
@@ -678,6 +753,11 @@ again:
                                 dev->nodename);
                goto abort;
        }
+       err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
+                           MAX_INDIRECT_SEGMENTS);
+       if (err)
+               dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
+                        dev->nodename, err);
 
        err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
                            (unsigned long long)vbd_sz(&be->blkif->vbd));
@@ -704,6 +784,11 @@ again:
                                 dev->nodename);
                goto abort;
        }
+       err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
+                           bdev_physical_block_size(be->blkif->vbd.bdev));
+       if (err)
+               xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
+                                dev->nodename);
 
        err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
index d89ef86220f4a55a247083d36d5f8dd3ce74e0d5..a4660bbee8a6279bd4cc1a6def776ddc85bcf169 100644 (file)
@@ -74,12 +74,30 @@ struct grant {
 struct blk_shadow {
        struct blkif_request req;
        struct request *request;
-       struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct grant **grants_used;
+       struct grant **indirect_grants;
+       struct scatterlist *sg;
+};
+
+struct split_bio {
+       struct bio *bio;
+       atomic_t pending;
+       int err;
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
 
+/*
+ * Maximum number of segments in indirect requests, the actual value used by
+ * the frontend driver is the minimum of this value and the value provided
+ * by the backend driver.
+ */
+
+static unsigned int xen_blkif_max_segments = 32;
+module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
+MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
+
 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
 
 /*
@@ -98,7 +116,6 @@ struct blkfront_info
        enum blkif_state connected;
        int ring_ref;
        struct blkif_front_ring ring;
-       struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
@@ -114,6 +131,7 @@ struct blkfront_info
        unsigned int discard_granularity;
        unsigned int discard_alignment;
        unsigned int feature_persistent:1;
+       unsigned int max_indirect_segments;
        int is_ready;
 };
 
@@ -142,6 +160,13 @@ static DEFINE_SPINLOCK(minor_lock);
 
 #define DEV_NAME       "xvd"   /* name in /dev */
 
+#define SEGS_PER_INDIRECT_FRAME \
+       (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+#define INDIRECT_GREFS(_segs) \
+       ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
+static int blkfront_setup_indirect(struct blkfront_info *info);
+
 static int get_id_from_freelist(struct blkfront_info *info)
 {
        unsigned long free = info->shadow_free;
@@ -358,7 +383,8 @@ static int blkif_queue_request(struct request *req)
        struct blkif_request *ring_req;
        unsigned long id;
        unsigned int fsect, lsect;
-       int i, ref;
+       int i, ref, n;
+       struct blkif_request_segment_aligned *segments = NULL;
 
        /*
         * Used to store if we are able to queue the request by just using
@@ -369,21 +395,27 @@ static int blkif_queue_request(struct request *req)
        grant_ref_t gref_head;
        struct grant *gnt_list_entry = NULL;
        struct scatterlist *sg;
+       int nseg, max_grefs;
 
        if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
                return 1;
 
-       /* Check if we have enought grants to allocate a requests */
-       if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+       max_grefs = info->max_indirect_segments ?
+                   info->max_indirect_segments +
+                   INDIRECT_GREFS(info->max_indirect_segments) :
+                   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+       /* Check if we have enough grants to allocate a requests */
+       if (info->persistent_gnts_c < max_grefs) {
                new_persistent_gnts = 1;
                if (gnttab_alloc_grant_references(
-                   BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+                   max_grefs - info->persistent_gnts_c,
                    &gref_head) < 0) {
                        gnttab_request_free_callback(
                                &info->callback,
                                blkif_restart_queue_callback,
                                info,
-                               BLKIF_MAX_SEGMENTS_PER_REQUEST);
+                               max_grefs);
                        return 1;
                }
        } else
@@ -394,42 +426,67 @@ static int blkif_queue_request(struct request *req)
        id = get_id_from_freelist(info);
        info->shadow[id].request = req;
 
-       ring_req->u.rw.id = id;
-       ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
-       ring_req->u.rw.handle = info->handle;
-
-       ring_req->operation = rq_data_dir(req) ?
-               BLKIF_OP_WRITE : BLKIF_OP_READ;
-
-       if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
-               /*
-                * Ideally we can do an unordered flush-to-disk. In case the
-                * backend onlysupports barriers, use that. A barrier request
-                * a superset of FUA, so we can implement it the same
-                * way.  (It's also a FLUSH+FUA, since it is
-                * guaranteed ordered WRT previous writes.)
-                */
-               ring_req->operation = info->flush_op;
-       }
-
        if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
-               /* id, sector_number and handle are set above. */
                ring_req->operation = BLKIF_OP_DISCARD;
                ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+               ring_req->u.discard.id = id;
+               ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
                if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
                        ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
                else
                        ring_req->u.discard.flag = 0;
        } else {
-               ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
-                                                          info->sg);
-               BUG_ON(ring_req->u.rw.nr_segments >
-                      BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-               for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
+               BUG_ON(info->max_indirect_segments == 0 &&
+                      req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+               BUG_ON(info->max_indirect_segments &&
+                      req->nr_phys_segments > info->max_indirect_segments);
+               nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
+               ring_req->u.rw.id = id;
+               if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+                       /*
+                        * The indirect operation can only be a BLKIF_OP_READ or
+                        * BLKIF_OP_WRITE
+                        */
+                       BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA));
+                       ring_req->operation = BLKIF_OP_INDIRECT;
+                       ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
+                               BLKIF_OP_WRITE : BLKIF_OP_READ;
+                       ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req);
+                       ring_req->u.indirect.handle = info->handle;
+                       ring_req->u.indirect.nr_segments = nseg;
+               } else {
+                       ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
+                       ring_req->u.rw.handle = info->handle;
+                       ring_req->operation = rq_data_dir(req) ?
+                               BLKIF_OP_WRITE : BLKIF_OP_READ;
+                       if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+                               /*
+                                * Ideally we can do an unordered flush-to-disk. In case the
+                                * backend onlysupports barriers, use that. A barrier request
+                                * a superset of FUA, so we can implement it the same
+                                * way.  (It's also a FLUSH+FUA, since it is
+                                * guaranteed ordered WRT previous writes.)
+                                */
+                               ring_req->operation = info->flush_op;
+                       }
+                       ring_req->u.rw.nr_segments = nseg;
+               }
+               for_each_sg(info->shadow[id].sg, sg, nseg, i) {
                        fsect = sg->offset >> 9;
                        lsect = fsect + (sg->length >> 9) - 1;
 
+                       if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
+                           (i % SEGS_PER_INDIRECT_FRAME == 0)) {
+                               if (segments)
+                                       kunmap_atomic(segments);
+
+                               n = i / SEGS_PER_INDIRECT_FRAME;
+                               gnt_list_entry = get_grant(&gref_head, info);
+                               info->shadow[id].indirect_grants[n] = gnt_list_entry;
+                               segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
+                               ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
+                       }
+
                        gnt_list_entry = get_grant(&gref_head, info);
                        ref = gnt_list_entry->gref;
 
@@ -441,8 +498,7 @@ static int blkif_queue_request(struct request *req)
 
                                BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
-                               shared_data = kmap_atomic(
-                                       pfn_to_page(gnt_list_entry->pfn));
+                               shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
                                bvec_data = kmap_atomic(sg_page(sg));
 
                                /*
@@ -461,13 +517,23 @@ static int blkif_queue_request(struct request *req)
                                kunmap_atomic(bvec_data);
                                kunmap_atomic(shared_data);
                        }
-
-                       ring_req->u.rw.seg[i] =
-                                       (struct blkif_request_segment) {
-                                               .gref       = ref,
-                                               .first_sect = fsect,
-                                               .last_sect  = lsect };
+                       if (ring_req->operation != BLKIF_OP_INDIRECT) {
+                               ring_req->u.rw.seg[i] =
+                                               (struct blkif_request_segment) {
+                                                       .gref       = ref,
+                                                       .first_sect = fsect,
+                                                       .last_sect  = lsect };
+                       } else {
+                               n = i % SEGS_PER_INDIRECT_FRAME;
+                               segments[n] =
+                                       (struct blkif_request_segment_aligned) {
+                                                       .gref       = ref,
+                                                       .first_sect = fsect,
+                                                       .last_sect  = lsect };
+                       }
                }
+               if (segments)
+                       kunmap_atomic(segments);
        }
 
        info->ring.req_prod_pvt++;
@@ -542,7 +608,9 @@ wait:
                flush_requests(info);
 }
 
-static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
+                               unsigned int physical_sector_size,
+                               unsigned int segments)
 {
        struct request_queue *rq;
        struct blkfront_info *info = gd->private_data;
@@ -564,14 +632,15 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 
        /* Hard sector size and max sectors impersonate the equiv. hardware. */
        blk_queue_logical_block_size(rq, sector_size);
-       blk_queue_max_hw_sectors(rq, 512);
+       blk_queue_physical_block_size(rq, physical_sector_size);
+       blk_queue_max_hw_sectors(rq, (segments * PAGE_SIZE) / 512);
 
        /* Each segment in a request is up to an aligned page in size. */
        blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
        blk_queue_max_segment_size(rq, PAGE_SIZE);
 
        /* Ensure a merged request will fit in a single I/O ring slot. */
-       blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+       blk_queue_max_segments(rq, segments);
 
        /* Make sure buffer addresses are sector-aligned. */
        blk_queue_dma_alignment(rq, 511);
@@ -588,13 +657,16 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static void xlvbd_flush(struct blkfront_info *info)
 {
        blk_queue_flush(info->rq, info->feature_flush);
-       printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
+       printk(KERN_INFO "blkfront: %s: %s: %s %s %s %s %s\n",
               info->gd->disk_name,
               info->flush_op == BLKIF_OP_WRITE_BARRIER ?
                "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
                "flush diskcache" : "barrier or flush"),
-              info->feature_flush ? "enabled" : "disabled",
-              info->feature_persistent ? "using persistent grants" : "");
+              info->feature_flush ? "enabled;" : "disabled;",
+              "persistent grants:",
+              info->feature_persistent ? "enabled;" : "disabled;",
+              "indirect descriptors:",
+              info->max_indirect_segments ? "enabled;" : "disabled;");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -667,7 +739,8 @@ static char *encode_disk_name(char *ptr, unsigned int n)
 
 static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
                               struct blkfront_info *info,
-                              u16 vdisk_info, u16 sector_size)
+                              u16 vdisk_info, u16 sector_size,
+                              unsigned int physical_sector_size)
 {
        struct gendisk *gd;
        int nr_minors = 1;
@@ -734,7 +807,9 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
        gd->driverfs_dev = &(info->xbdev->dev);
        set_capacity(gd, capacity);
 
-       if (xlvbd_init_blk_queue(gd, sector_size)) {
+       if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
+                                info->max_indirect_segments ? :
+                                BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
                del_gendisk(gd);
                goto release;
        }
@@ -818,6 +893,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 {
        struct grant *persistent_gnt;
        struct grant *n;
+       int i, j, segs;
 
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&info->io_lock);
@@ -843,6 +919,47 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        }
        BUG_ON(info->persistent_gnts_c != 0);
 
+       for (i = 0; i < BLK_RING_SIZE; i++) {
+               /*
+                * Clear persistent grants present in requests already
+                * on the shared ring
+                */
+               if (!info->shadow[i].request)
+                       goto free_shadow;
+
+               segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
+                      info->shadow[i].req.u.indirect.nr_segments :
+                      info->shadow[i].req.u.rw.nr_segments;
+               for (j = 0; j < segs; j++) {
+                       persistent_gnt = info->shadow[i].grants_used[j];
+                       gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+                       __free_page(pfn_to_page(persistent_gnt->pfn));
+                       kfree(persistent_gnt);
+               }
+
+               if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT)
+                       /*
+                        * If this is not an indirect operation don't try to
+                        * free indirect segments
+                        */
+                       goto free_shadow;
+
+               for (j = 0; j < INDIRECT_GREFS(segs); j++) {
+                       persistent_gnt = info->shadow[i].indirect_grants[j];
+                       gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+                       __free_page(pfn_to_page(persistent_gnt->pfn));
+                       kfree(persistent_gnt);
+               }
+
+free_shadow:
+               kfree(info->shadow[i].grants_used);
+               info->shadow[i].grants_used = NULL;
+               kfree(info->shadow[i].indirect_grants);
+               info->shadow[i].indirect_grants = NULL;
+               kfree(info->shadow[i].sg);
+               info->shadow[i].sg = NULL;
+       }
+
        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
        spin_unlock_irq(&info->io_lock);
@@ -867,12 +984,13 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
                             struct blkif_response *bret)
 {
        int i = 0;
-       struct bio_vec *bvec;
-       struct req_iterator iter;
-       unsigned long flags;
+       struct scatterlist *sg;
        char *bvec_data;
        void *shared_data;
-       unsigned int offset = 0;
+       int nseg;
+
+       nseg = s->req.operation == BLKIF_OP_INDIRECT ?
+               s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
 
        if (bret->operation == BLKIF_OP_READ) {
                /*
@@ -881,26 +999,29 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
                 * than PAGE_SIZE, we have to keep track of the current offset,
                 * to be sure we are copying the data from the right shared page.
                 */
-               rq_for_each_segment(bvec, s->request, iter) {
-                       BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
-                       if (bvec->bv_offset < offset)
-                               i++;
-                       BUG_ON(i >= s->req.u.rw.nr_segments);
+               for_each_sg(s->sg, sg, nseg, i) {
+                       BUG_ON(sg->offset + sg->length > PAGE_SIZE);
                        shared_data = kmap_atomic(
                                pfn_to_page(s->grants_used[i]->pfn));
-                       bvec_data = bvec_kmap_irq(bvec, &flags);
-                       memcpy(bvec_data, shared_data + bvec->bv_offset,
-                               bvec->bv_len);
-                       bvec_kunmap_irq(bvec_data, &flags);
+                       bvec_data = kmap_atomic(sg_page(sg));
+                       memcpy(bvec_data   + sg->offset,
+                              shared_data + sg->offset,
+                              sg->length);
+                       kunmap_atomic(bvec_data);
                        kunmap_atomic(shared_data);
-                       offset = bvec->bv_offset + bvec->bv_len;
                }
        }
        /* Add the persistent grant into the list of free grants */
-       for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+       for (i = 0; i < nseg; i++) {
                list_add(&s->grants_used[i]->node, &info->persistent_gnts);
                info->persistent_gnts_c++;
        }
+       if (s->req.operation == BLKIF_OP_INDIRECT) {
+               for (i = 0; i < INDIRECT_GREFS(nseg); i++) {
+                       list_add(&s->indirect_grants[i]->node, &info->persistent_gnts);
+                       info->persistent_gnts_c++;
+               }
+       }
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1034,14 +1155,6 @@ static int setup_blkring(struct xenbus_device *dev,
        SHARED_RING_INIT(sring);
        FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 
-       sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-       /* Allocate memory for grants */
-       err = fill_grant_buffer(info, BLK_RING_SIZE *
-                                     BLKIF_MAX_SEGMENTS_PER_REQUEST);
-       if (err)
-               goto fail;
-
        err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
        if (err < 0) {
                free_page((unsigned long)sring);
@@ -1223,13 +1336,84 @@ static int blkfront_probe(struct xenbus_device *dev,
        return 0;
 }
 
+/*
+ * This is a clone of md_trim_bio, used to split a bio into smaller ones
+ */
+static void trim_bio(struct bio *bio, int offset, int size)
+{
+       /* 'bio' is a cloned bio which we need to trim to match
+        * the given offset and size.
+        * This requires adjusting bi_sector, bi_size, and bi_io_vec
+        */
+       int i;
+       struct bio_vec *bvec;
+       int sofar = 0;
+
+       size <<= 9;
+       if (offset == 0 && size == bio->bi_size)
+               return;
+
+       bio->bi_sector += offset;
+       bio->bi_size = size;
+       offset <<= 9;
+       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+       while (bio->bi_idx < bio->bi_vcnt &&
+              bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+               /* remove this whole bio_vec */
+               offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+               bio->bi_idx++;
+       }
+       if (bio->bi_idx < bio->bi_vcnt) {
+               bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+               bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+       }
+       /* avoid any complications with bi_idx being non-zero*/
+       if (bio->bi_idx) {
+               memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+                       (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+               bio->bi_vcnt -= bio->bi_idx;
+               bio->bi_idx = 0;
+       }
+       /* Make sure vcnt and last bv are not too big */
+       bio_for_each_segment(bvec, bio, i) {
+               if (sofar + bvec->bv_len > size)
+                       bvec->bv_len = size - sofar;
+               if (bvec->bv_len == 0) {
+                       bio->bi_vcnt = i;
+                       break;
+               }
+               sofar += bvec->bv_len;
+       }
+}
+
+static void split_bio_end(struct bio *bio, int error)
+{
+       struct split_bio *split_bio = bio->bi_private;
+
+       if (error)
+               split_bio->err = error;
+
+       if (atomic_dec_and_test(&split_bio->pending)) {
+               split_bio->bio->bi_phys_segments = 0;
+               bio_endio(split_bio->bio, split_bio->err);
+               kfree(split_bio);
+       }
+       bio_put(bio);
+}
 
 static int blkif_recover(struct blkfront_info *info)
 {
        int i;
-       struct blkif_request *req;
+       struct request *req, *n;
        struct blk_shadow *copy;
-       int j;
+       int rc;
+       struct bio *bio, *cloned_bio;
+       struct bio_list bio_list, merge_bio;
+       unsigned int segs, offset;
+       int pending, size;
+       struct split_bio *split_bio;
+       struct list_head requests;
 
        /* Stage 1: Make a safe copy of the shadow state. */
        copy = kmemdup(info->shadow, sizeof(info->shadow),
@@ -1244,36 +1428,64 @@ static int blkif_recover(struct blkfront_info *info)
        info->shadow_free = info->ring.req_prod_pvt;
        info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
 
-       /* Stage 3: Find pending requests and requeue them. */
+       rc = blkfront_setup_indirect(info);
+       if (rc) {
+               kfree(copy);
+               return rc;
+       }
+
+       segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       blk_queue_max_segments(info->rq, segs);
+       bio_list_init(&bio_list);
+       INIT_LIST_HEAD(&requests);
        for (i = 0; i < BLK_RING_SIZE; i++) {
                /* Not in use? */
                if (!copy[i].request)
                        continue;
 
-               /* Grab a request slot and copy shadow state into it. */
-               req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
-               *req = copy[i].req;
-
-               /* We get a new request id, and must reset the shadow state. */
-               req->u.rw.id = get_id_from_freelist(info);
-               memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
-
-               if (req->operation != BLKIF_OP_DISCARD) {
-               /* Rewrite any grant references invalidated by susp/resume. */
-                       for (j = 0; j < req->u.rw.nr_segments; j++)
-                               gnttab_grant_foreign_access_ref(
-                                       req->u.rw.seg[j].gref,
-                                       info->xbdev->otherend_id,
-                                       pfn_to_mfn(copy[i].grants_used[j]->pfn),
-                                       0);
+               /*
+                * Get the bios in the request so we can re-queue them.
+                */
+               if (copy[i].request->cmd_flags &
+                   (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+                       /*
+                        * Flush operations don't contain bios, so
+                        * we need to requeue the whole request
+                        */
+                       list_add(&copy[i].request->queuelist, &requests);
+                       continue;
                }
-               info->shadow[req->u.rw.id].req = *req;
-
-               info->ring.req_prod_pvt++;
+               merge_bio.head = copy[i].request->bio;
+               merge_bio.tail = copy[i].request->biotail;
+               bio_list_merge(&bio_list, &merge_bio);
+               copy[i].request->bio = NULL;
+               blk_put_request(copy[i].request);
        }
 
        kfree(copy);
 
+       /*
+        * Empty the queue, this is important because we might have
+        * requests in the queue with more segments than what we
+        * can handle now.
+        */
+       spin_lock_irq(&info->io_lock);
+       while ((req = blk_fetch_request(info->rq)) != NULL) {
+               if (req->cmd_flags &
+                   (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+                       list_add(&req->queuelist, &requests);
+                       continue;
+               }
+               merge_bio.head = req->bio;
+               merge_bio.tail = req->biotail;
+               bio_list_merge(&bio_list, &merge_bio);
+               req->bio = NULL;
+               if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
+                       pr_alert("diskcache flush request found!\n");
+               __blk_put_request(info->rq, req);
+       }
+       spin_unlock_irq(&info->io_lock);
+
        xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
        spin_lock_irq(&info->io_lock);
@@ -1281,14 +1493,50 @@ static int blkif_recover(struct blkfront_info *info)
        /* Now safe for us to use the shared ring */
        info->connected = BLKIF_STATE_CONNECTED;
 
-       /* Send off requeued requests */
-       flush_requests(info);
-
        /* Kick any other new requests queued since we resumed */
        kick_pending_request_queues(info);
 
+       list_for_each_entry_safe(req, n, &requests, queuelist) {
+               /* Requeue pending requests (flush or discard) */
+               list_del_init(&req->queuelist);
+               BUG_ON(req->nr_phys_segments > segs);
+               blk_requeue_request(info->rq, req);
+       }
        spin_unlock_irq(&info->io_lock);
 
+       while ((bio = bio_list_pop(&bio_list)) != NULL) {
+               /* Traverse the list of pending bios and re-queue them */
+               if (bio_segments(bio) > segs) {
+                       /*
+                        * This bio has more segments than what we can
+                        * handle, we have to split it.
+                        */
+                       pending = (bio_segments(bio) + segs - 1) / segs;
+                       split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
+                       BUG_ON(split_bio == NULL);
+                       atomic_set(&split_bio->pending, pending);
+                       split_bio->bio = bio;
+                       for (i = 0; i < pending; i++) {
+                               offset = (i * segs * PAGE_SIZE) >> 9;
+                               size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
+                                          (unsigned int)(bio->bi_size >> 9) - offset);
+                               cloned_bio = bio_clone(bio, GFP_NOIO);
+                               BUG_ON(cloned_bio == NULL);
+                               trim_bio(cloned_bio, offset, size);
+                               cloned_bio->bi_private = split_bio;
+                               cloned_bio->bi_end_io = split_bio_end;
+                               submit_bio(cloned_bio->bi_rw, cloned_bio);
+                       }
+                       /*
+                        * Now we have to wait for all those smaller bios to
+                        * end, so we can also end the "parent" bio.
+                        */
+                       continue;
+               }
+               /* We don't need to split this bio */
+               submit_bio(bio->bi_rw, bio);
+       }
+
        return 0;
 }
 
@@ -1308,8 +1556,12 @@ static int blkfront_resume(struct xenbus_device *dev)
        blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
        err = talk_to_blkback(dev, info);
-       if (info->connected == BLKIF_STATE_SUSPENDED && !err)
-               err = blkif_recover(info);
+
+       /*
+        * We have to wait for the backend to switch to
+        * connected state, since we want to read which
+        * features it supports.
+        */
 
        return err;
 }
@@ -1387,6 +1639,60 @@ static void blkfront_setup_discard(struct blkfront_info *info)
        kfree(type);
 }
 
+static int blkfront_setup_indirect(struct blkfront_info *info)
+{
+       unsigned int indirect_segments, segs;
+       int err, i;
+
+       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+                           "feature-max-indirect-segments", "%u", &indirect_segments,
+                           NULL);
+       if (err) {
+               info->max_indirect_segments = 0;
+               segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+       } else {
+               info->max_indirect_segments = min(indirect_segments,
+                                                 xen_blkif_max_segments);
+               segs = info->max_indirect_segments;
+       }
+
+       err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
+       if (err)
+               goto out_of_memory;
+
+       for (i = 0; i < BLK_RING_SIZE; i++) {
+               info->shadow[i].grants_used = kzalloc(
+                       sizeof(info->shadow[i].grants_used[0]) * segs,
+                       GFP_NOIO);
+               info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO);
+               if (info->max_indirect_segments)
+                       info->shadow[i].indirect_grants = kzalloc(
+                               sizeof(info->shadow[i].indirect_grants[0]) *
+                               INDIRECT_GREFS(segs),
+                               GFP_NOIO);
+               if ((info->shadow[i].grants_used == NULL) ||
+                       (info->shadow[i].sg == NULL) ||
+                    (info->max_indirect_segments &&
+                    (info->shadow[i].indirect_grants == NULL)))
+                       goto out_of_memory;
+               sg_init_table(info->shadow[i].sg, segs);
+       }
+
+
+       return 0;
+
+out_of_memory:
+       for (i = 0; i < BLK_RING_SIZE; i++) {
+               kfree(info->shadow[i].grants_used);
+               info->shadow[i].grants_used = NULL;
+               kfree(info->shadow[i].sg);
+               info->shadow[i].sg = NULL;
+               kfree(info->shadow[i].indirect_grants);
+               info->shadow[i].indirect_grants = NULL;
+       }
+       return -ENOMEM;
+}
+
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
@@ -1395,6 +1701,7 @@ static void blkfront_connect(struct blkfront_info *info)
 {
        unsigned long long sectors;
        unsigned long sector_size;
+       unsigned int physical_sector_size;
        unsigned int binfo;
        int err;
        int barrier, flush, discard, persistent;
@@ -1414,8 +1721,15 @@ static void blkfront_connect(struct blkfront_info *info)
                set_capacity(info->gd, sectors);
                revalidate_disk(info->gd);
 
-               /* fall through */
+               return;
        case BLKIF_STATE_SUSPENDED:
+               /*
+                * If we are recovering from suspension, we need to wait
+                * for the backend to announce it's features before
+                * reconnecting, at least we need to know if the backend
+                * supports indirect descriptors, and how many.
+                */
+               blkif_recover(info);
                return;
 
        default:
@@ -1437,6 +1751,16 @@ static void blkfront_connect(struct blkfront_info *info)
                return;
        }
 
+       /*
+        * physcial-sector-size is a newer field, so old backends may not
+        * provide this. Assume physical sector size to be the same as
+        * sector_size in that case.
+        */
+       err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                          "physical-sector-size", "%u", &physical_sector_size);
+       if (err != 1)
+               physical_sector_size = sector_size;
+
        info->feature_flush = 0;
        info->flush_op = 0;
 
@@ -1483,7 +1807,15 @@ static void blkfront_connect(struct blkfront_info *info)
        else
                info->feature_persistent = persistent;
 
-       err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
+       err = blkfront_setup_indirect(info);
+       if (err) {
+               xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
+                                info->xbdev->otherend);
+               return;
+       }
+
+       err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
+                                 physical_sector_size);
        if (err) {
                xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
                                 info->xbdev->otherend);
index 5996521a1caff11068e9b2e1e417cdf729f474df..84573b4d6f92809d5cd98fa968f6dd1a617482d3 100644 (file)
@@ -429,7 +429,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
        dma_addr_t src_dma, dst_dma;
        int ret = 0;
 
-       desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
+       desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
        if (!desc) {
                dev_err(jrdev, "unable to allocate key input memory\n");
                return -ENOMEM;
index 27e86d93826280a62744a559332bf0cd9bc03b3b..89e109022d78a27cf506d995c1c16de2feffea0f 100644 (file)
@@ -48,6 +48,8 @@ static LIST_HEAD(mc_devices);
  */
 static void const *edac_mc_owner;
 
+static struct bus_type mc_bus[EDAC_MAX_MCS];
+
 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
                                 unsigned len)
 {
@@ -723,6 +725,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
        int ret = -EINVAL;
        edac_dbg(0, "\n");
 
+       if (mci->mc_idx >= EDAC_MAX_MCS) {
+               pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx);
+               return -ENODEV;
+       }
+
 #ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);
@@ -762,6 +769,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;
 
+       mci->bus = &mc_bus[mci->mc_idx];
+
        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
index ef15a7e613bc6b1d1feec75733a86e20cb23efa7..e7c32c4f783737962b575a232f03b5ed19d3629b 100644 (file)
@@ -370,7 +370,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
                return -ENODEV;
 
        csrow->dev.type = &csrow_attr_type;
-       csrow->dev.bus = &mci->bus;
+       csrow->dev.bus = mci->bus;
        device_initialize(&csrow->dev);
        csrow->dev.parent = &mci->dev;
        csrow->mci = mci;
@@ -605,7 +605,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci,
        dimm->mci = mci;
 
        dimm->dev.type = &dimm_attr_type;
-       dimm->dev.bus = &mci->bus;
+       dimm->dev.bus = mci->bus;
        device_initialize(&dimm->dev);
 
        dimm->dev.parent = &mci->dev;
@@ -975,11 +975,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
         * The memory controller needs its own bus, in order to avoid
         * namespace conflicts at /sys/bus/edac.
         */
-       mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx);
-       if (!mci->bus.name)
+       mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx);
+       if (!mci->bus->name)
                return -ENOMEM;
-       edac_dbg(0, "creating bus %s\n", mci->bus.name);
-       err = bus_register(&mci->bus);
+
+       edac_dbg(0, "creating bus %s\n", mci->bus->name);
+
+       err = bus_register(mci->bus);
        if (err < 0)
                return err;
 
@@ -988,7 +990,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
        device_initialize(&mci->dev);
 
        mci->dev.parent = mci_pdev;
-       mci->dev.bus = &mci->bus;
+       mci->dev.bus = mci->bus;
        dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
        dev_set_drvdata(&mci->dev, mci);
        pm_runtime_forbid(&mci->dev);
@@ -997,8 +999,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
        err = device_add(&mci->dev);
        if (err < 0) {
                edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
-               bus_unregister(&mci->bus);
-               kfree(mci->bus.name);
+               bus_unregister(mci->bus);
+               kfree(mci->bus->name);
                return err;
        }
 
@@ -1064,8 +1066,8 @@ fail:
        }
 fail2:
        device_unregister(&mci->dev);
-       bus_unregister(&mci->bus);
-       kfree(mci->bus.name);
+       bus_unregister(mci->bus);
+       kfree(mci->bus->name);
        return err;
 }
 
@@ -1098,8 +1100,8 @@ void edac_unregister_sysfs(struct mem_ctl_info *mci)
 {
        edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
        device_unregister(&mci->dev);
-       bus_unregister(&mci->bus);
-       kfree(mci->bus.name);
+       bus_unregister(mci->bus);
+       kfree(mci->bus->name);
 }
 
 static void mc_attr_release(struct device *dev)
index 1b635178cc44ff31b8ed224560ebdf86851165f0..157b934e8ce3a528468e45a735ab41dee6043cea 100644 (file)
@@ -974,7 +974,7 @@ static int i5100_setup_debugfs(struct mem_ctl_info *mci)
        if (!i5100_debugfs)
                return -ENODEV;
 
-       priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs);
+       priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs);
 
        if (!priv->debugfs)
                return -ENOMEM;
index f4491a497cc8760ef59484ac2c6c264573b3b8a9..c2fa77086eb58cf9de7f2c2a453c900a9771f0b5 100644 (file)
@@ -378,7 +378,7 @@ static int msm_gpio_probe(struct platform_device *pdev)
        int ret, ngpio;
        struct resource *res;
 
-       if (!of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) {
+       if (of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) {
                dev_err(&pdev->dev, "%s: ngpio property missing\n", __func__);
                return -EINVAL;
        }
index dfeb3a3a8f20949098b5dc0e703ffe1ea944f245..c57244ef428b1db249e82917a55870bc17774f27 100644 (file)
@@ -1037,6 +1037,18 @@ omap_mpuio_alloc_gc(struct gpio_bank *bank, unsigned int irq_start,
                               IRQ_NOREQUEST | IRQ_NOPROBE, 0);
 }
 
+#if defined(CONFIG_OF_GPIO)
+static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip)
+{
+       return chip->of_node != NULL;
+}
+#else
+static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip)
+{
+       return false;
+}
+#endif
+
 static void omap_gpio_chip_init(struct gpio_bank *bank)
 {
        int j;
@@ -1068,24 +1080,68 @@ static void omap_gpio_chip_init(struct gpio_bank *bank)
 
        gpiochip_add(&bank->chip);
 
-       for (j = 0; j < bank->width; j++) {
-               int irq = irq_create_mapping(bank->domain, j);
-               irq_set_lockdep_class(irq, &gpio_lock_class);
-               irq_set_chip_data(irq, bank);
-               if (bank->is_mpuio) {
-                       omap_mpuio_alloc_gc(bank, irq, bank->width);
-               } else {
-                       irq_set_chip_and_handler(irq, &gpio_irq_chip,
-                                                handle_simple_irq);
-                       set_irq_flags(irq, IRQF_VALID);
-               }
-       }
+       /*
+        * REVISIT these explicit calls to irq_create_mapping()
+        * to do the GPIO to IRQ domain mapping for each GPIO in
+        * the bank can be removed once all OMAP platforms have
+        * been migrated to Device Tree boot only.
+        * Since in DT boot irq_create_mapping() is called from
+        * irq_create_of_mapping() only for the GPIO lines that
+        * are used as interrupts.
+        */
+       if (!omap_gpio_chip_boot_dt(&bank->chip))
+               for (j = 0; j < bank->width; j++)
+                       irq_create_mapping(bank->domain, j);
        irq_set_chained_handler(bank->irq, gpio_irq_handler);
        irq_set_handler_data(bank->irq, bank);
 }
 
 static const struct of_device_id omap_gpio_match[];
 
+static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq,
+                            irq_hw_number_t hwirq)
+{
+       struct gpio_bank *bank = d->host_data;
+       int gpio;
+       int ret;
+
+       if (!bank)
+               return -EINVAL;
+
+       irq_set_lockdep_class(virq, &gpio_lock_class);
+       irq_set_chip_data(virq, bank);
+       if (bank->is_mpuio) {
+               omap_mpuio_alloc_gc(bank, virq, bank->width);
+       } else {
+               irq_set_chip_and_handler(virq, &gpio_irq_chip,
+                                        handle_simple_irq);
+               set_irq_flags(virq, IRQF_VALID);
+       }
+
+       /*
+        * REVISIT most GPIO IRQ chip drivers need to call
+        * gpio_request() before a GPIO line can be used as an
+        * IRQ. Ideally this should be handled by the IRQ core
+        * but until then this has to be done on a per driver
+        * basis. Remove this once this is managed by the core.
+        */
+       if (omap_gpio_chip_boot_dt(&bank->chip)) {
+               gpio = irq_to_gpio(bank, hwirq);
+               ret = gpio_request_one(gpio, GPIOF_IN, NULL);
+               if (ret) {
+                       dev_err(bank->dev, "Could not request GPIO%d\n", gpio);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static struct irq_domain_ops omap_gpio_irq_ops = {
+       .xlate  = irq_domain_xlate_onetwocell,
+       .map    = omap_gpio_irq_map,
+};
+
 static int omap_gpio_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -1151,10 +1207,10 @@ static int omap_gpio_probe(struct platform_device *pdev)
        }
 
        bank->domain = irq_domain_add_legacy(node, bank->width, irq_base,
-                                            0, &irq_domain_simple_ops, NULL);
+                                            0, &omap_gpio_irq_ops, bank);
 #else
        bank->domain = irq_domain_add_linear(node, bank->width,
-                                            &irq_domain_simple_ops, NULL);
+                                            &omap_gpio_irq_ops, bank);
 #endif
        if (!bank->domain) {
                dev_err(dev, "Couldn't register an IRQ domain\n");
index 738a4294d820e164d7534f830dd4eed7c880aa4d..6a647493ca7f0741cd39d03134f0614c74451d48 100644 (file)
@@ -677,6 +677,11 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
                                        /* don't break so fail path works correct */
                                        fail = 1;
                                break;
+
+                               if (connector->dpms != DRM_MODE_DPMS_ON) {
+                                       DRM_DEBUG_KMS("connector dpms not on, full mode switch\n");
+                                       mode_changed = true;
+                               }
                        }
                }
 
@@ -754,6 +759,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
                                ret = -EINVAL;
                                goto fail;
                        }
+                       DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
+                       for (i = 0; i < set->num_connectors; i++) {
+                               DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
+                                             drm_get_connector_name(set->connectors[i]));
+                               set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
+                       }
                }
                drm_helper_disable_unused_functions(dev);
        } else if (fb_changed) {
@@ -771,22 +782,6 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
                }
        }
 
-       /*
-        * crtc set_config helpers implicit set the crtc and all connected
-        * encoders to DPMS on for a full mode set. But for just an fb update it
-        * doesn't do that. To not confuse userspace, do an explicit DPMS_ON
-        * unconditionally. This will also ensure driver internal dpms state is
-        * consistent again.
-        */
-       if (set->crtc->enabled) {
-               DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
-               for (i = 0; i < set->num_connectors; i++) {
-                       DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
-                                     drm_get_connector_name(set->connectors[i]));
-                       set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
-               }
-       }
-
        kfree(save_connectors);
        kfree(save_encoders);
        kfree(save_crtcs);
index cf188ab7051a6b213e0ebb945fde92fc869b0ce6..67ec54f67afe2d7a56cbabda7800819bfee753ac 100644 (file)
@@ -1495,6 +1495,14 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        dev_priv->dev = dev;
        dev_priv->info = info;
 
+       spin_lock_init(&dev_priv->irq_lock);
+       spin_lock_init(&dev_priv->gpu_error.lock);
+       spin_lock_init(&dev_priv->rps.lock);
+       spin_lock_init(&dev_priv->backlight.lock);
+       mutex_init(&dev_priv->dpio_lock);
+       mutex_init(&dev_priv->rps.hw_lock);
+       mutex_init(&dev_priv->modeset_restore_lock);
+
        i915_dump_device_info(dev_priv);
 
        if (i915_get_bridge_dev(dev)) {
@@ -1585,6 +1593,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        intel_detect_pch(dev);
 
        intel_irq_init(dev);
+       intel_gt_sanitize(dev);
        intel_gt_init(dev);
 
        /* Try to make sure MCHBAR is enabled before poking at it */
@@ -1610,15 +1619,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        if (!IS_I945G(dev) && !IS_I945GM(dev))
                pci_enable_msi(dev->pdev);
 
-       spin_lock_init(&dev_priv->irq_lock);
-       spin_lock_init(&dev_priv->gpu_error.lock);
-       spin_lock_init(&dev_priv->rps.lock);
-       spin_lock_init(&dev_priv->backlight.lock);
-       mutex_init(&dev_priv->dpio_lock);
-
-       mutex_init(&dev_priv->rps.hw_lock);
-       mutex_init(&dev_priv->modeset_restore_lock);
-
        dev_priv->num_plane = 1;
        if (IS_VALLEYVIEW(dev))
                dev_priv->num_plane = 2;
index f4af1ca0fb62d82827213d51461d9642f0cf33cf..45b3c030f48393b6921e66406e48f92e2f1d643e 100644 (file)
@@ -706,7 +706,7 @@ static int i915_drm_thaw(struct drm_device *dev)
 {
        int error = 0;
 
-       intel_gt_reset(dev);
+       intel_gt_sanitize(dev);
 
        if (drm_core_check_feature(dev, DRIVER_MODESET)) {
                mutex_lock(&dev->struct_mutex);
@@ -732,7 +732,7 @@ int i915_resume(struct drm_device *dev)
 
        pci_set_master(dev->pdev);
 
-       intel_gt_reset(dev);
+       intel_gt_sanitize(dev);
 
        /*
         * Platforms with opregion should have sane BIOS, older ones (gen3 and
@@ -1253,21 +1253,21 @@ hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg)
 
 #define __i915_read(x, y) \
 u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
+       unsigned long irqflags; \
        u##x val = 0; \
+       spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \
        if (IS_GEN5(dev_priv->dev)) \
                ilk_dummy_write(dev_priv); \
        if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-               unsigned long irqflags; \
-               spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \
                if (dev_priv->forcewake_count == 0) \
                        dev_priv->gt.force_wake_get(dev_priv); \
                val = read##y(dev_priv->regs + reg); \
                if (dev_priv->forcewake_count == 0) \
                        dev_priv->gt.force_wake_put(dev_priv); \
-               spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \
        } else { \
                val = read##y(dev_priv->regs + reg); \
        } \
+       spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \
        trace_i915_reg_rw(false, reg, val, sizeof(val)); \
        return val; \
 }
@@ -1280,8 +1280,10 @@ __i915_read(64, q)
 
 #define __i915_write(x, y) \
 void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
+       unsigned long irqflags; \
        u32 __fifo_ret = 0; \
        trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+       spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \
        if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
                __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
        } \
@@ -1293,6 +1295,7 @@ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
                gen6_gt_check_fifodbg(dev_priv); \
        } \
        hsw_unclaimed_reg_check(dev_priv, reg); \
+       spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \
 }
 __i915_write(8, b)
 __i915_write(16, w)
index a416645bcd23364bb9793100da286b7f74858794..d2ee3343c9439cbcf306ce443b9fbe57e53e534a 100644 (file)
@@ -555,6 +555,7 @@ enum intel_sbi_destination {
 #define QUIRK_PIPEA_FORCE (1<<0)
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
+#define QUIRK_NO_PCH_PWM_ENABLE (1<<3)
 
 struct intel_fbdev;
 struct intel_fbc_work;
@@ -1583,7 +1584,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged);
 extern void intel_irq_init(struct drm_device *dev);
 extern void intel_hpd_init(struct drm_device *dev);
 extern void intel_gt_init(struct drm_device *dev);
-extern void intel_gt_reset(struct drm_device *dev);
+extern void intel_gt_sanitize(struct drm_device *dev);
 
 void i915_error_state_free(struct kref *error_ref);
 
index 97afd2639fb63a1e240fce2c6f97f53fd9f82e98..d9e2208cfe98f2fdcb3957bf4ee13f0497e06dfb 100644 (file)
@@ -2258,7 +2258,17 @@ void i915_gem_restore_fences(struct drm_device *dev)
 
        for (i = 0; i < dev_priv->num_fence_regs; i++) {
                struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-               i915_gem_write_fence(dev, i, reg->obj);
+
+               /*
+                * Commit delayed tiling changes if we have an object still
+                * attached to the fence, otherwise just clear the fence.
+                */
+               if (reg->obj) {
+                       i915_gem_object_update_fence(reg->obj, reg,
+                                                    reg->obj->tiling_mode);
+               } else {
+                       i915_gem_write_fence(dev, i, NULL);
+               }
        }
 }
 
@@ -2795,6 +2805,10 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
        if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
                mb();
 
+       WARN(obj && (!obj->stride || !obj->tiling_mode),
+            "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+            obj->stride, obj->tiling_mode);
+
        switch (INTEL_INFO(dev)->gen) {
        case 7:
        case 6:
@@ -2836,6 +2850,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
                fence->obj = NULL;
                list_del_init(&fence->lru_list);
        }
+       obj->fence_dirty = false;
 }
 
 static int
@@ -2965,7 +2980,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
                return 0;
 
        i915_gem_object_update_fence(obj, reg, enable);
-       obj->fence_dirty = false;
 
        return 0;
 }
index 324211ac9c555486d56acd36584af7468e762f01..b042ee5c40704a8beaa1e5b99f3d6a1b2082d722 100644 (file)
@@ -301,7 +301,7 @@ static void intel_ddi_mode_set(struct drm_encoder *encoder,
                struct intel_digital_port *intel_dig_port =
                        enc_to_dig_port(encoder);
 
-               intel_dp->DP = intel_dig_port->port_reversal |
+               intel_dp->DP = intel_dig_port->saved_port_bits |
                               DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW;
                intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count);
 
@@ -1109,7 +1109,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder)
                 * enabling the port.
                 */
                I915_WRITE(DDI_BUF_CTL(port),
-                          intel_dig_port->port_reversal | DDI_BUF_CTL_ENABLE);
+                          intel_dig_port->saved_port_bits |
+                          DDI_BUF_CTL_ENABLE);
        } else if (type == INTEL_OUTPUT_EDP) {
                struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
@@ -1347,8 +1348,9 @@ void intel_ddi_init(struct drm_device *dev, enum port port)
        intel_encoder->get_config = intel_ddi_get_config;
 
        intel_dig_port->port = port;
-       intel_dig_port->port_reversal = I915_READ(DDI_BUF_CTL(port)) &
-                                       DDI_BUF_PORT_REVERSAL;
+       intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
+                                         (DDI_BUF_PORT_REVERSAL |
+                                          DDI_A_4_LANES);
        intel_dig_port->dp.output_reg = DDI_BUF_CTL(port);
 
        intel_encoder->type = INTEL_OUTPUT_UNKNOWN;
index 85f3eb74d2b75dbf48f7d9afcf3d5626de4187ca..5fb305840db89ecd80b5cd42b69ff9c35dd9432b 100644 (file)
@@ -4913,22 +4913,19 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc,
        uint32_t tmp;
 
        tmp = I915_READ(PFIT_CONTROL);
+       if (!(tmp & PFIT_ENABLE))
+               return;
 
+       /* Check whether the pfit is attached to our pipe. */
        if (INTEL_INFO(dev)->gen < 4) {
                if (crtc->pipe != PIPE_B)
                        return;
-
-               /* gen2/3 store dither state in pfit control, needs to match */
-               pipe_config->gmch_pfit.control = tmp & PANEL_8TO6_DITHER_ENABLE;
        } else {
                if ((tmp & PFIT_PIPE_MASK) != (crtc->pipe << PFIT_PIPE_SHIFT))
                        return;
        }
 
-       if (!(tmp & PFIT_ENABLE))
-               return;
-
-       pipe_config->gmch_pfit.control = I915_READ(PFIT_CONTROL);
+       pipe_config->gmch_pfit.control = tmp;
        pipe_config->gmch_pfit.pgm_ratios = I915_READ(PFIT_PGM_RATIOS);
        if (INTEL_INFO(dev)->gen < 5)
                pipe_config->gmch_pfit.lvds_border_bits =
@@ -8317,6 +8314,8 @@ check_shared_dpll_state(struct drm_device *dev)
                     pll->active, pll->refcount);
                WARN(pll->active && !pll->on,
                     "pll in active use but not on in sw tracking\n");
+               WARN(pll->on && !pll->active,
+                    "pll in on but not on in use in sw tracking\n");
                WARN(pll->on != active,
                     "pll on state mismatch (expected %i, found %i)\n",
                     pll->on, active);
@@ -8541,15 +8540,20 @@ static void intel_set_config_restore_state(struct drm_device *dev,
 }
 
 static bool
-is_crtc_connector_off(struct drm_crtc *crtc, struct drm_connector *connectors,
-                     int num_connectors)
+is_crtc_connector_off(struct drm_mode_set *set)
 {
        int i;
 
-       for (i = 0; i < num_connectors; i++)
-               if (connectors[i].encoder &&
-                   connectors[i].encoder->crtc == crtc &&
-                   connectors[i].dpms != DRM_MODE_DPMS_ON)
+       if (set->num_connectors == 0)
+               return false;
+
+       if (WARN_ON(set->connectors == NULL))
+               return false;
+
+       for (i = 0; i < set->num_connectors; i++)
+               if (set->connectors[i]->encoder &&
+                   set->connectors[i]->encoder->crtc == set->crtc &&
+                   set->connectors[i]->dpms != DRM_MODE_DPMS_ON)
                        return true;
 
        return false;
@@ -8562,10 +8566,8 @@ intel_set_config_compute_mode_changes(struct drm_mode_set *set,
 
        /* We should be able to check here if the fb has the same properties
         * and then just flip_or_move it */
-       if (set->connectors != NULL &&
-           is_crtc_connector_off(set->crtc, *set->connectors,
-                                 set->num_connectors)) {
-                       config->mode_changed = true;
+       if (is_crtc_connector_off(set)) {
+               config->mode_changed = true;
        } else if (set->crtc->fb != set->fb) {
                /* If we have no fb then treat it as a full mode set */
                if (set->crtc->fb == NULL) {
@@ -9398,6 +9400,17 @@ static void quirk_invert_brightness(struct drm_device *dev)
        DRM_INFO("applying inverted panel brightness quirk\n");
 }
 
+/*
+ * Some machines (Dell XPS13) suffer broken backlight controls if
+ * BLM_PCH_PWM_ENABLE is set.
+ */
+static void quirk_no_pcm_pwm_enable(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       dev_priv->quirks |= QUIRK_NO_PCH_PWM_ENABLE;
+       DRM_INFO("applying no-PCH_PWM_ENABLE quirk\n");
+}
+
 struct intel_quirk {
        int device;
        int subsystem_vendor;
@@ -9467,6 +9480,11 @@ static struct intel_quirk intel_quirks[] = {
 
        /* Acer Aspire 4736Z */
        { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness },
+
+       /* Dell XPS13 HD Sandy Bridge */
+       { 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable },
+       /* Dell XPS13 HD and XPS13 FHD Ivy Bridge */
+       { 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable },
 };
 
 static void intel_init_quirks(struct drm_device *dev)
@@ -9817,8 +9835,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
                }
                pll->refcount = pll->active;
 
-               DRM_DEBUG_KMS("%s hw state readout: refcount %i\n",
-                             pll->name, pll->refcount);
+               DRM_DEBUG_KMS("%s hw state readout: refcount %i, on %i\n",
+                             pll->name, pll->refcount, pll->on);
        }
 
        list_for_each_entry(encoder, &dev->mode_config.encoder_list,
@@ -9869,6 +9887,7 @@ void intel_modeset_setup_hw_state(struct drm_device *dev,
        struct drm_plane *plane;
        struct intel_crtc *crtc;
        struct intel_encoder *encoder;
+       int i;
 
        intel_modeset_readout_hw_state(dev);
 
@@ -9884,6 +9903,18 @@ void intel_modeset_setup_hw_state(struct drm_device *dev,
                intel_dump_pipe_config(crtc, &crtc->config, "[setup_hw_state]");
        }
 
+       for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+               struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i];
+
+               if (!pll->on || pll->active)
+                       continue;
+
+               DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", pll->name);
+
+               pll->disable(dev_priv, pll);
+               pll->on = false;
+       }
+
        if (force_restore) {
                /*
                 * We need to use raw interfaces for restoring state to avoid
index c8c9b6f48230489b0cccdf37413e5b4d8fbb28d7..b7d6e09456ce372f8a87e8f3b1ebd75df1779e70 100644 (file)
@@ -504,7 +504,7 @@ struct intel_dp {
 struct intel_digital_port {
        struct intel_encoder base;
        enum port port;
-       u32 port_reversal;
+       u32 saved_port_bits;
        struct intel_dp dp;
        struct intel_hdmi hdmi;
 };
index 021e8daa022d34485583e53eeea71d7f9ba33fb5..61348eae2f0436a05f7b5d88fec4242db918655d 100644 (file)
@@ -109,6 +109,13 @@ static void intel_lvds_get_config(struct intel_encoder *encoder,
                flags |= DRM_MODE_FLAG_PVSYNC;
 
        pipe_config->adjusted_mode.flags |= flags;
+
+       /* gen2/3 store dither state in pfit control, needs to match */
+       if (INTEL_INFO(dev)->gen < 4) {
+               tmp = I915_READ(PFIT_CONTROL);
+
+               pipe_config->gmch_pfit.control |= tmp & PANEL_8TO6_DITHER_ENABLE;
+       }
 }
 
 /* The LVDS pin pair needs to be on before the DPLLs are enabled.
@@ -290,14 +297,11 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder,
 
                intel_pch_panel_fitting(intel_crtc, pipe_config,
                                        intel_connector->panel.fitting_mode);
-               return true;
        } else {
                intel_gmch_panel_fitting(intel_crtc, pipe_config,
                                         intel_connector->panel.fitting_mode);
-       }
 
-       drm_mode_set_crtcinfo(adjusted_mode, 0);
-       pipe_config->timings_set = true;
+       }
 
        /*
         * XXX: It would be nice to support lower refresh rates on the
index 80bea1d3209fad623a9d189f5ac1b3e1c12b6f0f..67e2c1f1c9a8309b809c35f32395e26e67236992 100644 (file)
@@ -194,6 +194,9 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc,
            adjusted_mode->vdisplay == mode->vdisplay)
                goto out;
 
+       drm_mode_set_crtcinfo(adjusted_mode, 0);
+       pipe_config->timings_set = true;
+
        switch (fitting_mode) {
        case DRM_MODE_SCALE_CENTER:
                /*
@@ -580,7 +583,8 @@ void intel_panel_enable_backlight(struct drm_device *dev,
                POSTING_READ(reg);
                I915_WRITE(reg, tmp | BLM_PWM_ENABLE);
 
-               if (HAS_PCH_SPLIT(dev)) {
+               if (HAS_PCH_SPLIT(dev) &&
+                   !(dev_priv->quirks & QUIRK_NO_PCH_PWM_ENABLE)) {
                        tmp = I915_READ(BLC_PWM_PCH_CTL1);
                        tmp |= BLM_PCH_PWM_ENABLE;
                        tmp &= ~BLM_PCH_OVERRIDE_ENABLE;
index d10e6735771fe55bd738bc3ee2d840f384d04471..6a347f54d39fd9724ac368b710abafb259ee8180 100644 (file)
@@ -5476,7 +5476,7 @@ static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
        gen6_gt_check_fifodbg(dev_priv);
 }
 
-void intel_gt_reset(struct drm_device *dev)
+void intel_gt_sanitize(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -5487,6 +5487,10 @@ void intel_gt_reset(struct drm_device *dev)
                if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
                        __gen6_gt_force_wake_mt_reset(dev_priv);
        }
+
+       /* BIOS often leaves RC6 enabled, but disable it for hw init */
+       if (INTEL_INFO(dev)->gen >= 6)
+               intel_disable_gt_powersave(dev);
 }
 
 void intel_gt_init(struct drm_device *dev)
@@ -5495,8 +5499,6 @@ void intel_gt_init(struct drm_device *dev)
 
        spin_lock_init(&dev_priv->gt_lock);
 
-       intel_gt_reset(dev);
-
        if (IS_VALLEYVIEW(dev)) {
                dev_priv->gt.force_wake_get = vlv_force_wake_get;
                dev_priv->gt.force_wake_put = vlv_force_wake_put;
index 262c9f5f5f60b322d1a122fe44d99e86d079c818..ce860de43e614a31c73cc6a265a152cbed191233 100644 (file)
@@ -90,6 +90,7 @@ nvc0_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
                return ret;
 
        nv_subdev(priv)->unit = 0x00008000;
+       nv_subdev(priv)->intr = nouveau_falcon_intr;
        nv_engine(priv)->cclass = &nvc0_bsp_cclass;
        nv_engine(priv)->sclass = nvc0_bsp_sclass;
        return 0;
index c46882c8398253ceb1739d82dc31739fef8e09e4..ba6aeca0285ee14afac488166432c5d392ba9a01 100644 (file)
@@ -90,6 +90,7 @@ nve0_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
                return ret;
 
        nv_subdev(priv)->unit = 0x00008000;
+       nv_subdev(priv)->intr = nouveau_falcon_intr;
        nv_engine(priv)->cclass = &nve0_bsp_cclass;
        nv_engine(priv)->sclass = nve0_bsp_sclass;
        return 0;
index 3c7a31f7590edd973b84376427dfeb8d60387b0a..e03fc8e4dc1dac8c8262a93b2b0b812dcae60841 100644 (file)
 #include <engine/falcon.h>
 #include <subdev/timer.h>
 
+void
+nouveau_falcon_intr(struct nouveau_subdev *subdev)
+{
+       struct nouveau_falcon *falcon = (void *)subdev;
+       u32 dispatch = nv_ro32(falcon, 0x01c);
+       u32 intr = nv_ro32(falcon, 0x008) & dispatch & ~(dispatch >> 16);
+
+       if (intr & 0x00000010) {
+               nv_debug(falcon, "ucode halted\n");
+               nv_wo32(falcon, 0x004, 0x00000010);
+               intr &= ~0x00000010;
+       }
+
+       if (intr)  {
+               nv_error(falcon, "unhandled intr 0x%08x\n", intr);
+               nv_wo32(falcon, 0x004, intr);
+       }
+}
+
 u32
 _nouveau_falcon_rd32(struct nouveau_object *object, u64 addr)
 {
index 98072c1ff3606117e477cb294435e0e75bc11116..73719aaa62d6474ea66fbe85ca252de826f6cff9 100644 (file)
@@ -90,6 +90,7 @@ nvc0_ppp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
                return ret;
 
        nv_subdev(priv)->unit = 0x00000002;
+       nv_subdev(priv)->intr = nouveau_falcon_intr;
        nv_engine(priv)->cclass = &nvc0_ppp_cclass;
        nv_engine(priv)->sclass = nvc0_ppp_sclass;
        return 0;
index 1879229b60eb8ef4e347d16c07f78fed8a8b3f82..ac1f62aace72eb99ff1ca47849aa4828a160a2b5 100644 (file)
@@ -90,6 +90,7 @@ nvc0_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
                return ret;
 
        nv_subdev(priv)->unit = 0x00020000;
+       nv_subdev(priv)->intr = nouveau_falcon_intr;
        nv_engine(priv)->cclass = &nvc0_vp_cclass;
        nv_engine(priv)->sclass = nvc0_vp_sclass;
        return 0;
index d28ecbf7bc49ee6c2fd521c6ab652237e04bb6ae..d4c3108479c9344967759c134a9d96802181b8ba 100644 (file)
@@ -90,6 +90,7 @@ nve0_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
                return ret;
 
        nv_subdev(priv)->unit = 0x00020000;
+       nv_subdev(priv)->intr = nouveau_falcon_intr;
        nv_engine(priv)->cclass = &nve0_vp_cclass;
        nv_engine(priv)->sclass = nve0_vp_sclass;
        return 0;
index 1edec386ab36008e2a50272c2c2200d4ddb3f80b..181aa7da524dd1c05779f21da4aad367ce4c0e94 100644 (file)
@@ -72,6 +72,8 @@ int nouveau_falcon_create_(struct nouveau_object *, struct nouveau_object *,
                           struct nouveau_oclass *, u32, bool, const char *,
                           const char *, int, void **);
 
+void nouveau_falcon_intr(struct nouveau_subdev *subdev);
+
 #define _nouveau_falcon_dtor _nouveau_engine_dtor
 int  _nouveau_falcon_init(struct nouveau_object *);
 int  _nouveau_falcon_fini(struct nouveau_object *, bool);
index 4b1afb131380312bc971117635c9ac072fbdba1a..4e7ee5f4155c96bce6b1091fdf432a6d46140eab 100644 (file)
@@ -148,6 +148,7 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
 
        if (unlikely(nvbo->gem))
                DRM_ERROR("bo %p still attached to GEM object\n", bo);
+       WARN_ON(nvbo->pin_refcnt > 0);
        nv10_bo_put_tile_region(dev, nvbo->tile, NULL);
        kfree(nvbo);
 }
@@ -197,6 +198,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
        size_t acc_size;
        int ret;
        int type = ttm_bo_type_device;
+       int max_size = INT_MAX & ~((1 << drm->client.base.vm->vmm->lpg_shift) - 1);
+
+       if (size <= 0 || size > max_size) {
+               nv_warn(drm, "skipped size %x\n", (u32)size);
+               return -EINVAL;
+       }
 
        if (sg)
                type = ttm_bo_type_sg;
@@ -340,13 +347,15 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo)
 {
        struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
        struct ttm_buffer_object *bo = &nvbo->bo;
-       int ret;
+       int ret, ref;
 
        ret = ttm_bo_reserve(bo, false, false, false, 0);
        if (ret)
                return ret;
 
-       if (--nvbo->pin_refcnt)
+       ref = --nvbo->pin_refcnt;
+       WARN_ON_ONCE(ref < 0);
+       if (ref)
                goto out;
 
        nouveau_bo_placement_set(nvbo, bo->mem.placement, 0);
@@ -578,7 +587,7 @@ nve0_bo_move_init(struct nouveau_channel *chan, u32 handle)
        int ret = RING_SPACE(chan, 2);
        if (ret == 0) {
                BEGIN_NVC0(chan, NvSubCopy, 0x0000, 1);
-               OUT_RING  (chan, handle);
+               OUT_RING  (chan, handle & 0x0000ffff);
                FIRE_RING (chan);
        }
        return ret;
@@ -973,7 +982,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
        struct ttm_mem_reg *old_mem = &bo->mem;
        int ret;
 
-       mutex_lock(&chan->cli->mutex);
+       mutex_lock_nested(&chan->cli->mutex, SINGLE_DEPTH_NESTING);
 
        /* create temporary vmas for the transfer and attach them to the
         * old nouveau_mem node, these will get cleaned up after ttm has
@@ -1014,7 +1023,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
                            struct ttm_mem_reg *, struct ttm_mem_reg *);
                int (*init)(struct nouveau_channel *, u32 handle);
        } _methods[] = {
-               {  "COPY", 0, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
+               {  "COPY", 4, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
                {  "GRCE", 0, 0xa0b5, nve0_bo_move_copy, nvc0_bo_move_init },
                { "COPY1", 5, 0x90b8, nvc0_bo_move_copy, nvc0_bo_move_init },
                { "COPY0", 4, 0x90b5, nvc0_bo_move_copy, nvc0_bo_move_init },
@@ -1034,7 +1043,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
                struct nouveau_channel *chan;
                u32 handle = (mthd->engine << 16) | mthd->oclass;
 
-               if (mthd->init == nve0_bo_move_init)
+               if (mthd->engine)
                        chan = drm->cechan;
                else
                        chan = drm->channel;
index 708b2d1c0037e689b579b860d812463c01a9bf2e..907d20ef6d4d119f81c06fe1801edf3867f01673 100644 (file)
@@ -138,7 +138,7 @@ nouveau_user_framebuffer_create(struct drm_device *dev,
 {
        struct nouveau_framebuffer *nouveau_fb;
        struct drm_gem_object *gem;
-       int ret;
+       int ret = -ENOMEM;
 
        gem = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
        if (!gem)
@@ -146,15 +146,19 @@ nouveau_user_framebuffer_create(struct drm_device *dev,
 
        nouveau_fb = kzalloc(sizeof(struct nouveau_framebuffer), GFP_KERNEL);
        if (!nouveau_fb)
-               return ERR_PTR(-ENOMEM);
+               goto err_unref;
 
        ret = nouveau_framebuffer_init(dev, nouveau_fb, mode_cmd, nouveau_gem_object(gem));
-       if (ret) {
-               drm_gem_object_unreference(gem);
-               return ERR_PTR(ret);
-       }
+       if (ret)
+               goto err;
 
        return &nouveau_fb->base;
+
+err:
+       kfree(nouveau_fb);
+err_unref:
+       drm_gem_object_unreference(gem);
+       return ERR_PTR(ret);
 }
 
 static const struct drm_mode_config_funcs nouveau_mode_config_funcs = {
@@ -524,9 +528,12 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
        struct nouveau_page_flip_state *s;
        struct nouveau_channel *chan = NULL;
        struct nouveau_fence *fence;
-       struct list_head res;
-       struct ttm_validate_buffer res_val[2];
+       struct ttm_validate_buffer resv[2] = {
+               { .bo = &old_bo->bo },
+               { .bo = &new_bo->bo },
+       };
        struct ww_acquire_ctx ticket;
+       LIST_HEAD(res);
        int ret;
 
        if (!drm->channel)
@@ -545,27 +552,19 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
                chan = drm->channel;
        spin_unlock(&old_bo->bo.bdev->fence_lock);
 
-       mutex_lock(&chan->cli->mutex);
-
        if (new_bo != old_bo) {
                ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM);
-               if (likely(!ret)) {
-                       res_val[0].bo = &old_bo->bo;
-                       res_val[1].bo = &new_bo->bo;
-                       INIT_LIST_HEAD(&res);
-                       list_add_tail(&res_val[0].head, &res);
-                       list_add_tail(&res_val[1].head, &res);
-                       ret = ttm_eu_reserve_buffers(&ticket, &res);
-                       if (ret)
-                               nouveau_bo_unpin(new_bo);
-               }
-       } else
-               ret = ttm_bo_reserve(&new_bo->bo, false, false, false, 0);
+               if (ret)
+                       goto fail_free;
 
-       if (ret) {
-               mutex_unlock(&chan->cli->mutex);
-               goto fail_free;
+               list_add(&resv[1].head, &res);
        }
+       list_add(&resv[0].head, &res);
+
+       mutex_lock(&chan->cli->mutex);
+       ret = ttm_eu_reserve_buffers(&ticket, &res);
+       if (ret)
+               goto fail_unpin;
 
        /* Initialize a page flip struct */
        *s = (struct nouveau_page_flip_state)
@@ -576,10 +575,8 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
        /* Emit a page flip */
        if (nv_device(drm->device)->card_type >= NV_50) {
                ret = nv50_display_flip_next(crtc, fb, chan, 0);
-               if (ret) {
-                       mutex_unlock(&chan->cli->mutex);
+               if (ret)
                        goto fail_unreserve;
-               }
        }
 
        ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence);
@@ -590,22 +587,18 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
        /* Update the crtc struct and cleanup */
        crtc->fb = fb;
 
-       if (old_bo != new_bo) {
-               ttm_eu_fence_buffer_objects(&ticket, &res, fence);
+       ttm_eu_fence_buffer_objects(&ticket, &res, fence);
+       if (old_bo != new_bo)
                nouveau_bo_unpin(old_bo);
-       } else {
-               nouveau_bo_fence(new_bo, fence);
-               ttm_bo_unreserve(&new_bo->bo);
-       }
        nouveau_fence_unref(&fence);
        return 0;
 
 fail_unreserve:
-       if (old_bo != new_bo) {
-               ttm_eu_backoff_reservation(&ticket, &res);
+       ttm_eu_backoff_reservation(&ticket, &res);
+fail_unpin:
+       mutex_unlock(&chan->cli->mutex);
+       if (old_bo != new_bo)
                nouveau_bo_unpin(new_bo);
-       } else
-               ttm_bo_unreserve(&new_bo->bo);
 fail_free:
        kfree(s);
        return ret;
index 218a4b522fe591ecca92eee558afc2e1baf9ff0c..61972668fd0532f9f29e6ea540ab925713d5cf7d 100644 (file)
@@ -192,6 +192,18 @@ nouveau_accel_init(struct nouveau_drm *drm)
 
                arg0 = NVE0_CHANNEL_IND_ENGINE_GR;
                arg1 = 1;
+       } else
+       if (device->chipset >= 0xa3 &&
+           device->chipset != 0xaa &&
+           device->chipset != 0xac) {
+               ret = nouveau_channel_new(drm, &drm->client, NVDRM_DEVICE,
+                                         NVDRM_CHAN + 1, NvDmaFB, NvDmaTT,
+                                         &drm->cechan);
+               if (ret)
+                       NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
+
+               arg0 = NvDmaFB;
+               arg1 = NvDmaTT;
        } else {
                arg0 = NvDmaFB;
                arg1 = NvDmaTT;
@@ -284,8 +296,6 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
        return 0;
 }
 
-static struct lock_class_key drm_client_lock_class_key;
-
 static int
 nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 {
@@ -297,7 +307,6 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
        ret = nouveau_cli_create(pdev, "DRM", sizeof(*drm), (void**)&drm);
        if (ret)
                return ret;
-       lockdep_set_class(&drm->client.mutex, &drm_client_lock_class_key);
 
        dev->dev_private = drm;
        drm->dev = dev;
index 9352010030e9fd6f885834aca9e81c769fa61e10..4c1bc061fae2cab668f5d5f2bd24f1ab5febbc62 100644 (file)
@@ -385,6 +385,7 @@ out_unlock:
        mutex_unlock(&dev->struct_mutex);
        if (chan)
                nouveau_bo_vma_del(nvbo, &fbcon->nouveau_fb.vma);
+       nouveau_bo_unmap(nvbo);
 out_unpin:
        nouveau_bo_unpin(nvbo);
 out_unref:
index 1680d9187bab6623cc958be6f3fa603b9f9e6941..be3149932c2d4c3035a36cf40e68f04af8ffd808 100644 (file)
@@ -143,7 +143,7 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
        int ret;
 
        fence->channel  = chan;
-       fence->timeout  = jiffies + (3 * DRM_HZ);
+       fence->timeout  = jiffies + (15 * DRM_HZ);
        fence->sequence = ++fctx->sequence;
 
        ret = fctx->emit(fence);
index e72d09c068a8e8bcb8de4b47abd1ea0340415c12..830cb7bad922bba909a6c5129c16a786cb9ee969 100644 (file)
@@ -50,12 +50,6 @@ nouveau_gem_object_del(struct drm_gem_object *gem)
                return;
        nvbo->gem = NULL;
 
-       /* Lockdep hates you for doing reserve with gem object lock held */
-       if (WARN_ON_ONCE(nvbo->pin_refcnt)) {
-               nvbo->pin_refcnt = 1;
-               nouveau_bo_unpin(nvbo);
-       }
-
        if (gem->import_attach)
                drm_prime_gem_destroy(gem, nvbo->bo.sg);
 
index 54dc6355b0c2a02b7629440248a3cfb5e22a234c..8b40a36c1b57eb2f4e1eeac30a20478e7f563974 100644 (file)
@@ -355,6 +355,7 @@ struct nv50_oimm {
 
 struct nv50_head {
        struct nouveau_crtc base;
+       struct nouveau_bo *image;
        struct nv50_curs curs;
        struct nv50_sync sync;
        struct nv50_ovly ovly;
@@ -517,9 +518,10 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 {
        struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
        struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+       struct nv50_head *head = nv50_head(crtc);
        struct nv50_sync *sync = nv50_sync(crtc);
-       int head = nv_crtc->index, ret;
        u32 *push;
+       int ret;
 
        swap_interval <<= 4;
        if (swap_interval == 0)
@@ -537,7 +539,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
                        return ret;
 
                BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2);
-               OUT_RING  (chan, NvEvoSema0 + head);
+               OUT_RING  (chan, NvEvoSema0 + nv_crtc->index);
                OUT_RING  (chan, sync->addr ^ 0x10);
                BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1);
                OUT_RING  (chan, sync->data + 1);
@@ -546,7 +548,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
                OUT_RING  (chan, sync->data);
        } else
        if (chan && nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) {
-               u64 addr = nv84_fence_crtc(chan, head) + sync->addr;
+               u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr;
                ret = RING_SPACE(chan, 12);
                if (ret)
                        return ret;
@@ -565,7 +567,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
                OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
        } else
        if (chan) {
-               u64 addr = nv84_fence_crtc(chan, head) + sync->addr;
+               u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr;
                ret = RING_SPACE(chan, 10);
                if (ret)
                        return ret;
@@ -630,6 +632,8 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
        evo_mthd(push, 0x0080, 1);
        evo_data(push, 0x00000000);
        evo_kick(push, sync);
+
+       nouveau_bo_ref(nv_fb->nvbo, &head->image);
        return 0;
 }
 
@@ -1038,18 +1042,17 @@ static int
 nv50_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
 {
        struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb);
+       struct nv50_head *head = nv50_head(crtc);
        int ret;
 
        ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM);
-       if (ret)
-               return ret;
-
-       if (old_fb) {
-               nvfb = nouveau_framebuffer(old_fb);
-               nouveau_bo_unpin(nvfb->nvbo);
+       if (ret == 0) {
+               if (head->image)
+                       nouveau_bo_unpin(head->image);
+               nouveau_bo_ref(nvfb->nvbo, &head->image);
        }
 
-       return 0;
+       return ret;
 }
 
 static int
@@ -1198,6 +1201,15 @@ nv50_crtc_lut_load(struct drm_crtc *crtc)
        }
 }
 
+static void
+nv50_crtc_disable(struct drm_crtc *crtc)
+{
+       struct nv50_head *head = nv50_head(crtc);
+       if (head->image)
+               nouveau_bo_unpin(head->image);
+       nouveau_bo_ref(NULL, &head->image);
+}
+
 static int
 nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
                     uint32_t handle, uint32_t width, uint32_t height)
@@ -1271,18 +1283,29 @@ nv50_crtc_destroy(struct drm_crtc *crtc)
        struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
        struct nv50_disp *disp = nv50_disp(crtc->dev);
        struct nv50_head *head = nv50_head(crtc);
+
        nv50_dmac_destroy(disp->core, &head->ovly.base);
        nv50_pioc_destroy(disp->core, &head->oimm.base);
        nv50_dmac_destroy(disp->core, &head->sync.base);
        nv50_pioc_destroy(disp->core, &head->curs.base);
+
+       /*XXX: this shouldn't be necessary, but the core doesn't call
+        *     disconnect() during the cleanup paths
+        */
+       if (head->image)
+               nouveau_bo_unpin(head->image);
+       nouveau_bo_ref(NULL, &head->image);
+
        nouveau_bo_unmap(nv_crtc->cursor.nvbo);
        if (nv_crtc->cursor.nvbo)
                nouveau_bo_unpin(nv_crtc->cursor.nvbo);
        nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+
        nouveau_bo_unmap(nv_crtc->lut.nvbo);
        if (nv_crtc->lut.nvbo)
                nouveau_bo_unpin(nv_crtc->lut.nvbo);
        nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
+
        drm_crtc_cleanup(crtc);
        kfree(crtc);
 }
@@ -1296,6 +1319,7 @@ static const struct drm_crtc_helper_funcs nv50_crtc_hfunc = {
        .mode_set_base = nv50_crtc_mode_set_base,
        .mode_set_base_atomic = nv50_crtc_mode_set_base_atomic,
        .load_lut = nv50_crtc_lut_load,
+       .disable = nv50_crtc_disable,
 };
 
 static const struct drm_crtc_funcs nv50_crtc_func = {
index 4c605c70ebf9ed858d014d94383ebfab86851b1f..deb5c25305aff4622ceb8f84a1598902e484a6ce 100644 (file)
@@ -562,7 +562,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
                                struct hv_hotadd_state *has)
 {
        int ret = 0;
-       int i, nid, t;
+       int i, nid;
        unsigned long start_pfn;
        unsigned long processed_pfn;
        unsigned long total_pfn = pfn_count;
@@ -607,14 +607,11 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 
                /*
                 * Wait for the memory block to be onlined.
+                * Since the hot add has succeeded, it is ok to
+                * proceed even if the pages in the hot added region
+                * have not been "onlined" within the allowed time.
                 */
-               t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
-               if (t == 0) {
-                       pr_info("hot_add memory timedout\n");
-                       has->ha_end_pfn -= HA_CHUNK;
-                       has->covered_end_pfn -=  processed_pfn;
-                       break;
-               }
+               wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
 
        }
 
@@ -978,6 +975,14 @@ static void post_status(struct hv_dynmem_device *dm)
                                dm->num_pages_ballooned +
                                compute_balloon_floor();
 
+       /*
+        * If our transaction ID is no longer current, just don't
+        * send the status. This can happen if we were interrupted
+        * after we picked our transaction ID.
+        */
+       if (status.hdr.trans_id != atomic_read(&trans_id))
+               return;
+
        vmbus_sendpacket(dm->dev->channel, &status,
                                sizeof(struct dm_status),
                                (unsigned long)NULL,
index a2464bf07c49b03342c0dfcd79852cc0a232a703..e8e071fc1d6d1ab265d858772fc94bbd5cd8930c 100644 (file)
@@ -690,7 +690,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
        if (ret)
                pr_err("Unable to register child device\n");
        else
-               pr_info("child device %s registered\n",
+               pr_debug("child device %s registered\n",
                        dev_name(&child_device_obj->device));
 
        return ret;
@@ -702,14 +702,14 @@ int vmbus_device_register(struct hv_device *child_device_obj)
  */
 void vmbus_device_unregister(struct hv_device *device_obj)
 {
+       pr_debug("child device %s unregistered\n",
+               dev_name(&device_obj->device));
+
        /*
         * Kick off the process of unregistering the device.
         * This will call vmbus_remove() and eventually vmbus_device_release()
         */
        device_unregister(&device_obj->device);
-
-       pr_info("child device %s unregistered\n",
-               dev_name(&device_obj->device));
 }
 
 
index 048f2947e08b1721a6ffab0de2ba5dd24d1a23de..e45f5575fd4dde8d2768daaa553ab57b9d0023b1 100644 (file)
 #include "bcache.h"
 #include "btree.h"
 
+#include <linux/freezer.h>
+#include <linux/kthread.h>
 #include <linux/random.h>
+#include <trace/events/bcache.h>
 
 #define MAX_IN_FLIGHT_DISCARDS         8U
 
@@ -151,7 +154,7 @@ static void discard_finish(struct work_struct *w)
        mutex_unlock(&ca->set->bucket_lock);
 
        closure_wake_up(&ca->set->bucket_wait);
-       wake_up(&ca->set->alloc_wait);
+       wake_up_process(ca->alloc_thread);
 
        closure_put(&ca->set->cl);
 }
@@ -350,38 +353,30 @@ static void invalidate_buckets(struct cache *ca)
                break;
        }
 
-       pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu",
-                fifo_used(&ca->free), ca->free.size,
-                fifo_used(&ca->free_inc), ca->free_inc.size,
-                fifo_used(&ca->unused), ca->unused.size);
+       trace_bcache_alloc_invalidate(ca);
 }
 
 #define allocator_wait(ca, cond)                                       \
 do {                                                                   \
-       DEFINE_WAIT(__wait);                                            \
-                                                                       \
        while (1) {                                                     \
-               prepare_to_wait(&ca->set->alloc_wait,                   \
-                               &__wait, TASK_INTERRUPTIBLE);           \
+               set_current_state(TASK_INTERRUPTIBLE);                  \
                if (cond)                                               \
                        break;                                          \
                                                                        \
                mutex_unlock(&(ca)->set->bucket_lock);                  \
-               if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) {  \
-                       finish_wait(&ca->set->alloc_wait, &__wait);     \
-                       closure_return(cl);                             \
-               }                                                       \
+               if (kthread_should_stop())                              \
+                       return 0;                                       \
                                                                        \
+               try_to_freeze();                                        \
                schedule();                                             \
                mutex_lock(&(ca)->set->bucket_lock);                    \
        }                                                               \
-                                                                       \
-       finish_wait(&ca->set->alloc_wait, &__wait);                     \
+       __set_current_state(TASK_RUNNING);                              \
 } while (0)
 
-void bch_allocator_thread(struct closure *cl)
+static int bch_allocator_thread(void *arg)
 {
-       struct cache *ca = container_of(cl, struct cache, alloc);
+       struct cache *ca = arg;
 
        mutex_lock(&ca->set->bucket_lock);
 
@@ -442,7 +437,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl)
 {
        long r = -1;
 again:
-       wake_up(&ca->set->alloc_wait);
+       wake_up_process(ca->alloc_thread);
 
        if (fifo_used(&ca->free) > ca->watermark[watermark] &&
            fifo_pop(&ca->free, r)) {
@@ -476,9 +471,7 @@ again:
                return r;
        }
 
-       pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu",
-                atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free),
-                fifo_used(&ca->free_inc), fifo_used(&ca->unused));
+       trace_bcache_alloc_fail(ca);
 
        if (cl) {
                closure_wait(&ca->set->bucket_wait, cl);
@@ -552,6 +545,17 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
 
 /* Init */
 
+int bch_cache_allocator_start(struct cache *ca)
+{
+       struct task_struct *k = kthread_run(bch_allocator_thread,
+                                           ca, "bcache_allocator");
+       if (IS_ERR(k))
+               return PTR_ERR(k);
+
+       ca->alloc_thread = k;
+       return 0;
+}
+
 void bch_cache_allocator_exit(struct cache *ca)
 {
        struct discard *d;
index d3e15b42a4ab97655d989c3fe8240b09613acdc4..b39f6f0b45f27b89f29e580844e41d8d315d9c18 100644 (file)
 #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
 
 #include <linux/bio.h>
-#include <linux/blktrace_api.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -388,8 +387,6 @@ struct keybuf_key {
 typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
 
 struct keybuf {
-       keybuf_pred_fn          *key_predicate;
-
        struct bkey             last_scanned;
        spinlock_t              lock;
 
@@ -437,9 +434,12 @@ struct bcache_device {
 
        /* If nonzero, we're detaching/unregistering from cache set */
        atomic_t                detaching;
+       int                     flush_done;
+
+       uint64_t                nr_stripes;
+       unsigned                stripe_size_bits;
+       atomic_t                *stripe_sectors_dirty;
 
-       atomic_long_t           sectors_dirty;
-       unsigned long           sectors_dirty_gc;
        unsigned long           sectors_dirty_last;
        long                    sectors_dirty_derivative;
 
@@ -531,6 +531,7 @@ struct cached_dev {
        unsigned                sequential_merge:1;
        unsigned                verify:1;
 
+       unsigned                partial_stripes_expensive:1;
        unsigned                writeback_metadata:1;
        unsigned                writeback_running:1;
        unsigned char           writeback_percent;
@@ -565,8 +566,7 @@ struct cache {
 
        unsigned                watermark[WATERMARK_MAX];
 
-       struct closure          alloc;
-       struct workqueue_struct *alloc_workqueue;
+       struct task_struct      *alloc_thread;
 
        struct closure          prio;
        struct prio_set         *disk_buckets;
@@ -664,13 +664,9 @@ struct gc_stat {
  * CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
  * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e.
  * flushing dirty data).
- *
- * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down
- * the allocation thread.
  */
 #define CACHE_SET_UNREGISTERING                0
 #define        CACHE_SET_STOPPING              1
-#define        CACHE_SET_STOPPING_2            2
 
 struct cache_set {
        struct closure          cl;
@@ -703,9 +699,6 @@ struct cache_set {
        /* For the btree cache */
        struct shrinker         shrink;
 
-       /* For the allocator itself */
-       wait_queue_head_t       alloc_wait;
-
        /* For the btree cache and anything allocation related */
        struct mutex            bucket_lock;
 
@@ -823,10 +816,9 @@ struct cache_set {
 
        /*
         * A btree node on disk could have too many bsets for an iterator to fit
-        * on the stack - this is a single element mempool for btree_read_work()
+        * on the stack - have to dynamically allocate them
         */
-       struct mutex            fill_lock;
-       struct btree_iter       *fill_iter;
+       mempool_t               *fill_iter;
 
        /*
         * btree_sort() is a merge sort and requires temporary space - single
@@ -834,6 +826,7 @@ struct cache_set {
         */
        struct mutex            sort_lock;
        struct bset             *sort;
+       unsigned                sort_crit_factor;
 
        /* List of buckets we're currently writing data to */
        struct list_head        data_buckets;
@@ -906,8 +899,6 @@ static inline unsigned local_clock_us(void)
        return local_clock() >> 10;
 }
 
-#define MAX_BSETS              4U
-
 #define BTREE_PRIO             USHRT_MAX
 #define INITIAL_PRIO           32768
 
@@ -1112,23 +1103,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k)
                atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
 }
 
-/* Blktrace macros */
-
-#define blktrace_msg(c, fmt, ...)                                      \
-do {                                                                   \
-       struct request_queue *q = bdev_get_queue(c->bdev);              \
-       if (q)                                                          \
-               blk_add_trace_msg(q, fmt, ##__VA_ARGS__);               \
-} while (0)
-
-#define blktrace_msg_all(s, fmt, ...)                                  \
-do {                                                                   \
-       struct cache *_c;                                               \
-       unsigned i;                                                     \
-       for_each_cache(_c, (s), i)                                      \
-               blktrace_msg(_c, fmt, ##__VA_ARGS__);                   \
-} while (0)
-
 static inline void cached_dev_put(struct cached_dev *dc)
 {
        if (atomic_dec_and_test(&dc->count))
@@ -1173,10 +1147,16 @@ static inline uint8_t bucket_disk_gen(struct bucket *b)
        static struct kobj_attribute ksysfs_##n =                       \
                __ATTR(n, S_IWUSR|S_IRUSR, show, store)
 
-/* Forward declarations */
+static inline void wake_up_allocators(struct cache_set *c)
+{
+       struct cache *ca;
+       unsigned i;
+
+       for_each_cache(ca, c, i)
+               wake_up_process(ca->alloc_thread);
+}
 
-void bch_writeback_queue(struct cached_dev *);
-void bch_writeback_add(struct cached_dev *, unsigned);
+/* Forward declarations */
 
 void bch_count_io_errors(struct cache *, int, const char *);
 void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
@@ -1193,7 +1173,6 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
 uint8_t bch_inc_gen(struct cache *, struct bucket *);
 void bch_rescale_priorities(struct cache_set *, int);
 bool bch_bucket_add_unused(struct cache *, struct bucket *);
-void bch_allocator_thread(struct closure *);
 
 long bch_bucket_alloc(struct cache *, unsigned, struct closure *);
 void bch_bucket_free(struct cache_set *, struct bkey *);
@@ -1241,9 +1220,9 @@ void bch_cache_set_stop(struct cache_set *);
 struct cache_set *bch_cache_set_alloc(struct cache_sb *);
 void bch_btree_cache_free(struct cache_set *);
 int bch_btree_cache_alloc(struct cache_set *);
-void bch_cached_dev_writeback_init(struct cached_dev *);
 void bch_moving_init_cache_set(struct cache_set *);
 
+int bch_cache_allocator_start(struct cache *ca);
 void bch_cache_allocator_exit(struct cache *ca);
 int bch_cache_allocator_init(struct cache *ca);
 
index 1d27d3af32514055e7512a677395c709a713c28c..8010eed06a51c8320786a1c49463c98c8aa70555 100644 (file)
@@ -78,6 +78,7 @@ struct bkey *bch_keylist_pop(struct keylist *l)
 bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
 {
        unsigned i;
+       char buf[80];
 
        if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k)))
                goto bad;
@@ -102,7 +103,8 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
 
        return false;
 bad:
-       cache_bug(c, "spotted bad key %s: %s", pkey(k), bch_ptr_status(c, k));
+       bch_bkey_to_text(buf, sizeof(buf), k);
+       cache_bug(c, "spotted bad key %s: %s", buf, bch_ptr_status(c, k));
        return true;
 }
 
@@ -162,10 +164,16 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k)
 #ifdef CONFIG_BCACHE_EDEBUG
 bug:
        mutex_unlock(&b->c->bucket_lock);
-       btree_bug(b,
+
+       {
+               char buf[80];
+
+               bch_bkey_to_text(buf, sizeof(buf), k);
+               btree_bug(b,
 "inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
-                 pkey(k), PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
-                 g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+                         buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
+                         g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+       }
        return true;
 #endif
 }
@@ -1084,33 +1092,39 @@ void bch_btree_sort_into(struct btree *b, struct btree *new)
        new->sets->size = 0;
 }
 
+#define SORT_CRIT      (4096 / sizeof(uint64_t))
+
 void bch_btree_sort_lazy(struct btree *b)
 {
-       if (b->nsets) {
-               unsigned i, j, keys = 0, total;
+       unsigned crit = SORT_CRIT;
+       int i;
 
-               for (i = 0; i <= b->nsets; i++)
-                       keys += b->sets[i].data->keys;
-
-               total = keys;
+       /* Don't sort if nothing to do */
+       if (!b->nsets)
+               goto out;
 
-               for (j = 0; j < b->nsets; j++) {
-                       if (keys * 2 < total ||
-                           keys < 1000) {
-                               bch_btree_sort_partial(b, j);
-                               return;
-                       }
+       /* If not a leaf node, always sort */
+       if (b->level) {
+               bch_btree_sort(b);
+               return;
+       }
 
-                       keys -= b->sets[j].data->keys;
-               }
+       for (i = b->nsets - 1; i >= 0; --i) {
+               crit *= b->c->sort_crit_factor;
 
-               /* Must sort if b->nsets == 3 or we'll overflow */
-               if (b->nsets >= (MAX_BSETS - 1) - b->level) {
-                       bch_btree_sort(b);
+               if (b->sets[i].data->keys < crit) {
+                       bch_btree_sort_partial(b, i);
                        return;
                }
        }
 
+       /* Sort if we'd overflow */
+       if (b->nsets + 1 == MAX_BSETS) {
+               bch_btree_sort(b);
+               return;
+       }
+
+out:
        bset_build_written_tree(b);
 }
 
index 57a9cff4154613900f31307ee33015527f953d87..ae115a253d7312f81422a4b162e2ca4952556a44 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _BCACHE_BSET_H
 #define _BCACHE_BSET_H
 
+#include <linux/slab.h>
+
 /*
  * BKEYS:
  *
 
 /* Btree key comparison/iteration */
 
+#define MAX_BSETS              4U
+
 struct btree_iter {
        size_t size, used;
        struct btree_iter_set {
index 7a5658f04e62092610b2144faab12ae5fa3976a0..ee372884c405444886671901e45c3834626f383a 100644 (file)
@@ -24,6 +24,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/slab.h>
 #include <linux/bitops.h>
@@ -134,44 +135,17 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i)
        return crc ^ 0xffffffffffffffffULL;
 }
 
-static void btree_bio_endio(struct bio *bio, int error)
+static void bch_btree_node_read_done(struct btree *b)
 {
-       struct closure *cl = bio->bi_private;
-       struct btree *b = container_of(cl, struct btree, io.cl);
-
-       if (error)
-               set_btree_node_io_error(b);
-
-       bch_bbio_count_io_errors(b->c, bio, error, (bio->bi_rw & WRITE)
-                                ? "writing btree" : "reading btree");
-       closure_put(cl);
-}
-
-static void btree_bio_init(struct btree *b)
-{
-       BUG_ON(b->bio);
-       b->bio = bch_bbio_alloc(b->c);
-
-       b->bio->bi_end_io       = btree_bio_endio;
-       b->bio->bi_private      = &b->io.cl;
-}
-
-void bch_btree_read_done(struct closure *cl)
-{
-       struct btree *b = container_of(cl, struct btree, io.cl);
-       struct bset *i = b->sets[0].data;
-       struct btree_iter *iter = b->c->fill_iter;
        const char *err = "bad btree header";
-       BUG_ON(b->nsets || b->written);
-
-       bch_bbio_free(b->bio, b->c);
-       b->bio = NULL;
+       struct bset *i = b->sets[0].data;
+       struct btree_iter *iter;
 
-       mutex_lock(&b->c->fill_lock);
+       iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
+       iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
        iter->used = 0;
 
-       if (btree_node_io_error(b) ||
-           !i->seq)
+       if (!i->seq)
                goto err;
 
        for (;
@@ -228,17 +202,8 @@ void bch_btree_read_done(struct closure *cl)
        if (b->written < btree_blocks(b))
                bch_bset_init_next(b);
 out:
-
-       mutex_unlock(&b->c->fill_lock);
-
-       spin_lock(&b->c->btree_read_time_lock);
-       bch_time_stats_update(&b->c->btree_read_time, b->io_start_time);
-       spin_unlock(&b->c->btree_read_time_lock);
-
-       smp_wmb(); /* read_done is our write lock */
-       set_btree_node_read_done(b);
-
-       closure_return(cl);
+       mempool_free(iter, b->c->fill_iter);
+       return;
 err:
        set_btree_node_io_error(b);
        bch_cache_set_error(b->c, "%s at bucket %zu, block %zu, %u keys",
@@ -247,48 +212,69 @@ err:
        goto out;
 }
 
-void bch_btree_read(struct btree *b)
+static void btree_node_read_endio(struct bio *bio, int error)
+{
+       struct closure *cl = bio->bi_private;
+       closure_put(cl);
+}
+
+void bch_btree_node_read(struct btree *b)
 {
-       BUG_ON(b->nsets || b->written);
+       uint64_t start_time = local_clock();
+       struct closure cl;
+       struct bio *bio;
+
+       trace_bcache_btree_read(b);
+
+       closure_init_stack(&cl);
+
+       bio = bch_bbio_alloc(b->c);
+       bio->bi_rw      = REQ_META|READ_SYNC;
+       bio->bi_size    = KEY_SIZE(&b->key) << 9;
+       bio->bi_end_io  = btree_node_read_endio;
+       bio->bi_private = &cl;
+
+       bch_bio_map(bio, b->sets[0].data);
+
+       bch_submit_bbio(bio, b->c, &b->key, 0);
+       closure_sync(&cl);
 
-       if (!closure_trylock(&b->io.cl, &b->c->cl))
-               BUG();
+       if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+               set_btree_node_io_error(b);
 
-       b->io_start_time = local_clock();
+       bch_bbio_free(bio, b->c);
 
-       btree_bio_init(b);
-       b->bio->bi_rw   = REQ_META|READ_SYNC;
-       b->bio->bi_size = KEY_SIZE(&b->key) << 9;
+       if (btree_node_io_error(b))
+               goto err;
 
-       bch_bio_map(b->bio, b->sets[0].data);
+       bch_btree_node_read_done(b);
 
-       pr_debug("%s", pbtree(b));
-       trace_bcache_btree_read(b->bio);
-       bch_submit_bbio(b->bio, b->c, &b->key, 0);
+       spin_lock(&b->c->btree_read_time_lock);
+       bch_time_stats_update(&b->c->btree_read_time, start_time);
+       spin_unlock(&b->c->btree_read_time_lock);
 
-       continue_at(&b->io.cl, bch_btree_read_done, system_wq);
+       return;
+err:
+       bch_cache_set_error(b->c, "io error reading bucket %lu",
+                           PTR_BUCKET_NR(b->c, &b->key, 0));
 }
 
 static void btree_complete_write(struct btree *b, struct btree_write *w)
 {
        if (w->prio_blocked &&
            !atomic_sub_return(w->prio_blocked, &b->c->prio_blocked))
-               wake_up(&b->c->alloc_wait);
+               wake_up_allocators(b->c);
 
        if (w->journal) {
                atomic_dec_bug(w->journal);
                __closure_wake_up(&b->c->journal.wait);
        }
 
-       if (w->owner)
-               closure_put(w->owner);
-
        w->prio_blocked = 0;
        w->journal      = NULL;
-       w->owner        = NULL;
 }
 
-static void __btree_write_done(struct closure *cl)
+static void __btree_node_write_done(struct closure *cl)
 {
        struct btree *b = container_of(cl, struct btree, io.cl);
        struct btree_write *w = btree_prev_write(b);
@@ -304,7 +290,7 @@ static void __btree_write_done(struct closure *cl)
        closure_return(cl);
 }
 
-static void btree_write_done(struct closure *cl)
+static void btree_node_write_done(struct closure *cl)
 {
        struct btree *b = container_of(cl, struct btree, io.cl);
        struct bio_vec *bv;
@@ -313,10 +299,22 @@ static void btree_write_done(struct closure *cl)
        __bio_for_each_segment(bv, b->bio, n, 0)
                __free_page(bv->bv_page);
 
-       __btree_write_done(cl);
+       __btree_node_write_done(cl);
 }
 
-static void do_btree_write(struct btree *b)
+static void btree_node_write_endio(struct bio *bio, int error)
+{
+       struct closure *cl = bio->bi_private;
+       struct btree *b = container_of(cl, struct btree, io.cl);
+
+       if (error)
+               set_btree_node_io_error(b);
+
+       bch_bbio_count_io_errors(b->c, bio, error, "writing btree");
+       closure_put(cl);
+}
+
+static void do_btree_node_write(struct btree *b)
 {
        struct closure *cl = &b->io.cl;
        struct bset *i = b->sets[b->nsets].data;
@@ -325,15 +323,34 @@ static void do_btree_write(struct btree *b)
        i->version      = BCACHE_BSET_VERSION;
        i->csum         = btree_csum_set(b, i);
 
-       btree_bio_init(b);
-       b->bio->bi_rw   = REQ_META|WRITE_SYNC;
-       b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c);
+       BUG_ON(b->bio);
+       b->bio = bch_bbio_alloc(b->c);
+
+       b->bio->bi_end_io       = btree_node_write_endio;
+       b->bio->bi_private      = &b->io.cl;
+       b->bio->bi_rw           = REQ_META|WRITE_SYNC|REQ_FUA;
+       b->bio->bi_size         = set_blocks(i, b->c) * block_bytes(b->c);
        bch_bio_map(b->bio, i);
 
+       /*
+        * If we're appending to a leaf node, we don't technically need FUA -
+        * this write just needs to be persisted before the next journal write,
+        * which will be marked FLUSH|FUA.
+        *
+        * Similarly if we're writing a new btree root - the pointer is going to
+        * be in the next journal entry.
+        *
+        * But if we're writing a new btree node (that isn't a root) or
+        * appending to a non leaf btree node, we need either FUA or a flush
+        * when we write the parent with the new pointer. FUA is cheaper than a
+        * flush, and writes appending to leaf nodes aren't blocking anything so
+        * just make all btree node writes FUA to keep things sane.
+        */
+
        bkey_copy(&k.key, &b->key);
        SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i));
 
-       if (!bch_bio_alloc_pages(b->bio, GFP_NOIO)) {
+       if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
                int j;
                struct bio_vec *bv;
                void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
@@ -342,40 +359,41 @@ static void do_btree_write(struct btree *b)
                        memcpy(page_address(bv->bv_page),
                               base + j * PAGE_SIZE, PAGE_SIZE);
 
-               trace_bcache_btree_write(b->bio);
                bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
-               continue_at(cl, btree_write_done, NULL);
+               continue_at(cl, btree_node_write_done, NULL);
        } else {
                b->bio->bi_vcnt = 0;
                bch_bio_map(b->bio, i);
 
-               trace_bcache_btree_write(b->bio);
                bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
                closure_sync(cl);
-               __btree_write_done(cl);
+               __btree_node_write_done(cl);
        }
 }
 
-static void __btree_write(struct btree *b)
+void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
        struct bset *i = b->sets[b->nsets].data;
 
+       trace_bcache_btree_write(b);
+
        BUG_ON(current->bio_list);
+       BUG_ON(b->written >= btree_blocks(b));
+       BUG_ON(b->written && !i->keys);
+       BUG_ON(b->sets->data->seq != i->seq);
+       bch_check_key_order(b, i);
 
-       closure_lock(&b->io, &b->c->cl);
        cancel_delayed_work(&b->work);
 
+       /* If caller isn't waiting for write, parent refcount is cache set */
+       closure_lock(&b->io, parent ?: &b->c->cl);
+
        clear_bit(BTREE_NODE_dirty,      &b->flags);
        change_bit(BTREE_NODE_write_idx, &b->flags);
 
-       bch_check_key_order(b, i);
-       BUG_ON(b->written && !i->keys);
-
-       do_btree_write(b);
-
-       pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys);
+       do_btree_node_write(b);
 
        b->written += set_blocks(i, b->c);
        atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
@@ -387,37 +405,31 @@ static void __btree_write(struct btree *b)
                bch_bset_init_next(b);
 }
 
-static void btree_write_work(struct work_struct *w)
+static void btree_node_write_work(struct work_struct *w)
 {
        struct btree *b = container_of(to_delayed_work(w), struct btree, work);
 
-       down_write(&b->lock);
+       rw_lock(true, b, b->level);
 
        if (btree_node_dirty(b))
-               __btree_write(b);
-       up_write(&b->lock);
+               bch_btree_node_write(b, NULL);
+       rw_unlock(true, b);
 }
 
-void bch_btree_write(struct btree *b, bool now, struct btree_op *op)
+static void bch_btree_leaf_dirty(struct btree *b, struct btree_op *op)
 {
        struct bset *i = b->sets[b->nsets].data;
        struct btree_write *w = btree_current_write(b);
 
-       BUG_ON(b->written &&
-              (b->written >= btree_blocks(b) ||
-               i->seq != b->sets[0].data->seq ||
-               !i->keys));
+       BUG_ON(!b->written);
+       BUG_ON(!i->keys);
 
-       if (!btree_node_dirty(b)) {
-               set_btree_node_dirty(b);
-               queue_delayed_work(btree_io_wq, &b->work,
-                                  msecs_to_jiffies(30000));
-       }
+       if (!btree_node_dirty(b))
+               queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
 
-       w->prio_blocked += b->prio_blocked;
-       b->prio_blocked = 0;
+       set_btree_node_dirty(b);
 
-       if (op && op->journal && !b->level) {
+       if (op && op->journal) {
                if (w->journal &&
                    journal_pin_cmp(b->c, w, op)) {
                        atomic_dec_bug(w->journal);
@@ -430,23 +442,10 @@ void bch_btree_write(struct btree *b, bool now, struct btree_op *op)
                }
        }
 
-       if (current->bio_list)
-               return;
-
        /* Force write if set is too big */
-       if (now ||
-           b->level ||
-           set_bytes(i) > PAGE_SIZE - 48) {
-               if (op && now) {
-                       /* Must wait on multiple writes */
-                       BUG_ON(w->owner);
-                       w->owner = &op->cl;
-                       closure_get(&op->cl);
-               }
-
-               __btree_write(b);
-       }
-       BUG_ON(!b->written);
+       if (set_bytes(i) > PAGE_SIZE - 48 &&
+           !current->bio_list)
+               bch_btree_node_write(b, NULL);
 }
 
 /*
@@ -559,7 +558,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
        init_rwsem(&b->lock);
        lockdep_set_novalidate_class(&b->lock);
        INIT_LIST_HEAD(&b->list);
-       INIT_DELAYED_WORK(&b->work, btree_write_work);
+       INIT_DELAYED_WORK(&b->work, btree_node_write_work);
        b->c = c;
        closure_init_unlocked(&b->io);
 
@@ -582,7 +581,7 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order)
        BUG_ON(btree_node_dirty(b) && !b->sets[0].data);
 
        if (cl && btree_node_dirty(b))
-               bch_btree_write(b, true, NULL);
+               bch_btree_node_write(b, NULL);
 
        if (cl)
                closure_wait_event_async(&b->io.wait, cl,
@@ -623,6 +622,13 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
        else if (!mutex_trylock(&c->bucket_lock))
                return -1;
 
+       /*
+        * It's _really_ critical that we don't free too many btree nodes - we
+        * have to always leave ourselves a reserve. The reserve is how we
+        * guarantee that allocating memory for a new btree node can always
+        * succeed, so that inserting keys into the btree can always succeed and
+        * IO can always make forward progress:
+        */
        nr /= c->btree_pages;
        nr = min_t(unsigned long, nr, mca_can_free(c));
 
@@ -766,6 +772,8 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
        int ret = -ENOMEM;
        struct btree *i;
 
+       trace_bcache_btree_cache_cannibalize(c);
+
        if (!cl)
                return ERR_PTR(-ENOMEM);
 
@@ -784,7 +792,6 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
                return ERR_PTR(-EAGAIN);
        }
 
-       /* XXX: tracepoint */
        c->try_harder = cl;
        c->try_harder_start = local_clock();
 retry:
@@ -905,6 +912,9 @@ retry:
        b = mca_find(c, k);
 
        if (!b) {
+               if (current->bio_list)
+                       return ERR_PTR(-EAGAIN);
+
                mutex_lock(&c->bucket_lock);
                b = mca_alloc(c, k, level, &op->cl);
                mutex_unlock(&c->bucket_lock);
@@ -914,7 +924,7 @@ retry:
                if (IS_ERR(b))
                        return b;
 
-               bch_btree_read(b);
+               bch_btree_node_read(b);
 
                if (!write)
                        downgrade_write(&b->lock);
@@ -937,15 +947,12 @@ retry:
        for (; i <= b->nsets; i++)
                prefetch(b->sets[i].data);
 
-       if (!closure_wait_event(&b->io.wait, &op->cl,
-                               btree_node_read_done(b))) {
-               rw_unlock(write, b);
-               b = ERR_PTR(-EAGAIN);
-       } else if (btree_node_io_error(b)) {
+       if (btree_node_io_error(b)) {
                rw_unlock(write, b);
-               b = ERR_PTR(-EIO);
-       } else
-               BUG_ON(!b->written);
+               return ERR_PTR(-EIO);
+       }
+
+       BUG_ON(!b->written);
 
        return b;
 }
@@ -959,7 +966,7 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
        mutex_unlock(&c->bucket_lock);
 
        if (!IS_ERR_OR_NULL(b)) {
-               bch_btree_read(b);
+               bch_btree_node_read(b);
                rw_unlock(true, b);
        }
 }
@@ -970,24 +977,19 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
 {
        unsigned i;
 
+       trace_bcache_btree_node_free(b);
+
        /*
         * The BUG_ON() in btree_node_get() implies that we must have a write
         * lock on parent to free or even invalidate a node
         */
        BUG_ON(op->lock <= b->level);
        BUG_ON(b == b->c->root);
-       pr_debug("bucket %s", pbtree(b));
 
        if (btree_node_dirty(b))
                btree_complete_write(b, btree_current_write(b));
        clear_bit(BTREE_NODE_dirty, &b->flags);
 
-       if (b->prio_blocked &&
-           !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked))
-               wake_up(&b->c->alloc_wait);
-
-       b->prio_blocked = 0;
-
        cancel_delayed_work(&b->work);
 
        mutex_lock(&b->c->bucket_lock);
@@ -1028,17 +1030,20 @@ retry:
                goto retry;
        }
 
-       set_btree_node_read_done(b);
        b->accessed = 1;
        bch_bset_init_next(b);
 
        mutex_unlock(&c->bucket_lock);
+
+       trace_bcache_btree_node_alloc(b);
        return b;
 err_free:
        bch_bucket_free(c, &k.key);
        __bkey_put(c, &k.key);
 err:
        mutex_unlock(&c->bucket_lock);
+
+       trace_bcache_btree_node_alloc_fail(b);
        return b;
 }
 
@@ -1137,11 +1142,8 @@ static int btree_gc_mark_node(struct btree *b, unsigned *keys,
                gc->nkeys++;
 
                gc->data += KEY_SIZE(k);
-               if (KEY_DIRTY(k)) {
+               if (KEY_DIRTY(k))
                        gc->dirty += KEY_SIZE(k);
-                       if (d)
-                               d->sectors_dirty_gc += KEY_SIZE(k);
-               }
        }
 
        for (t = b->sets; t <= &b->sets[b->nsets]; t++)
@@ -1166,14 +1168,11 @@ static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k,
 
        if (!IS_ERR_OR_NULL(n)) {
                swap(b, n);
+               __bkey_put(b->c, &b->key);
 
                memcpy(k->ptr, b->key.ptr,
                       sizeof(uint64_t) * KEY_PTRS(&b->key));
 
-               __bkey_put(b->c, &b->key);
-               atomic_inc(&b->c->prio_blocked);
-               b->prio_blocked++;
-
                btree_node_free(n, op);
                up_write(&n->lock);
        }
@@ -1278,7 +1277,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
        btree_node_free(r->b, op);
        up_write(&r->b->lock);
 
-       pr_debug("coalesced %u nodes", nodes);
+       trace_bcache_btree_gc_coalesce(nodes);
 
        gc->nodes--;
        nodes--;
@@ -1293,14 +1292,9 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
        void write(struct btree *r)
        {
                if (!r->written)
-                       bch_btree_write(r, true, op);
-               else if (btree_node_dirty(r)) {
-                       BUG_ON(btree_current_write(r)->owner);
-                       btree_current_write(r)->owner = writes;
-                       closure_get(writes);
-
-                       bch_btree_write(r, true, NULL);
-               }
+                       bch_btree_node_write(r, &op->cl);
+               else if (btree_node_dirty(r))
+                       bch_btree_node_write(r, writes);
 
                up_write(&r->lock);
        }
@@ -1386,9 +1380,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
                ret = btree_gc_recurse(b, op, writes, gc);
 
        if (!b->written || btree_node_dirty(b)) {
-               atomic_inc(&b->c->prio_blocked);
-               b->prio_blocked++;
-               bch_btree_write(b, true, n ? op : NULL);
+               bch_btree_node_write(b, n ? &op->cl : NULL);
        }
 
        if (!IS_ERR_OR_NULL(n)) {
@@ -1405,7 +1397,6 @@ static void btree_gc_start(struct cache_set *c)
 {
        struct cache *ca;
        struct bucket *b;
-       struct bcache_device **d;
        unsigned i;
 
        if (!c->gc_mark_valid)
@@ -1419,16 +1410,12 @@ static void btree_gc_start(struct cache_set *c)
        for_each_cache(ca, c, i)
                for_each_bucket(b, ca) {
                        b->gc_gen = b->gen;
-                       if (!atomic_read(&b->pin))
+                       if (!atomic_read(&b->pin)) {
                                SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
+                               SET_GC_SECTORS_USED(b, 0);
+                       }
                }
 
-       for (d = c->devices;
-            d < c->devices + c->nr_uuids;
-            d++)
-               if (*d)
-                       (*d)->sectors_dirty_gc = 0;
-
        mutex_unlock(&c->bucket_lock);
 }
 
@@ -1437,7 +1424,6 @@ size_t bch_btree_gc_finish(struct cache_set *c)
        size_t available = 0;
        struct bucket *b;
        struct cache *ca;
-       struct bcache_device **d;
        unsigned i;
 
        mutex_lock(&c->bucket_lock);
@@ -1480,22 +1466,6 @@ size_t bch_btree_gc_finish(struct cache_set *c)
                }
        }
 
-       for (d = c->devices;
-            d < c->devices + c->nr_uuids;
-            d++)
-               if (*d) {
-                       unsigned long last =
-                               atomic_long_read(&((*d)->sectors_dirty));
-                       long difference = (*d)->sectors_dirty_gc - last;
-
-                       pr_debug("sectors dirty off by %li", difference);
-
-                       (*d)->sectors_dirty_last += difference;
-
-                       atomic_long_set(&((*d)->sectors_dirty),
-                                       (*d)->sectors_dirty_gc);
-               }
-
        mutex_unlock(&c->bucket_lock);
        return available;
 }
@@ -1508,10 +1478,9 @@ static void bch_btree_gc(struct closure *cl)
        struct gc_stat stats;
        struct closure writes;
        struct btree_op op;
-
        uint64_t start_time = local_clock();
-       trace_bcache_gc_start(c->sb.set_uuid);
-       blktrace_msg_all(c, "Starting gc");
+
+       trace_bcache_gc_start(c);
 
        memset(&stats, 0, sizeof(struct gc_stat));
        closure_init_stack(&writes);
@@ -1520,14 +1489,14 @@ static void bch_btree_gc(struct closure *cl)
 
        btree_gc_start(c);
 
+       atomic_inc(&c->prio_blocked);
+
        ret = btree_root(gc_root, c, &op, &writes, &stats);
        closure_sync(&op.cl);
        closure_sync(&writes);
 
        if (ret) {
-               blktrace_msg_all(c, "Stopped gc");
                pr_warn("gc failed!");
-
                continue_at(cl, bch_btree_gc, bch_gc_wq);
        }
 
@@ -1537,6 +1506,9 @@ static void bch_btree_gc(struct closure *cl)
 
        available = bch_btree_gc_finish(c);
 
+       atomic_dec(&c->prio_blocked);
+       wake_up_allocators(c);
+
        bch_time_stats_update(&c->btree_gc_time, start_time);
 
        stats.key_bytes *= sizeof(uint64_t);
@@ -1544,10 +1516,8 @@ static void bch_btree_gc(struct closure *cl)
        stats.data      <<= 9;
        stats.in_use    = (c->nbuckets - available) * 100 / c->nbuckets;
        memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
-       blktrace_msg_all(c, "Finished gc");
 
-       trace_bcache_gc_end(c->sb.set_uuid);
-       wake_up(&c->alloc_wait);
+       trace_bcache_gc_end(c);
 
        continue_at(cl, bch_moving_gc, bch_gc_wq);
 }
@@ -1654,14 +1624,14 @@ static bool fix_overlapping_extents(struct btree *b,
                                    struct btree_iter *iter,
                                    struct btree_op *op)
 {
-       void subtract_dirty(struct bkey *k, int sectors)
+       void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
        {
-               struct bcache_device *d = b->c->devices[KEY_INODE(k)];
-
-               if (KEY_DIRTY(k) && d)
-                       atomic_long_sub(sectors, &d->sectors_dirty);
+               if (KEY_DIRTY(k))
+                       bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
+                                                    offset, -sectors);
        }
 
+       uint64_t old_offset;
        unsigned old_size, sectors_found = 0;
 
        while (1) {
@@ -1673,6 +1643,7 @@ static bool fix_overlapping_extents(struct btree *b,
                if (bkey_cmp(k, &START_KEY(insert)) <= 0)
                        continue;
 
+               old_offset = KEY_START(k);
                old_size = KEY_SIZE(k);
 
                /*
@@ -1728,7 +1699,7 @@ static bool fix_overlapping_extents(struct btree *b,
 
                        struct bkey *top;
 
-                       subtract_dirty(k, KEY_SIZE(insert));
+                       subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
 
                        if (bkey_written(b, k)) {
                                /*
@@ -1775,7 +1746,7 @@ static bool fix_overlapping_extents(struct btree *b,
                        }
                }
 
-               subtract_dirty(k, old_size - KEY_SIZE(k));
+               subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
        }
 
 check_failed:
@@ -1798,7 +1769,7 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 {
        struct bset *i = b->sets[b->nsets].data;
        struct bkey *m, *prev;
-       const char *status = "insert";
+       unsigned status = BTREE_INSERT_STATUS_INSERT;
 
        BUG_ON(bkey_cmp(k, &b->key) > 0);
        BUG_ON(b->level && !KEY_PTRS(k));
@@ -1831,17 +1802,17 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
                        goto insert;
 
                /* prev is in the tree, if we merge we're done */
-               status = "back merging";
+               status = BTREE_INSERT_STATUS_BACK_MERGE;
                if (prev &&
                    bch_bkey_try_merge(b, prev, k))
                        goto merged;
 
-               status = "overwrote front";
+               status = BTREE_INSERT_STATUS_OVERWROTE;
                if (m != end(i) &&
                    KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
                        goto copy;
 
-               status = "front merge";
+               status = BTREE_INSERT_STATUS_FRONT_MERGE;
                if (m != end(i) &&
                    bch_bkey_try_merge(b, k, m))
                        goto copy;
@@ -1851,21 +1822,21 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 insert:        shift_keys(b, m, k);
 copy:  bkey_copy(m, k);
 merged:
-       bch_check_keys(b, "%s for %s at %s: %s", status,
-                      op_type(op), pbtree(b), pkey(k));
-       bch_check_key_order_msg(b, i, "%s for %s at %s: %s", status,
-                               op_type(op), pbtree(b), pkey(k));
+       if (KEY_DIRTY(k))
+               bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
+                                            KEY_START(k), KEY_SIZE(k));
+
+       bch_check_keys(b, "%u for %s", status, op_type(op));
 
        if (b->level && !KEY_OFFSET(k))
-               b->prio_blocked++;
+               btree_current_write(b)->prio_blocked++;
 
-       pr_debug("%s for %s at %s: %s", status,
-                op_type(op), pbtree(b), pkey(k));
+       trace_bcache_btree_insert_key(b, k, op->type, status);
 
        return true;
 }
 
-bool bch_btree_insert_keys(struct btree *b, struct btree_op *op)
+static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op)
 {
        bool ret = false;
        struct bkey *k;
@@ -1896,7 +1867,7 @@ bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
            should_split(b))
                goto out;
 
-       op->replace = KEY(op->inode, bio_end(bio), bio_sectors(bio));
+       op->replace = KEY(op->inode, bio_end_sector(bio), bio_sectors(bio));
 
        SET_KEY_PTRS(&op->replace, 1);
        get_random_bytes(&op->replace.ptr[0], sizeof(uint64_t));
@@ -1907,7 +1878,6 @@ bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
 
        BUG_ON(op->type != BTREE_INSERT);
        BUG_ON(!btree_insert_key(b, op, &tmp.k));
-       bch_btree_write(b, false, NULL);
        ret = true;
 out:
        downgrade_write(&b->lock);
@@ -1929,12 +1899,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
 
        split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
 
-       pr_debug("%ssplitting at %s keys %i", split ? "" : "not ",
-                pbtree(b), n1->sets[0].data->keys);
-
        if (split) {
                unsigned keys = 0;
 
+               trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
+
                n2 = bch_btree_node_alloc(b->c, b->level, &op->cl);
                if (IS_ERR(n2))
                        goto err_free1;
@@ -1967,18 +1936,21 @@ static int btree_split(struct btree *b, struct btree_op *op)
                bkey_copy_key(&n2->key, &b->key);
 
                bch_keylist_add(&op->keys, &n2->key);
-               bch_btree_write(n2, true, op);
+               bch_btree_node_write(n2, &op->cl);
                rw_unlock(true, n2);
-       } else
+       } else {
+               trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
+
                bch_btree_insert_keys(n1, op);
+       }
 
        bch_keylist_add(&op->keys, &n1->key);
-       bch_btree_write(n1, true, op);
+       bch_btree_node_write(n1, &op->cl);
 
        if (n3) {
                bkey_copy_key(&n3->key, &MAX_KEY);
                bch_btree_insert_keys(n3, op);
-               bch_btree_write(n3, true, op);
+               bch_btree_node_write(n3, &op->cl);
 
                closure_sync(&op->cl);
                bch_btree_set_root(n3);
@@ -2082,8 +2054,12 @@ static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op,
 
                BUG_ON(write_block(b) != b->sets[b->nsets].data);
 
-               if (bch_btree_insert_keys(b, op))
-                       bch_btree_write(b, false, op);
+               if (bch_btree_insert_keys(b, op)) {
+                       if (!b->level)
+                               bch_btree_leaf_dirty(b, op);
+                       else
+                               bch_btree_node_write(b, &op->cl);
+               }
        }
 
        return 0;
@@ -2140,6 +2116,11 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c)
 void bch_btree_set_root(struct btree *b)
 {
        unsigned i;
+       struct closure cl;
+
+       closure_init_stack(&cl);
+
+       trace_bcache_btree_set_root(b);
 
        BUG_ON(!b->written);
 
@@ -2153,8 +2134,8 @@ void bch_btree_set_root(struct btree *b)
        b->c->root = b;
        __bkey_put(b->c, &b->key);
 
-       bch_journal_meta(b->c, NULL);
-       pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0));
+       bch_journal_meta(b->c, &cl);
+       closure_sync(&cl);
 }
 
 /* Cache lookup */
@@ -2215,9 +2196,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op,
                                         KEY_OFFSET(k) - bio->bi_sector);
 
                n = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
-               if (!n)
-                       return -EAGAIN;
-
                if (n == bio)
                        op->lookup_done = true;
 
@@ -2240,7 +2218,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op,
                n->bi_end_io    = bch_cache_read_endio;
                n->bi_private   = &s->cl;
 
-               trace_bcache_cache_hit(n);
                __bch_submit_bbio(n, b->c);
        }
 
@@ -2257,9 +2234,6 @@ int bch_btree_search_recurse(struct btree *b, struct btree_op *op)
        struct btree_iter iter;
        bch_btree_iter_init(b, &iter, &KEY(op->inode, bio->bi_sector, 0));
 
-       pr_debug("at %s searching for %u:%llu", pbtree(b), op->inode,
-                (uint64_t) bio->bi_sector);
-
        do {
                k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
                if (!k) {
@@ -2303,7 +2277,8 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
 }
 
 static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
-                                  struct keybuf *buf, struct bkey *end)
+                                  struct keybuf *buf, struct bkey *end,
+                                  keybuf_pred_fn *pred)
 {
        struct btree_iter iter;
        bch_btree_iter_init(b, &iter, &buf->last_scanned);
@@ -2322,11 +2297,9 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
                        if (bkey_cmp(&buf->last_scanned, end) >= 0)
                                break;
 
-                       if (buf->key_predicate(buf, k)) {
+                       if (pred(buf, k)) {
                                struct keybuf_key *w;
 
-                               pr_debug("%s", pkey(k));
-
                                spin_lock(&buf->lock);
 
                                w = array_alloc(&buf->freelist);
@@ -2343,7 +2316,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
                        if (!k)
                                break;
 
-                       btree(refill_keybuf, k, b, op, buf, end);
+                       btree(refill_keybuf, k, b, op, buf, end, pred);
                        /*
                         * Might get an error here, but can't really do anything
                         * and it'll get logged elsewhere. Just read what we
@@ -2361,7 +2334,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
 }
 
 void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
-                         struct bkey *end)
+                      struct bkey *end, keybuf_pred_fn *pred)
 {
        struct bkey start = buf->last_scanned;
        struct btree_op op;
@@ -2369,7 +2342,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
 
        cond_resched();
 
-       btree_root(refill_keybuf, c, &op, buf, end);
+       btree_root(refill_keybuf, c, &op, buf, end, pred);
        closure_sync(&op.cl);
 
        pr_debug("found %s keys from %llu:%llu to %llu:%llu",
@@ -2455,7 +2428,8 @@ struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
 
 struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
                                             struct keybuf *buf,
-                                            struct bkey *end)
+                                            struct bkey *end,
+                                            keybuf_pred_fn *pred)
 {
        struct keybuf_key *ret;
 
@@ -2469,15 +2443,14 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
                        break;
                }
 
-               bch_refill_keybuf(c, buf, end);
+               bch_refill_keybuf(c, buf, end, pred);
        }
 
        return ret;
 }
 
-void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn)
+void bch_keybuf_init(struct keybuf *buf)
 {
-       buf->key_predicate      = fn;
        buf->last_scanned       = MAX_KEY;
        buf->keys               = RB_ROOT;
 
index af4a7092a28c89d47bf2b4165312b7988f1921a6..3333d3723633c424e0a22d51d7475969392c1130 100644 (file)
 #include "debug.h"
 
 struct btree_write {
-       struct closure          *owner;
        atomic_t                *journal;
 
        /* If btree_split() frees a btree node, it writes a new pointer to that
@@ -142,16 +141,12 @@ struct btree {
         */
        struct bset_tree        sets[MAX_BSETS];
 
-       /* Used to refcount bio splits, also protects b->bio */
+       /* For outstanding btree writes, used as a lock - protects write_idx */
        struct closure_with_waitlist    io;
 
-       /* Gets transferred to w->prio_blocked - see the comment there */
-       int                     prio_blocked;
-
        struct list_head        list;
        struct delayed_work     work;
 
-       uint64_t                io_start_time;
        struct btree_write      writes[2];
        struct bio              *bio;
 };
@@ -164,13 +159,11 @@ static inline void set_btree_node_ ## flag(struct btree *b)               \
 {      set_bit(BTREE_NODE_ ## flag, &b->flags); }                      \
 
 enum btree_flags {
-       BTREE_NODE_read_done,
        BTREE_NODE_io_error,
        BTREE_NODE_dirty,
        BTREE_NODE_write_idx,
 };
 
-BTREE_FLAG(read_done);
 BTREE_FLAG(io_error);
 BTREE_FLAG(dirty);
 BTREE_FLAG(write_idx);
@@ -278,6 +271,13 @@ struct btree_op {
        BKEY_PADDED(replace);
 };
 
+enum {
+       BTREE_INSERT_STATUS_INSERT,
+       BTREE_INSERT_STATUS_BACK_MERGE,
+       BTREE_INSERT_STATUS_OVERWROTE,
+       BTREE_INSERT_STATUS_FRONT_MERGE,
+};
+
 void bch_btree_op_init_stack(struct btree_op *);
 
 static inline void rw_lock(bool w, struct btree *b, int level)
@@ -293,9 +293,7 @@ static inline void rw_unlock(bool w, struct btree *b)
 #ifdef CONFIG_BCACHE_EDEBUG
        unsigned i;
 
-       if (w &&
-           b->key.ptr[0] &&
-           btree_node_read_done(b))
+       if (w && b->key.ptr[0])
                for (i = 0; i <= b->nsets; i++)
                        bch_check_key_order(b, b->sets[i].data);
 #endif
@@ -370,9 +368,8 @@ static inline bool should_split(struct btree *b)
                 > btree_blocks(b));
 }
 
-void bch_btree_read_done(struct closure *);
-void bch_btree_read(struct btree *);
-void bch_btree_write(struct btree *b, bool now, struct btree_op *op);
+void bch_btree_node_read(struct btree *);
+void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_cannibalize_unlock(struct cache_set *, struct closure *);
 void bch_btree_set_root(struct btree *);
@@ -380,7 +377,6 @@ struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *);
 struct btree *bch_btree_node_get(struct cache_set *, struct bkey *,
                                int, struct btree_op *);
 
-bool bch_btree_insert_keys(struct btree *, struct btree_op *);
 bool bch_btree_insert_check_key(struct btree *, struct btree_op *,
                                   struct bio *);
 int bch_btree_insert(struct btree_op *, struct cache_set *);
@@ -393,13 +389,14 @@ void bch_moving_gc(struct closure *);
 int bch_btree_check(struct cache_set *, struct btree_op *);
 uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
 
-void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *);
-void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *);
+void bch_keybuf_init(struct keybuf *);
+void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *,
+                      keybuf_pred_fn *);
 bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
                                  struct bkey *);
 void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
 struct keybuf_key *bch_keybuf_next(struct keybuf *);
-struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *,
-                                         struct keybuf *, struct bkey *);
+struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
+                                         struct bkey *, keybuf_pred_fn *);
 
 #endif
index bd05a9a8c7cf933e028be5eefbd1cd178681e495..9aba2017f0d1685ac5858ccf716c1f829681c853 100644 (file)
@@ -66,16 +66,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
                } else {
                        struct closure *parent = cl->parent;
                        struct closure_waitlist *wait = closure_waitlist(cl);
+                       closure_fn *destructor = cl->fn;
 
                        closure_debug_destroy(cl);
 
+                       smp_mb();
                        atomic_set(&cl->remaining, -1);
 
                        if (wait)
                                closure_wake_up(wait);
 
-                       if (cl->fn)
-                               cl->fn(cl);
+                       if (destructor)
+                               destructor(cl);
 
                        if (parent)
                                closure_put(parent);
index 89fd5204924e88bc2486d0616c291279161c51bb..88e6411eab4f15829ba22c535a7b100b370bc5a6 100644 (file)
@@ -47,11 +47,10 @@ const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
        return "";
 }
 
-struct keyprint_hack bch_pkey(const struct bkey *k)
+int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
 {
        unsigned i = 0;
-       struct keyprint_hack r;
-       char *out = r.s, *end = r.s + KEYHACK_SIZE;
+       char *out = buf, *end = buf + size;
 
 #define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
 
@@ -75,16 +74,14 @@ struct keyprint_hack bch_pkey(const struct bkey *k)
        if (KEY_CSUM(k))
                p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
 #undef p
-       return r;
+       return out - buf;
 }
 
-struct keyprint_hack bch_pbtree(const struct btree *b)
+int bch_btree_to_text(char *buf, size_t size, const struct btree *b)
 {
-       struct keyprint_hack r;
-
-       snprintf(r.s, 40, "%zu level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0),
-                b->level, b->c->root ? b->c->root->level : -1);
-       return r;
+       return scnprintf(buf, size, "%zu level %i/%i",
+                        PTR_BUCKET_NR(b->c, &b->key, 0),
+                        b->level, b->c->root ? b->c->root->level : -1);
 }
 
 #if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG)
@@ -100,10 +97,12 @@ static void dump_bset(struct btree *b, struct bset *i)
 {
        struct bkey *k;
        unsigned j;
+       char buf[80];
 
        for (k = i->start; k < end(i); k = bkey_next(k)) {
+               bch_bkey_to_text(buf, sizeof(buf), k);
                printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
-                      (uint64_t *) k - i->d, i->keys, pkey(k));
+                      (uint64_t *) k - i->d, i->keys, buf);
 
                for (j = 0; j < KEY_PTRS(k); j++) {
                        size_t n = PTR_BUCKET_NR(b->c, k, j);
@@ -144,7 +143,7 @@ void bch_btree_verify(struct btree *b, struct bset *new)
        v->written = 0;
        v->level = b->level;
 
-       bch_btree_read(v);
+       bch_btree_node_read(v);
        closure_wait_event(&v->io.wait, &cl,
                           atomic_read(&b->io.cl.remaining) == -1);
 
@@ -200,7 +199,7 @@ void bch_data_verify(struct search *s)
        if (!check)
                return;
 
-       if (bch_bio_alloc_pages(check, GFP_NOIO))
+       if (bio_alloc_pages(check, GFP_NOIO))
                goto out_put;
 
        check->bi_rw            = READ_SYNC;
@@ -252,6 +251,7 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt,
                                   va_list args)
 {
        unsigned i;
+       char buf[80];
 
        console_lock();
 
@@ -262,7 +262,8 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt,
 
        console_unlock();
 
-       panic("at %s\n", pbtree(b));
+       bch_btree_to_text(buf, sizeof(buf), b);
+       panic("at %s\n", buf);
 }
 
 void bch_check_key_order_msg(struct btree *b, struct bset *i,
@@ -337,6 +338,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
 {
        struct dump_iterator *i = file->private_data;
        ssize_t ret = 0;
+       char kbuf[80];
 
        while (size) {
                struct keybuf_key *w;
@@ -355,11 +357,12 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
                if (i->bytes)
                        break;
 
-               w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY);
+               w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred);
                if (!w)
                        break;
 
-               i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key));
+               bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key);
+               i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf);
                bch_keybuf_del(&i->keys, w);
        }
 
@@ -377,7 +380,7 @@ static int bch_dump_open(struct inode *inode, struct file *file)
 
        file->private_data = i;
        i->c = c;
-       bch_keybuf_init(&i->keys, dump_pred);
+       bch_keybuf_init(&i->keys);
        i->keys.last_scanned = KEY(0, 0, 0);
 
        return 0;
@@ -409,142 +412,6 @@ void bch_debug_init_cache_set(struct cache_set *c)
 
 #endif
 
-/* Fuzz tester has rotted: */
-#if 0
-
-static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a,
-                         const char *buffer, size_t size)
-{
-       void dump(struct btree *b)
-       {
-               struct bset *i;
-
-               for (i = b->sets[0].data;
-                    index(i, b) < btree_blocks(b) &&
-                    i->seq == b->sets[0].data->seq;
-                    i = ((void *) i) + set_blocks(i, b->c) * block_bytes(b->c))
-                       dump_bset(b, i);
-       }
-
-       struct cache_sb *sb;
-       struct cache_set *c;
-       struct btree *all[3], *b, *fill, *orig;
-       int j;
-
-       struct btree_op op;
-       bch_btree_op_init_stack(&op);
-
-       sb = kzalloc(sizeof(struct cache_sb), GFP_KERNEL);
-       if (!sb)
-               return -ENOMEM;
-
-       sb->bucket_size = 128;
-       sb->block_size = 4;
-
-       c = bch_cache_set_alloc(sb);
-       if (!c)
-               return -ENOMEM;
-
-       for (j = 0; j < 3; j++) {
-               BUG_ON(list_empty(&c->btree_cache));
-               all[j] = list_first_entry(&c->btree_cache, struct btree, list);
-               list_del_init(&all[j]->list);
-
-               all[j]->key = KEY(0, 0, c->sb.bucket_size);
-               bkey_copy_key(&all[j]->key, &MAX_KEY);
-       }
-
-       b = all[0];
-       fill = all[1];
-       orig = all[2];
-
-       while (1) {
-               for (j = 0; j < 3; j++)
-                       all[j]->written = all[j]->nsets = 0;
-
-               bch_bset_init_next(b);
-
-               while (1) {
-                       struct bset *i = write_block(b);
-                       struct bkey *k = op.keys.top;
-                       unsigned rand;
-
-                       bkey_init(k);
-                       rand = get_random_int();
-
-                       op.type = rand & 1
-                               ? BTREE_INSERT
-                               : BTREE_REPLACE;
-                       rand >>= 1;
-
-                       SET_KEY_SIZE(k, bucket_remainder(c, rand));
-                       rand >>= c->bucket_bits;
-                       rand &= 1024 * 512 - 1;
-                       rand += c->sb.bucket_size;
-                       SET_KEY_OFFSET(k, rand);
-#if 0
-                       SET_KEY_PTRS(k, 1);
-#endif
-                       bch_keylist_push(&op.keys);
-                       bch_btree_insert_keys(b, &op);
-
-                       if (should_split(b) ||
-                           set_blocks(i, b->c) !=
-                           __set_blocks(i, i->keys + 15, b->c)) {
-                               i->csum = csum_set(i);
-
-                               memcpy(write_block(fill),
-                                      i, set_bytes(i));
-
-                               b->written += set_blocks(i, b->c);
-                               fill->written = b->written;
-                               if (b->written == btree_blocks(b))
-                                       break;
-
-                               bch_btree_sort_lazy(b);
-                               bch_bset_init_next(b);
-                       }
-               }
-
-               memcpy(orig->sets[0].data,
-                      fill->sets[0].data,
-                      btree_bytes(c));
-
-               bch_btree_sort(b);
-               fill->written = 0;
-               bch_btree_read_done(&fill->io.cl);
-
-               if (b->sets[0].data->keys != fill->sets[0].data->keys ||
-                   memcmp(b->sets[0].data->start,
-                          fill->sets[0].data->start,
-                          b->sets[0].data->keys * sizeof(uint64_t))) {
-                       struct bset *i = b->sets[0].data;
-                       struct bkey *k, *l;
-
-                       for (k = i->start,
-                            l = fill->sets[0].data->start;
-                            k < end(i);
-                            k = bkey_next(k), l = bkey_next(l))
-                               if (bkey_cmp(k, l) ||
-                                   KEY_SIZE(k) != KEY_SIZE(l))
-                                       pr_err("key %zi differs: %s != %s",
-                                              (uint64_t *) k - i->d,
-                                              pkey(k), pkey(l));
-
-                       for (j = 0; j < 3; j++) {
-                               pr_err("**** Set %i ****", j);
-                               dump(all[j]);
-                       }
-                       panic("\n");
-               }
-
-               pr_info("fuzz complete: %i keys", b->sets[0].data->keys);
-       }
-}
-
-kobj_attribute_write(fuzz, btree_fuzz);
-#endif
-
 void bch_debug_exit(void)
 {
        if (!IS_ERR_OR_NULL(debug))
@@ -554,11 +421,6 @@ void bch_debug_exit(void)
 int __init bch_debug_init(struct kobject *kobj)
 {
        int ret = 0;
-#if 0
-       ret = sysfs_create_file(kobj, &ksysfs_fuzz.attr);
-       if (ret)
-               return ret;
-#endif
 
        debug = debugfs_create_dir("bcache", NULL);
        return ret;
index f9378a21814871033a9bc2218d184e6e422d2bd4..1c39b5a2489bd1158782f0afb1a28ef4fcb0a7a4 100644 (file)
@@ -3,15 +3,8 @@
 
 /* Btree/bkey debug printing */
 
-#define KEYHACK_SIZE 80
-struct keyprint_hack {
-       char s[KEYHACK_SIZE];
-};
-
-struct keyprint_hack bch_pkey(const struct bkey *k);
-struct keyprint_hack bch_pbtree(const struct btree *b);
-#define pkey(k)                (&bch_pkey(k).s[0])
-#define pbtree(b)      (&bch_pbtree(b).s[0])
+int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k);
+int bch_btree_to_text(char *buf, size_t size, const struct btree *b);
 
 #ifdef CONFIG_BCACHE_EDEBUG
 
index 48efd4dea645bde1b12cf69927e9f628fd1ae077..9056632995b1b8a2de9de607c60cd693704c9c65 100644 (file)
@@ -9,6 +9,8 @@
 #include "bset.h"
 #include "debug.h"
 
+#include <linux/blkdev.h>
+
 static void bch_bi_idx_hack_endio(struct bio *bio, int error)
 {
        struct bio *p = bio->bi_private;
@@ -66,13 +68,6 @@ static void bch_generic_make_request_hack(struct bio *bio)
  * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
  * bvec boundry; it is the caller's responsibility to ensure that @bio is not
  * freed before the split.
- *
- * If bch_bio_split() is running under generic_make_request(), it's not safe to
- * allocate more than one bio from the same bio set. Therefore, if it is running
- * under generic_make_request() it masks out __GFP_WAIT when doing the
- * allocation. The caller must check for failure if there's any possibility of
- * it being called from under generic_make_request(); it is then the caller's
- * responsibility to retry from a safe context (by e.g. punting to workqueue).
  */
 struct bio *bch_bio_split(struct bio *bio, int sectors,
                          gfp_t gfp, struct bio_set *bs)
@@ -83,20 +78,13 @@ struct bio *bch_bio_split(struct bio *bio, int sectors,
 
        BUG_ON(sectors <= 0);
 
-       /*
-        * If we're being called from underneath generic_make_request() and we
-        * already allocated any bios from this bio set, we risk deadlock if we
-        * use the mempool. So instead, we possibly fail and let the caller punt
-        * to workqueue or somesuch and retry in a safe context.
-        */
-       if (current->bio_list)
-               gfp &= ~__GFP_WAIT;
-
        if (sectors >= bio_sectors(bio))
                return bio;
 
        if (bio->bi_rw & REQ_DISCARD) {
                ret = bio_alloc_bioset(gfp, 1, bs);
+               if (!ret)
+                       return NULL;
                idx = 0;
                goto out;
        }
@@ -160,17 +148,18 @@ static unsigned bch_bio_max_sectors(struct bio *bio)
        struct request_queue *q = bdev_get_queue(bio->bi_bdev);
        unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES,
                                      queue_max_segments(q));
-       struct bio_vec *bv, *end = bio_iovec(bio) +
-               min_t(int, bio_segments(bio), max_segments);
 
        if (bio->bi_rw & REQ_DISCARD)
                return min(ret, q->limits.max_discard_sectors);
 
        if (bio_segments(bio) > max_segments ||
            q->merge_bvec_fn) {
+               struct bio_vec *bv;
+               int i, seg = 0;
+
                ret = 0;
 
-               for (bv = bio_iovec(bio); bv < end; bv++) {
+               bio_for_each_segment(bv, bio, i) {
                        struct bvec_merge_data bvm = {
                                .bi_bdev        = bio->bi_bdev,
                                .bi_sector      = bio->bi_sector,
@@ -178,10 +167,14 @@ static unsigned bch_bio_max_sectors(struct bio *bio)
                                .bi_rw          = bio->bi_rw,
                        };
 
+                       if (seg == max_segments)
+                               break;
+
                        if (q->merge_bvec_fn &&
                            q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len)
                                break;
 
+                       seg++;
                        ret += bv->bv_len >> 9;
                }
        }
@@ -218,30 +211,10 @@ static void bch_bio_submit_split_endio(struct bio *bio, int error)
        closure_put(cl);
 }
 
-static void __bch_bio_submit_split(struct closure *cl)
-{
-       struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);
-       struct bio *bio = s->bio, *n;
-
-       do {
-               n = bch_bio_split(bio, bch_bio_max_sectors(bio),
-                                 GFP_NOIO, s->p->bio_split);
-               if (!n)
-                       continue_at(cl, __bch_bio_submit_split, system_wq);
-
-               n->bi_end_io    = bch_bio_submit_split_endio;
-               n->bi_private   = cl;
-
-               closure_get(cl);
-               bch_generic_make_request_hack(n);
-       } while (n != bio);
-
-       continue_at(cl, bch_bio_submit_split_done, NULL);
-}
-
 void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
 {
        struct bio_split_hook *s;
+       struct bio *n;
 
        if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD))
                goto submit;
@@ -250,6 +223,7 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
                goto submit;
 
        s = mempool_alloc(p->bio_split_hook, GFP_NOIO);
+       closure_init(&s->cl, NULL);
 
        s->bio          = bio;
        s->p            = p;
@@ -257,8 +231,18 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
        s->bi_private   = bio->bi_private;
        bio_get(bio);
 
-       closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL);
-       return;
+       do {
+               n = bch_bio_split(bio, bch_bio_max_sectors(bio),
+                                 GFP_NOIO, s->p->bio_split);
+
+               n->bi_end_io    = bch_bio_submit_split_endio;
+               n->bi_private   = &s->cl;
+
+               closure_get(&s->cl);
+               bch_generic_make_request_hack(n);
+       } while (n != bio);
+
+       continue_at(&s->cl, bch_bio_submit_split_done, NULL);
 submit:
        bch_generic_make_request_hack(bio);
 }
index 8c8dfdcd9d4cc9bc4a3d0799b98401855b280bcc..ba95ab84b2be5a5a32292625d229bb29e051a53d 100644 (file)
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 /*
  * Journal replay/recovery:
  *
@@ -182,9 +184,14 @@ bsearch:
                pr_debug("starting binary search, l %u r %u", l, r);
 
                while (l + 1 < r) {
+                       seq = list_entry(list->prev, struct journal_replay,
+                                        list)->j.seq;
+
                        m = (l + r) >> 1;
+                       read_bucket(m);
 
-                       if (read_bucket(m))
+                       if (seq != list_entry(list->prev, struct journal_replay,
+                                             list)->j.seq)
                                l = m;
                        else
                                r = m;
@@ -300,7 +307,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list,
                for (k = i->j.start;
                     k < end(&i->j);
                     k = bkey_next(k)) {
-                       pr_debug("%s", pkey(k));
+                       trace_bcache_journal_replay_key(k);
+
                        bkey_copy(op->keys.top, k);
                        bch_keylist_push(&op->keys);
 
@@ -384,7 +392,7 @@ out:
                return;
 found:
        if (btree_node_dirty(best))
-               bch_btree_write(best, true, NULL);
+               bch_btree_node_write(best, NULL);
        rw_unlock(true, best);
 }
 
@@ -617,7 +625,7 @@ static void journal_write_unlocked(struct closure *cl)
                bio_reset(bio);
                bio->bi_sector  = PTR_OFFSET(k, i);
                bio->bi_bdev    = ca->bdev;
-               bio->bi_rw      = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH;
+               bio->bi_rw      = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA;
                bio->bi_size    = sectors << 9;
 
                bio->bi_end_io  = journal_write_endio;
@@ -712,7 +720,8 @@ void bch_journal(struct closure *cl)
        spin_lock(&c->journal.lock);
 
        if (journal_full(&c->journal)) {
-               /* XXX: tracepoint */
+               trace_bcache_journal_full(c);
+
                closure_wait(&c->journal.wait, cl);
 
                journal_reclaim(c);
@@ -728,13 +737,15 @@ void bch_journal(struct closure *cl)
 
        if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS ||
            b > c->journal.blocks_free) {
-               /* XXX: If we were inserting so many keys that they won't fit in
+               trace_bcache_journal_entry_full(c);
+
+               /*
+                * XXX: If we were inserting so many keys that they won't fit in
                 * an _empty_ journal write, we'll deadlock. For now, handle
                 * this in bch_keylist_realloc() - but something to think about.
                 */
                BUG_ON(!w->data->keys);
 
-               /* XXX: tracepoint */
                BUG_ON(!closure_wait(&w->wait, cl));
 
                closure_flush(&c->journal.io);
index 8589512c972e3cfd1e973d53783313802b4db6ca..1a3b4f4786c3cee411d16c3a9a453e1fcd33b681 100644 (file)
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 struct moving_io {
        struct keybuf_key       *w;
        struct search           s;
@@ -44,14 +46,14 @@ static void write_moving_finish(struct closure *cl)
 {
        struct moving_io *io = container_of(cl, struct moving_io, s.cl);
        struct bio *bio = &io->bio.bio;
-       struct bio_vec *bv = bio_iovec_idx(bio, bio->bi_vcnt);
+       struct bio_vec *bv;
+       int i;
 
-       while (bv-- != bio->bi_io_vec)
+       bio_for_each_segment_all(bv, bio, i)
                __free_page(bv->bv_page);
 
-       pr_debug("%s %s", io->s.op.insert_collision
-                ? "collision moving" : "moved",
-                pkey(&io->w->key));
+       if (io->s.op.insert_collision)
+               trace_bcache_gc_copy_collision(&io->w->key);
 
        bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);
 
@@ -94,8 +96,6 @@ static void write_moving(struct closure *cl)
        struct moving_io *io = container_of(s, struct moving_io, s);
 
        if (!s->error) {
-               trace_bcache_write_moving(&io->bio.bio);
-
                moving_init(io);
 
                io->bio.bio.bi_sector   = KEY_START(&io->w->key);
@@ -122,7 +122,6 @@ static void read_moving_submit(struct closure *cl)
        struct moving_io *io = container_of(s, struct moving_io, s);
        struct bio *bio = &io->bio.bio;
 
-       trace_bcache_read_moving(bio);
        bch_submit_bbio(bio, s->op.c, &io->w->key, 0);
 
        continue_at(cl, write_moving, bch_gc_wq);
@@ -138,7 +137,8 @@ static void read_moving(struct closure *cl)
        /* XXX: if we error, background writeback could stall indefinitely */
 
        while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
-               w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY);
+               w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
+                                          &MAX_KEY, moving_pred);
                if (!w)
                        break;
 
@@ -159,10 +159,10 @@ static void read_moving(struct closure *cl)
                bio->bi_rw      = READ;
                bio->bi_end_io  = read_moving_endio;
 
-               if (bch_bio_alloc_pages(bio, GFP_KERNEL))
+               if (bio_alloc_pages(bio, GFP_KERNEL))
                        goto err;
 
-               pr_debug("%s", pkey(&w->key));
+               trace_bcache_gc_copy(&w->key);
 
                closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);
 
@@ -250,5 +250,5 @@ void bch_moving_gc(struct closure *cl)
 
 void bch_moving_init_cache_set(struct cache_set *c)
 {
-       bch_keybuf_init(&c->moving_gc_keys, moving_pred);
+       bch_keybuf_init(&c->moving_gc_keys);
 }
index e5ff12e52d5b277bea9cc4fa4aa5eb5bdf23540a..786a1a4f74d853fafe3ab2ae263d2ecf780134aa 100644 (file)
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/cgroup.h>
 #include <linux/module.h>
@@ -21,8 +22,6 @@
 
 #define CUTOFF_CACHE_ADD       95
 #define CUTOFF_CACHE_READA     90
-#define CUTOFF_WRITEBACK       50
-#define CUTOFF_WRITEBACK_SYNC  75
 
 struct kmem_cache *bch_search_cache;
 
@@ -489,6 +488,12 @@ static void bch_insert_data_loop(struct closure *cl)
                bch_queue_gc(op->c);
        }
 
+       /*
+        * Journal writes are marked REQ_FLUSH; if the original write was a
+        * flush, it'll wait on the journal write.
+        */
+       bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA);
+
        do {
                unsigned i;
                struct bkey *k;
@@ -510,10 +515,6 @@ static void bch_insert_data_loop(struct closure *cl)
                        goto err;
 
                n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
-               if (!n) {
-                       __bkey_put(op->c, k);
-                       continue_at(cl, bch_insert_data_loop, bcache_wq);
-               }
 
                n->bi_end_io    = bch_insert_data_endio;
                n->bi_private   = cl;
@@ -530,10 +531,9 @@ static void bch_insert_data_loop(struct closure *cl)
                if (KEY_CSUM(k))
                        bio_csum(n, k);
 
-               pr_debug("%s", pkey(k));
+               trace_bcache_cache_insert(k);
                bch_keylist_push(&op->keys);
 
-               trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev);
                n->bi_rw |= REQ_WRITE;
                bch_submit_bbio(n, op->c, k, 0);
        } while (n != bio);
@@ -716,7 +716,7 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d)
        s->task                 = current;
        s->orig_bio             = bio;
        s->write                = (bio->bi_rw & REQ_WRITE) != 0;
-       s->op.flush_journal     = (bio->bi_rw & REQ_FLUSH) != 0;
+       s->op.flush_journal     = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
        s->op.skip              = (bio->bi_rw & REQ_DISCARD) != 0;
        s->recoverable          = 1;
        s->start_time           = jiffies;
@@ -784,11 +784,8 @@ static void request_read_error(struct closure *cl)
        int i;
 
        if (s->recoverable) {
-               /* The cache read failed, but we can retry from the backing
-                * device.
-                */
-               pr_debug("recovering at sector %llu",
-                        (uint64_t) s->orig_bio->bi_sector);
+               /* Retry from the backing device: */
+               trace_bcache_read_retry(s->orig_bio);
 
                s->error = 0;
                bv = s->bio.bio.bi_io_vec;
@@ -806,7 +803,6 @@ static void request_read_error(struct closure *cl)
 
                /* XXX: invalidate cache */
 
-               trace_bcache_read_retry(&s->bio.bio);
                closure_bio_submit(&s->bio.bio, &s->cl, s->d);
        }
 
@@ -827,53 +823,13 @@ static void request_read_done(struct closure *cl)
         */
 
        if (s->op.cache_bio) {
-               struct bio_vec *src, *dst;
-               unsigned src_offset, dst_offset, bytes;
-               void *dst_ptr;
-
                bio_reset(s->op.cache_bio);
                s->op.cache_bio->bi_sector      = s->cache_miss->bi_sector;
                s->op.cache_bio->bi_bdev        = s->cache_miss->bi_bdev;
                s->op.cache_bio->bi_size        = s->cache_bio_sectors << 9;
                bch_bio_map(s->op.cache_bio, NULL);
 
-               src = bio_iovec(s->op.cache_bio);
-               dst = bio_iovec(s->cache_miss);
-               src_offset = src->bv_offset;
-               dst_offset = dst->bv_offset;
-               dst_ptr = kmap(dst->bv_page);
-
-               while (1) {
-                       if (dst_offset == dst->bv_offset + dst->bv_len) {
-                               kunmap(dst->bv_page);
-                               dst++;
-                               if (dst == bio_iovec_idx(s->cache_miss,
-                                               s->cache_miss->bi_vcnt))
-                                       break;
-
-                               dst_offset = dst->bv_offset;
-                               dst_ptr = kmap(dst->bv_page);
-                       }
-
-                       if (src_offset == src->bv_offset + src->bv_len) {
-                               src++;
-                               if (src == bio_iovec_idx(s->op.cache_bio,
-                                                s->op.cache_bio->bi_vcnt))
-                                       BUG();
-
-                               src_offset = src->bv_offset;
-                       }
-
-                       bytes = min(dst->bv_offset + dst->bv_len - dst_offset,
-                                   src->bv_offset + src->bv_len - src_offset);
-
-                       memcpy(dst_ptr + dst_offset,
-                              page_address(src->bv_page) + src_offset,
-                              bytes);
-
-                       src_offset      += bytes;
-                       dst_offset      += bytes;
-               }
+               bio_copy_data(s->cache_miss, s->op.cache_bio);
 
                bio_put(s->cache_miss);
                s->cache_miss = NULL;
@@ -899,6 +855,7 @@ static void request_read_done_bh(struct closure *cl)
        struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
        bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip);
+       trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip);
 
        if (s->error)
                continue_at_nobarrier(cl, request_read_error, bcache_wq);
@@ -917,9 +874,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
        struct bio *miss;
 
        miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
-       if (!miss)
-               return -EAGAIN;
-
        if (miss == bio)
                s->op.lookup_done = true;
 
@@ -938,8 +892,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
                reada = min(dc->readahead >> 9,
                            sectors - bio_sectors(miss));
 
-               if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev))
-                       reada = bdev_sectors(miss->bi_bdev) - bio_end(miss);
+               if (bio_end_sector(miss) + reada > bdev_sectors(miss->bi_bdev))
+                       reada = bdev_sectors(miss->bi_bdev) -
+                               bio_end_sector(miss);
        }
 
        s->cache_bio_sectors = bio_sectors(miss) + reada;
@@ -963,13 +918,12 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
                goto out_put;
 
        bch_bio_map(s->op.cache_bio, NULL);
-       if (bch_bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO))
+       if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO))
                goto out_put;
 
        s->cache_miss = miss;
        bio_get(s->op.cache_bio);
 
-       trace_bcache_cache_miss(s->orig_bio);
        closure_bio_submit(s->op.cache_bio, &s->cl, s->d);
 
        return ret;
@@ -1002,24 +956,13 @@ static void cached_dev_write_complete(struct closure *cl)
        cached_dev_bio_complete(cl);
 }
 
-static bool should_writeback(struct cached_dev *dc, struct bio *bio)
-{
-       unsigned threshold = (bio->bi_rw & REQ_SYNC)
-               ? CUTOFF_WRITEBACK_SYNC
-               : CUTOFF_WRITEBACK;
-
-       return !atomic_read(&dc->disk.detaching) &&
-               cache_mode(dc, bio) == CACHE_MODE_WRITEBACK &&
-               dc->disk.c->gc_stats.in_use < threshold;
-}
-
 static void request_write(struct cached_dev *dc, struct search *s)
 {
        struct closure *cl = &s->cl;
        struct bio *bio = &s->bio.bio;
        struct bkey start, end;
        start = KEY(dc->disk.id, bio->bi_sector, 0);
-       end = KEY(dc->disk.id, bio_end(bio), 0);
+       end = KEY(dc->disk.id, bio_end_sector(bio), 0);
 
        bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end);
 
@@ -1034,22 +977,37 @@ static void request_write(struct cached_dev *dc, struct search *s)
        if (bio->bi_rw & REQ_DISCARD)
                goto skip;
 
+       if (should_writeback(dc, s->orig_bio,
+                            cache_mode(dc, bio),
+                            s->op.skip)) {
+               s->op.skip = false;
+               s->writeback = true;
+       }
+
        if (s->op.skip)
                goto skip;
 
-       if (should_writeback(dc, s->orig_bio))
-               s->writeback = true;
+       trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
 
        if (!s->writeback) {
                s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
                                                   dc->disk.bio_split);
 
-               trace_bcache_writethrough(s->orig_bio);
                closure_bio_submit(bio, cl, s->d);
        } else {
-               s->op.cache_bio = bio;
-               trace_bcache_writeback(s->orig_bio);
-               bch_writeback_add(dc, bio_sectors(bio));
+               bch_writeback_add(dc);
+
+               if (s->op.flush_journal) {
+                       /* Also need to send a flush to the backing device */
+                       s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
+                                                          dc->disk.bio_split);
+
+                       bio->bi_size = 0;
+                       bio->bi_vcnt = 0;
+                       closure_bio_submit(bio, cl, s->d);
+               } else {
+                       s->op.cache_bio = bio;
+               }
        }
 out:
        closure_call(&s->op.cl, bch_insert_data, NULL, cl);
@@ -1058,7 +1016,6 @@ skip:
        s->op.skip = true;
        s->op.cache_bio = s->orig_bio;
        bio_get(s->op.cache_bio);
-       trace_bcache_write_skip(s->orig_bio);
 
        if ((bio->bi_rw & REQ_DISCARD) &&
            !blk_queue_discard(bdev_get_queue(dc->bdev)))
@@ -1088,9 +1045,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s)
 
 /* Cached devices - read & write stuff */
 
-int bch_get_congested(struct cache_set *c)
+unsigned bch_get_congested(struct cache_set *c)
 {
        int i;
+       long rand;
 
        if (!c->congested_read_threshold_us &&
            !c->congested_write_threshold_us)
@@ -1106,7 +1064,13 @@ int bch_get_congested(struct cache_set *c)
 
        i += CONGESTED_MAX;
 
-       return i <= 0 ? 1 : fract_exp_two(i, 6);
+       if (i > 0)
+               i = fract_exp_two(i, 6);
+
+       rand = get_random_int();
+       i -= bitmap_weight(&rand, BITS_PER_LONG);
+
+       return i > 0 ? i : 1;
 }
 
 static void add_sequential(struct task_struct *t)
@@ -1126,10 +1090,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 {
        struct cache_set *c = s->op.c;
        struct bio *bio = &s->bio.bio;
-
-       long rand;
-       int cutoff = bch_get_congested(c);
        unsigned mode = cache_mode(dc, bio);
+       unsigned sectors, congested = bch_get_congested(c);
 
        if (atomic_read(&dc->disk.detaching) ||
            c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -1147,17 +1109,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
                goto skip;
        }
 
-       if (!cutoff) {
-               cutoff = dc->sequential_cutoff >> 9;
+       if (!congested && !dc->sequential_cutoff)
+               goto rescale;
 
-               if (!cutoff)
-                       goto rescale;
-
-               if (mode == CACHE_MODE_WRITEBACK &&
-                   (bio->bi_rw & REQ_WRITE) &&
-                   (bio->bi_rw & REQ_SYNC))
-                       goto rescale;
-       }
+       if (!congested &&
+           mode == CACHE_MODE_WRITEBACK &&
+           (bio->bi_rw & REQ_WRITE) &&
+           (bio->bi_rw & REQ_SYNC))
+               goto rescale;
 
        if (dc->sequential_merge) {
                struct io *i;
@@ -1177,7 +1136,7 @@ found:
                if (i->sequential + bio->bi_size > i->sequential)
                        i->sequential   += bio->bi_size;
 
-               i->last                  = bio_end(bio);
+               i->last                  = bio_end_sector(bio);
                i->jiffies               = jiffies + msecs_to_jiffies(5000);
                s->task->sequential_io   = i->sequential;
 
@@ -1192,12 +1151,19 @@ found:
                add_sequential(s->task);
        }
 
-       rand = get_random_int();
-       cutoff -= bitmap_weight(&rand, BITS_PER_LONG);
+       sectors = max(s->task->sequential_io,
+                     s->task->sequential_io_avg) >> 9;
 
-       if (cutoff <= (int) (max(s->task->sequential_io,
-                                s->task->sequential_io_avg) >> 9))
+       if (dc->sequential_cutoff &&
+           sectors >= dc->sequential_cutoff >> 9) {
+               trace_bcache_bypass_sequential(s->orig_bio);
                goto skip;
+       }
+
+       if (congested && sectors >= congested) {
+               trace_bcache_bypass_congested(s->orig_bio);
+               goto skip;
+       }
 
 rescale:
        bch_rescale_priorities(c, bio_sectors(bio));
@@ -1288,30 +1254,25 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
 static int flash_dev_cache_miss(struct btree *b, struct search *s,
                                struct bio *bio, unsigned sectors)
 {
+       struct bio_vec *bv;
+       int i;
+
        /* Zero fill bio */
 
-       while (bio->bi_idx != bio->bi_vcnt) {
-               struct bio_vec *bv = bio_iovec(bio);
+       bio_for_each_segment(bv, bio, i) {
                unsigned j = min(bv->bv_len >> 9, sectors);
 
                void *p = kmap(bv->bv_page);
                memset(p + bv->bv_offset, 0, j << 9);
                kunmap(bv->bv_page);
 
-               bv->bv_len      -= j << 9;
-               bv->bv_offset   += j << 9;
-
-               if (bv->bv_len)
-                       return 0;
-
-               bio->bi_sector  += j;
-               bio->bi_size    -= j << 9;
-
-               bio->bi_idx++;
-               sectors         -= j;
+               sectors -= j;
        }
 
-       s->op.lookup_done = true;
+       bio_advance(bio, min(sectors << 9, bio->bi_size));
+
+       if (!bio->bi_size)
+               s->op.lookup_done = true;
 
        return 0;
 }
@@ -1338,8 +1299,8 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
                closure_call(&s->op.cl, btree_read_async, NULL, cl);
        } else if (bio_has_data(bio) || s->op.skip) {
                bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys,
-                                            &KEY(d->id, bio->bi_sector, 0),
-                                            &KEY(d->id, bio_end(bio), 0));
+                                       &KEY(d->id, bio->bi_sector, 0),
+                                       &KEY(d->id, bio_end_sector(bio), 0));
 
                s->writeback    = true;
                s->op.cache_bio = bio;
index 254d9ab5707cf4629e186d512d5f43e7eca3f94a..57dc4784f4f48c06b6816232d352d4992e45cee1 100644 (file)
@@ -30,7 +30,7 @@ struct search {
 };
 
 void bch_cache_read_endio(struct bio *, int);
-int bch_get_congested(struct cache_set *);
+unsigned bch_get_congested(struct cache_set *);
 void bch_insert_data(struct closure *cl);
 void bch_btree_insert_async(struct closure *);
 void bch_cache_read_endio(struct bio *, int);
index f88e2b653a3fc9c82a7b308c8988cd10eda0c96b..547c4c57b052efbb6fcd3df67c4b52c505023606 100644 (file)
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
+#include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/debugfs.h>
 #include <linux/genhd.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/reboot.h>
@@ -342,6 +345,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
        struct closure *cl = &c->uuid_write.cl;
        struct uuid_entry *u;
        unsigned i;
+       char buf[80];
 
        BUG_ON(!parent);
        closure_lock(&c->uuid_write, parent);
@@ -362,8 +366,8 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
                        break;
        }
 
-       pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read",
-                pkey(&c->uuid_bucket));
+       bch_bkey_to_text(buf, sizeof(buf), k);
+       pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf);
 
        for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
                if (!bch_is_zero(u->uuid, 16))
@@ -543,7 +547,6 @@ void bch_prio_write(struct cache *ca)
 
        pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
                 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-       blktrace_msg(ca, "Starting priorities: " buckets_free(ca));
 
        for (i = prio_buckets(ca) - 1; i >= 0; --i) {
                long bucket;
@@ -704,7 +707,8 @@ static void bcache_device_detach(struct bcache_device *d)
                atomic_set(&d->detaching, 0);
        }
 
-       bcache_device_unlink(d);
+       if (!d->flush_done)
+               bcache_device_unlink(d);
 
        d->c->devices[d->id] = NULL;
        closure_put(&d->c->caching);
@@ -743,13 +747,35 @@ static void bcache_device_free(struct bcache_device *d)
                mempool_destroy(d->unaligned_bvec);
        if (d->bio_split)
                bioset_free(d->bio_split);
+       if (is_vmalloc_addr(d->stripe_sectors_dirty))
+               vfree(d->stripe_sectors_dirty);
+       else
+               kfree(d->stripe_sectors_dirty);
 
        closure_debug_destroy(&d->cl);
 }
 
-static int bcache_device_init(struct bcache_device *d, unsigned block_size)
+static int bcache_device_init(struct bcache_device *d, unsigned block_size,
+                             sector_t sectors)
 {
        struct request_queue *q;
+       size_t n;
+
+       if (!d->stripe_size_bits)
+               d->stripe_size_bits = 31;
+
+       d->nr_stripes = round_up(sectors, 1 << d->stripe_size_bits) >>
+               d->stripe_size_bits;
+
+       if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t))
+               return -ENOMEM;
+
+       n = d->nr_stripes * sizeof(atomic_t);
+       d->stripe_sectors_dirty = n < PAGE_SIZE << 6
+               ? kzalloc(n, GFP_KERNEL)
+               : vzalloc(n);
+       if (!d->stripe_sectors_dirty)
+               return -ENOMEM;
 
        if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
            !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
@@ -759,6 +785,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
            !(q = blk_alloc_queue(GFP_KERNEL)))
                return -ENOMEM;
 
+       set_capacity(d->disk, sectors);
        snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
 
        d->disk->major          = bcache_major;
@@ -781,6 +808,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
        set_bit(QUEUE_FLAG_NONROT,      &d->disk->queue->queue_flags);
        set_bit(QUEUE_FLAG_DISCARD,     &d->disk->queue->queue_flags);
 
+       blk_queue_flush(q, REQ_FLUSH|REQ_FUA);
+
        return 0;
 }
 
@@ -800,6 +829,17 @@ static void calc_cached_dev_sectors(struct cache_set *c)
 void bch_cached_dev_run(struct cached_dev *dc)
 {
        struct bcache_device *d = &dc->disk;
+       char buf[SB_LABEL_SIZE + 1];
+       char *env[] = {
+               "DRIVER=bcache",
+               kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid),
+               NULL,
+               NULL,
+       };
+
+       memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
+       buf[SB_LABEL_SIZE] = '\0';
+       env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
 
        if (atomic_xchg(&dc->running, 1))
                return;
@@ -816,10 +856,12 @@ void bch_cached_dev_run(struct cached_dev *dc)
 
        add_disk(d->disk);
        bd_link_disk_holder(dc->bdev, dc->disk.disk);
-#if 0
-       char *env[] = { "SYMLINK=label" , NULL };
+       /* won't show up in the uevent file, use udevadm monitor -e instead
+        * only class / kset properties are persistent */
        kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
-#endif
+       kfree(env[1]);
+       kfree(env[2]);
+
        if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
            sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
                pr_debug("error creating sysfs link");
@@ -960,6 +1002,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
        atomic_set(&dc->count, 1);
 
        if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
+               bch_sectors_dirty_init(dc);
                atomic_set(&dc->has_dirty, 1);
                atomic_inc(&dc->count);
                bch_writeback_queue(dc);
@@ -1014,6 +1057,14 @@ static void cached_dev_flush(struct closure *cl)
        struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
        struct bcache_device *d = &dc->disk;
 
+       mutex_lock(&bch_register_lock);
+       d->flush_done = 1;
+
+       if (d->c)
+               bcache_device_unlink(d);
+
+       mutex_unlock(&bch_register_lock);
+
        bch_cache_accounting_destroy(&dc->accounting);
        kobject_del(&d->kobj);
 
@@ -1045,7 +1096,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
                hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
        }
 
-       ret = bcache_device_init(&dc->disk, block_size);
+       ret = bcache_device_init(&dc->disk, block_size,
+                        dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
        if (ret)
                return ret;
 
@@ -1144,11 +1196,10 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
 
        kobject_init(&d->kobj, &bch_flash_dev_ktype);
 
-       if (bcache_device_init(d, block_bytes(c)))
+       if (bcache_device_init(d, block_bytes(c), u->sectors))
                goto err;
 
        bcache_device_attach(d, c, u - c->uuids);
-       set_capacity(d->disk, u->sectors);
        bch_flash_dev_request_init(d);
        add_disk(d->disk);
 
@@ -1255,9 +1306,10 @@ static void cache_set_free(struct closure *cl)
        free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
        free_pages((unsigned long) c->sort, ilog2(bucket_pages(c)));
 
-       kfree(c->fill_iter);
        if (c->bio_split)
                bioset_free(c->bio_split);
+       if (c->fill_iter)
+               mempool_destroy(c->fill_iter);
        if (c->bio_meta)
                mempool_destroy(c->bio_meta);
        if (c->search)
@@ -1278,11 +1330,9 @@ static void cache_set_free(struct closure *cl)
 static void cache_set_flush(struct closure *cl)
 {
        struct cache_set *c = container_of(cl, struct cache_set, caching);
+       struct cache *ca;
        struct btree *b;
-
-       /* Shut down allocator threads */
-       set_bit(CACHE_SET_STOPPING_2, &c->flags);
-       wake_up(&c->alloc_wait);
+       unsigned i;
 
        bch_cache_accounting_destroy(&c->accounting);
 
@@ -1295,7 +1345,11 @@ static void cache_set_flush(struct closure *cl)
        /* Should skip this if we're unregistering because of an error */
        list_for_each_entry(b, &c->btree_cache, list)
                if (btree_node_dirty(b))
-                       bch_btree_write(b, true, NULL);
+                       bch_btree_node_write(b, NULL);
+
+       for_each_cache(ca, c, i)
+               if (ca->alloc_thread)
+                       kthread_stop(ca->alloc_thread);
 
        closure_return(cl);
 }
@@ -1303,18 +1357,22 @@ static void cache_set_flush(struct closure *cl)
 static void __cache_set_unregister(struct closure *cl)
 {
        struct cache_set *c = container_of(cl, struct cache_set, caching);
-       struct cached_dev *dc, *t;
+       struct cached_dev *dc;
        size_t i;
 
        mutex_lock(&bch_register_lock);
 
-       if (test_bit(CACHE_SET_UNREGISTERING, &c->flags))
-               list_for_each_entry_safe(dc, t, &c->cached_devs, list)
-                       bch_cached_dev_detach(dc);
-
        for (i = 0; i < c->nr_uuids; i++)
-               if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i]))
-                       bcache_device_stop(c->devices[i]);
+               if (c->devices[i]) {
+                       if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
+                           test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
+                               dc = container_of(c->devices[i],
+                                                 struct cached_dev, disk);
+                               bch_cached_dev_detach(dc);
+                       } else {
+                               bcache_device_stop(c->devices[i]);
+                       }
+               }
 
        mutex_unlock(&bch_register_lock);
 
@@ -1373,9 +1431,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
                c->btree_pages = max_t(int, c->btree_pages / 4,
                                       BTREE_MAX_PAGES);
 
-       init_waitqueue_head(&c->alloc_wait);
+       c->sort_crit_factor = int_sqrt(c->btree_pages);
+
        mutex_init(&c->bucket_lock);
-       mutex_init(&c->fill_lock);
        mutex_init(&c->sort_lock);
        spin_lock_init(&c->sort_time_lock);
        closure_init_unlocked(&c->sb_write);
@@ -1401,8 +1459,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
            !(c->bio_meta = mempool_create_kmalloc_pool(2,
                                sizeof(struct bbio) + sizeof(struct bio_vec) *
                                bucket_pages(c))) ||
+           !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
            !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
-           !(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) ||
            !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) ||
            !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
            bch_journal_alloc(c) ||
@@ -1410,8 +1468,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
            bch_open_buckets_alloc(c))
                goto err;
 
-       c->fill_iter->size = sb->bucket_size / sb->block_size;
-
        c->congested_read_threshold_us  = 2000;
        c->congested_write_threshold_us = 20000;
        c->error_limit  = 8 << IO_ERROR_SHIFT;
@@ -1496,9 +1552,10 @@ static void run_cache_set(struct cache_set *c)
                 */
                bch_journal_next(&c->journal);
 
+               err = "error starting allocator thread";
                for_each_cache(ca, c, i)
-                       closure_call(&ca->alloc, bch_allocator_thread,
-                                    system_wq, &c->cl);
+                       if (bch_cache_allocator_start(ca))
+                               goto err;
 
                /*
                 * First place it's safe to allocate: btree_check() and
@@ -1531,17 +1588,16 @@ static void run_cache_set(struct cache_set *c)
 
                bch_btree_gc_finish(c);
 
+               err = "error starting allocator thread";
                for_each_cache(ca, c, i)
-                       closure_call(&ca->alloc, bch_allocator_thread,
-                                    ca->alloc_workqueue, &c->cl);
+                       if (bch_cache_allocator_start(ca))
+                               goto err;
 
                mutex_lock(&c->bucket_lock);
                for_each_cache(ca, c, i)
                        bch_prio_write(ca);
                mutex_unlock(&c->bucket_lock);
 
-               wake_up(&c->alloc_wait);
-
                err = "cannot allocate new UUID bucket";
                if (__uuid_write(c))
                        goto err_unlock_gc;
@@ -1552,7 +1608,7 @@ static void run_cache_set(struct cache_set *c)
                        goto err_unlock_gc;
 
                bkey_copy_key(&c->root->key, &MAX_KEY);
-               bch_btree_write(c->root, true, &op);
+               bch_btree_node_write(c->root, &op.cl);
 
                bch_btree_set_root(c->root);
                rw_unlock(true, c->root);
@@ -1673,9 +1729,6 @@ void bch_cache_release(struct kobject *kobj)
 
        bio_split_pool_free(&ca->bio_split_hook);
 
-       if (ca->alloc_workqueue)
-               destroy_workqueue(ca->alloc_workqueue);
-
        free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca)));
        kfree(ca->prio_buckets);
        vfree(ca->buckets);
@@ -1723,7 +1776,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
            !(ca->prio_buckets  = kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
                                          2, GFP_KERNEL)) ||
            !(ca->disk_buckets  = alloc_bucket_pages(GFP_KERNEL, ca)) ||
-           !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) ||
            bio_split_pool_init(&ca->bio_split_hook))
                return -ENOMEM;
 
@@ -1786,6 +1838,36 @@ static ssize_t register_bcache(struct kobject *, struct kobj_attribute *,
 kobj_attribute_write(register,         register_bcache);
 kobj_attribute_write(register_quiet,   register_bcache);
 
+static bool bch_is_open_backing(struct block_device *bdev) {
+       struct cache_set *c, *tc;
+       struct cached_dev *dc, *t;
+
+       list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
+               list_for_each_entry_safe(dc, t, &c->cached_devs, list)
+                       if (dc->bdev == bdev)
+                               return true;
+       list_for_each_entry_safe(dc, t, &uncached_devices, list)
+               if (dc->bdev == bdev)
+                       return true;
+       return false;
+}
+
+static bool bch_is_open_cache(struct block_device *bdev) {
+       struct cache_set *c, *tc;
+       struct cache *ca;
+       unsigned i;
+
+       list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
+               for_each_cache(ca, c, i)
+                       if (ca->bdev == bdev)
+                               return true;
+       return false;
+}
+
+static bool bch_is_open(struct block_device *bdev) {
+       return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
+}
+
 static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
                               const char *buffer, size_t size)
 {
@@ -1810,8 +1892,13 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
                                  FMODE_READ|FMODE_WRITE|FMODE_EXCL,
                                  sb);
        if (IS_ERR(bdev)) {
-               if (bdev == ERR_PTR(-EBUSY))
-                       err = "device busy";
+               if (bdev == ERR_PTR(-EBUSY)) {
+                       bdev = lookup_bdev(strim(path));
+                       if (!IS_ERR(bdev) && bch_is_open(bdev))
+                               err = "device already registered";
+                       else
+                               err = "device busy";
+               }
                goto err;
        }
 
index 4d9cca47e4c6006cd20919080410007f75c3bc60..12a2c2846f994a1e2d51bf79374426df96893418 100644 (file)
@@ -9,7 +9,9 @@
 #include "sysfs.h"
 #include "btree.h"
 #include "request.h"
+#include "writeback.h"
 
+#include <linux/blkdev.h>
 #include <linux/sort.h>
 
 static const char * const cache_replacement_policies[] = {
@@ -79,6 +81,9 @@ rw_attribute(writeback_rate_p_term_inverse);
 rw_attribute(writeback_rate_d_smooth);
 read_attribute(writeback_rate_debug);
 
+read_attribute(stripe_size);
+read_attribute(partial_stripes_expensive);
+
 rw_attribute(synchronous);
 rw_attribute(journal_delay_ms);
 rw_attribute(discard);
@@ -127,7 +132,7 @@ SHOW(__bch_cached_dev)
                char derivative[20];
                char target[20];
                bch_hprint(dirty,
-                      atomic_long_read(&dc->disk.sectors_dirty) << 9);
+                          bcache_dev_sectors_dirty(&dc->disk) << 9);
                bch_hprint(derivative,  dc->writeback_rate_derivative << 9);
                bch_hprint(target,      dc->writeback_rate_target << 9);
 
@@ -143,7 +148,10 @@ SHOW(__bch_cached_dev)
        }
 
        sysfs_hprint(dirty_data,
-                    atomic_long_read(&dc->disk.sectors_dirty) << 9);
+                    bcache_dev_sectors_dirty(&dc->disk) << 9);
+
+       sysfs_hprint(stripe_size,       (1 << dc->disk.stripe_size_bits) << 9);
+       var_printf(partial_stripes_expensive,   "%u");
 
        var_printf(sequential_merge,    "%i");
        var_hprint(sequential_cutoff);
@@ -170,6 +178,7 @@ STORE(__cached_dev)
                                             disk.kobj);
        unsigned v = size;
        struct cache_set *c;
+       struct kobj_uevent_env *env;
 
 #define d_strtoul(var)         sysfs_strtoul(var, dc->var)
 #define d_strtoi_h(var)                sysfs_hatoi(var, dc->var)
@@ -214,6 +223,7 @@ STORE(__cached_dev)
        }
 
        if (attr == &sysfs_label) {
+               /* note: endlines are preserved */
                memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
                bch_write_bdev_super(dc, NULL);
                if (dc->disk.c) {
@@ -221,6 +231,15 @@ STORE(__cached_dev)
                               buf, SB_LABEL_SIZE);
                        bch_uuid_write(dc->disk.c);
                }
+               env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
+               if (!env)
+                       return -ENOMEM;
+               add_uevent_var(env, "DRIVER=bcache");
+               add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
+               add_uevent_var(env, "CACHED_LABEL=%s", buf);
+               kobject_uevent_env(
+                       &disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp);
+               kfree(env);
        }
 
        if (attr == &sysfs_attach) {
@@ -284,6 +303,8 @@ static struct attribute *bch_cached_dev_files[] = {
        &sysfs_writeback_rate_d_smooth,
        &sysfs_writeback_rate_debug,
        &sysfs_dirty_data,
+       &sysfs_stripe_size,
+       &sysfs_partial_stripes_expensive,
        &sysfs_sequential_cutoff,
        &sysfs_sequential_merge,
        &sysfs_clear_stats,
@@ -665,12 +686,10 @@ SHOW(__bch_cache)
                int cmp(const void *l, const void *r)
                {       return *((uint16_t *) r) - *((uint16_t *) l); }
 
-               /* Number of quantiles we compute */
-               const unsigned nq = 31;
-
                size_t n = ca->sb.nbuckets, i, unused, btree;
                uint64_t sum = 0;
-               uint16_t q[nq], *p, *cached;
+               /* Compute 31 quantiles */
+               uint16_t q[31], *p, *cached;
                ssize_t ret;
 
                cached = p = vmalloc(ca->sb.nbuckets * sizeof(uint16_t));
@@ -703,26 +722,29 @@ SHOW(__bch_cache)
                if (n)
                        do_div(sum, n);
 
-               for (i = 0; i < nq; i++)
-                       q[i] = INITIAL_PRIO - cached[n * (i + 1) / (nq + 1)];
+               for (i = 0; i < ARRAY_SIZE(q); i++)
+                       q[i] = INITIAL_PRIO - cached[n * (i + 1) /
+                               (ARRAY_SIZE(q) + 1)];
 
                vfree(p);
 
-               ret = snprintf(buf, PAGE_SIZE,
-                              "Unused:         %zu%%\n"
-                              "Metadata:       %zu%%\n"
-                              "Average:        %llu\n"
-                              "Sectors per Q:  %zu\n"
-                              "Quantiles:      [",
-                              unused * 100 / (size_t) ca->sb.nbuckets,
-                              btree * 100 / (size_t) ca->sb.nbuckets, sum,
-                              n * ca->sb.bucket_size / (nq + 1));
-
-               for (i = 0; i < nq && ret < (ssize_t) PAGE_SIZE; i++)
-                       ret += snprintf(buf + ret, PAGE_SIZE - ret,
-                                       i < nq - 1 ? "%u " : "%u]\n", q[i]);
-
-               buf[PAGE_SIZE - 1] = '\0';
+               ret = scnprintf(buf, PAGE_SIZE,
+                               "Unused:                %zu%%\n"
+                               "Metadata:      %zu%%\n"
+                               "Average:       %llu\n"
+                               "Sectors per Q: %zu\n"
+                               "Quantiles:     [",
+                               unused * 100 / (size_t) ca->sb.nbuckets,
+                               btree * 100 / (size_t) ca->sb.nbuckets, sum,
+                               n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));
+
+               for (i = 0; i < ARRAY_SIZE(q); i++)
+                       ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+                                        "%u ", q[i]);
+               ret--;
+
+               ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n");
+
                return ret;
        }
 
index 983f9bb411bc13e73be8d98cf6ef2fc0a48b07b8..f7b6c197f90f94f6aeb8ec59159265962af663c1 100644 (file)
@@ -2,6 +2,7 @@
 #include "btree.h"
 #include "request.h"
 
+#include <linux/blktrace_api.h>
 #include <linux/module.h>
 
 #define CREATE_TRACE_POINTS
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_insert_key);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision);
index da3a99e85b1e0db79a35a2735769b7595f2be159..98eb81159a22ba9f9e88fec53127e2e338ae3d4d 100644 (file)
@@ -228,23 +228,6 @@ start:             bv->bv_len      = min_t(size_t, PAGE_SIZE - bv->bv_offset,
        }
 }
 
-int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp)
-{
-       int i;
-       struct bio_vec *bv;
-
-       bio_for_each_segment(bv, bio, i) {
-               bv->bv_page = alloc_page(gfp);
-               if (!bv->bv_page) {
-                       while (bv-- != bio->bi_io_vec + bio->bi_idx)
-                               __free_page(bv->bv_page);
-                       return -ENOMEM;
-               }
-       }
-
-       return 0;
-}
-
 /*
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
  * use permitted, subject to terms of PostgreSQL license; see.)
index 577393e38c3ac3c05076d7a24ce0707a946a0402..1ae2a73ad85f5628b5292b769d79b0f28252f4ca 100644 (file)
@@ -15,8 +15,6 @@
 
 struct closure;
 
-#include <trace/events/bcache.h>
-
 #ifdef CONFIG_BCACHE_EDEBUG
 
 #define atomic_dec_bug(v)      BUG_ON(atomic_dec_return(v) < 0)
@@ -566,12 +564,8 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
        return x;
 }
 
-#define bio_end(bio)   ((bio)->bi_sector + bio_sectors(bio))
-
 void bch_bio_map(struct bio *bio, void *base);
 
-int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp);
-
 static inline sector_t bdev_sectors(struct block_device *bdev)
 {
        return bdev->bd_inode->i_size >> 9;
index 2714ed3991d1b747518aeb22d70222d1653d5e40..22cbff551628f3c9cff87ccc35563c09974f03b3 100644 (file)
@@ -9,6 +9,9 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "writeback.h"
+
+#include <trace/events/bcache.h>
 
 static struct workqueue_struct *dirty_wq;
 
@@ -36,7 +39,7 @@ static void __update_writeback_rate(struct cached_dev *dc)
 
        int change = 0;
        int64_t error;
-       int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty);
+       int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
        int64_t derivative = dirty - dc->disk.sectors_dirty_last;
 
        dc->disk.sectors_dirty_last = dirty;
@@ -105,6 +108,31 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k)
        return KEY_DIRTY(k);
 }
 
+static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k)
+{
+       uint64_t stripe;
+       unsigned nr_sectors = KEY_SIZE(k);
+       struct cached_dev *dc = container_of(buf, struct cached_dev,
+                                            writeback_keys);
+       unsigned stripe_size = 1 << dc->disk.stripe_size_bits;
+
+       if (!KEY_DIRTY(k))
+               return false;
+
+       stripe = KEY_START(k) >> dc->disk.stripe_size_bits;
+       while (1) {
+               if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) !=
+                   stripe_size)
+                       return false;
+
+               if (nr_sectors <= stripe_size)
+                       return true;
+
+               nr_sectors -= stripe_size;
+               stripe++;
+       }
+}
+
 static void dirty_init(struct keybuf_key *w)
 {
        struct dirty_io *io = w->private;
@@ -149,7 +177,22 @@ static void refill_dirty(struct closure *cl)
                searched_from_start = true;
        }
 
-       bch_refill_keybuf(dc->disk.c, buf, &end);
+       if (dc->partial_stripes_expensive) {
+               uint64_t i;
+
+               for (i = 0; i < dc->disk.nr_stripes; i++)
+                       if (atomic_read(dc->disk.stripe_sectors_dirty + i) ==
+                           1 << dc->disk.stripe_size_bits)
+                               goto full_stripes;
+
+               goto normal_refill;
+full_stripes:
+               bch_refill_keybuf(dc->disk.c, buf, &end,
+                                 dirty_full_stripe_pred);
+       } else {
+normal_refill:
+               bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
+       }
 
        if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) {
                /* Searched the entire btree  - delay awhile */
@@ -181,10 +224,8 @@ void bch_writeback_queue(struct cached_dev *dc)
        }
 }
 
-void bch_writeback_add(struct cached_dev *dc, unsigned sectors)
+void bch_writeback_add(struct cached_dev *dc)
 {
-       atomic_long_add(sectors, &dc->disk.sectors_dirty);
-
        if (!atomic_read(&dc->has_dirty) &&
            !atomic_xchg(&dc->has_dirty, 1)) {
                atomic_inc(&dc->count);
@@ -203,6 +244,34 @@ void bch_writeback_add(struct cached_dev *dc, unsigned sectors)
        }
 }
 
+void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
+                                 uint64_t offset, int nr_sectors)
+{
+       struct bcache_device *d = c->devices[inode];
+       unsigned stripe_size, stripe_offset;
+       uint64_t stripe;
+
+       if (!d)
+               return;
+
+       stripe_size = 1 << d->stripe_size_bits;
+       stripe = offset >> d->stripe_size_bits;
+       stripe_offset = offset & (stripe_size - 1);
+
+       while (nr_sectors) {
+               int s = min_t(unsigned, abs(nr_sectors),
+                             stripe_size - stripe_offset);
+
+               if (nr_sectors < 0)
+                       s = -s;
+
+               atomic_add(s, d->stripe_sectors_dirty + stripe);
+               nr_sectors -= s;
+               stripe_offset = 0;
+               stripe++;
+       }
+}
+
 /* Background writeback - IO loop */
 
 static void dirty_io_destructor(struct closure *cl)
@@ -216,9 +285,10 @@ static void write_dirty_finish(struct closure *cl)
        struct dirty_io *io = container_of(cl, struct dirty_io, cl);
        struct keybuf_key *w = io->bio.bi_private;
        struct cached_dev *dc = io->dc;
-       struct bio_vec *bv = bio_iovec_idx(&io->bio, io->bio.bi_vcnt);
+       struct bio_vec *bv;
+       int i;
 
-       while (bv-- != io->bio.bi_io_vec)
+       bio_for_each_segment_all(bv, &io->bio, i)
                __free_page(bv->bv_page);
 
        /* This is kind of a dumb way of signalling errors. */
@@ -236,10 +306,12 @@ static void write_dirty_finish(struct closure *cl)
                for (i = 0; i < KEY_PTRS(&w->key); i++)
                        atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);
 
-               pr_debug("clearing %s", pkey(&w->key));
                bch_btree_insert(&op, dc->disk.c);
                closure_sync(&op.cl);
 
+               if (op.insert_collision)
+                       trace_bcache_writeback_collision(&w->key);
+
                atomic_long_inc(op.insert_collision
                                ? &dc->disk.c->writeback_keys_failed
                                : &dc->disk.c->writeback_keys_done);
@@ -275,7 +347,6 @@ static void write_dirty(struct closure *cl)
        io->bio.bi_bdev         = io->dc->bdev;
        io->bio.bi_end_io       = dirty_endio;
 
-       trace_bcache_write_dirty(&io->bio);
        closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
        continue_at(cl, write_dirty_finish, dirty_wq);
@@ -296,7 +367,6 @@ static void read_dirty_submit(struct closure *cl)
 {
        struct dirty_io *io = container_of(cl, struct dirty_io, cl);
 
-       trace_bcache_read_dirty(&io->bio);
        closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
        continue_at(cl, write_dirty, dirty_wq);
@@ -349,10 +419,10 @@ static void read_dirty(struct closure *cl)
                io->bio.bi_rw           = READ;
                io->bio.bi_end_io       = read_dirty_endio;
 
-               if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
+               if (bio_alloc_pages(&io->bio, GFP_KERNEL))
                        goto err_free;
 
-               pr_debug("%s", pkey(&w->key));
+               trace_bcache_writeback(&w->key);
 
                closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
 
@@ -375,12 +445,49 @@ err:
        refill_dirty(cl);
 }
 
+/* Init */
+
+static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op,
+                                       struct cached_dev *dc)
+{
+       struct bkey *k;
+       struct btree_iter iter;
+
+       bch_btree_iter_init(b, &iter, &KEY(dc->disk.id, 0, 0));
+       while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad)))
+               if (!b->level) {
+                       if (KEY_INODE(k) > dc->disk.id)
+                               break;
+
+                       if (KEY_DIRTY(k))
+                               bcache_dev_sectors_dirty_add(b->c, dc->disk.id,
+                                                            KEY_START(k),
+                                                            KEY_SIZE(k));
+               } else {
+                       btree(sectors_dirty_init, k, b, op, dc);
+                       if (KEY_INODE(k) > dc->disk.id)
+                               break;
+
+                       cond_resched();
+               }
+
+       return 0;
+}
+
+void bch_sectors_dirty_init(struct cached_dev *dc)
+{
+       struct btree_op op;
+
+       bch_btree_op_init_stack(&op);
+       btree_root(sectors_dirty_init, dc->disk.c, &op, dc);
+}
+
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
 {
        closure_init_unlocked(&dc->writeback);
        init_rwsem(&dc->writeback_lock);
 
-       bch_keybuf_init(&dc->writeback_keys, dirty_pred);
+       bch_keybuf_init(&dc->writeback_keys);
 
        dc->writeback_metadata          = true;
        dc->writeback_running           = true;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
new file mode 100644 (file)
index 0000000..c91f61b
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef _BCACHE_WRITEBACK_H
+#define _BCACHE_WRITEBACK_H
+
+#define CUTOFF_WRITEBACK       40
+#define CUTOFF_WRITEBACK_SYNC  70
+
+static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
+{
+       uint64_t i, ret = 0;
+
+       for (i = 0; i < d->nr_stripes; i++)
+               ret += atomic_read(d->stripe_sectors_dirty + i);
+
+       return ret;
+}
+
+static inline bool bcache_dev_stripe_dirty(struct bcache_device *d,
+                                          uint64_t offset,
+                                          unsigned nr_sectors)
+{
+       uint64_t stripe = offset >> d->stripe_size_bits;
+
+       while (1) {
+               if (atomic_read(d->stripe_sectors_dirty + stripe))
+                       return true;
+
+               if (nr_sectors <= 1 << d->stripe_size_bits)
+                       return false;
+
+               nr_sectors -= 1 << d->stripe_size_bits;
+               stripe++;
+       }
+}
+
+static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
+                                   unsigned cache_mode, bool would_skip)
+{
+       unsigned in_use = dc->disk.c->gc_stats.in_use;
+
+       if (cache_mode != CACHE_MODE_WRITEBACK ||
+           atomic_read(&dc->disk.detaching) ||
+           in_use > CUTOFF_WRITEBACK_SYNC)
+               return false;
+
+       if (dc->partial_stripes_expensive &&
+           bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector,
+                                   bio_sectors(bio)))
+               return true;
+
+       if (would_skip)
+               return false;
+
+       return bio->bi_rw & REQ_SYNC ||
+               in_use <= CUTOFF_WRITEBACK;
+}
+
+void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
+void bch_writeback_queue(struct cached_dev *);
+void bch_writeback_add(struct cached_dev *);
+
+void bch_sectors_dirty_init(struct cached_dev *dc);
+void bch_cached_dev_writeback_init(struct cached_dev *);
+
+#endif
index 957a719e8c2f1545073d2d897a00ffdf17321878..df7b0a06b0ea2d820aec6ad5a752a13bb64e13e8 100644 (file)
@@ -2290,12 +2290,18 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
        d = r10_bio->devs[1].devnum;
        wbio = r10_bio->devs[1].bio;
        wbio2 = r10_bio->devs[1].repl_bio;
+       /* Need to test wbio2->bi_end_io before we call
+        * generic_make_request as if the former is NULL,
+        * the latter is free to free wbio2.
+        */
+       if (wbio2 && !wbio2->bi_end_io)
+               wbio2 = NULL;
        if (wbio->bi_end_io) {
                atomic_inc(&conf->mirrors[d].rdev->nr_pending);
                md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
                generic_make_request(wbio);
        }
-       if (wbio2 && wbio2->bi_end_io) {
+       if (wbio2) {
                atomic_inc(&conf->mirrors[d].replacement->nr_pending);
                md_sync_acct(conf->mirrors[d].replacement->bdev,
                             bio_sectors(wbio2));
index 2bf094a587cb8ff2efb7e1bcabe6f40cd7b52602..78ea44336e75e06d5769b4a6158f2a1e506214e0 100644 (file)
@@ -3462,6 +3462,7 @@ static void handle_stripe(struct stripe_head *sh)
                    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
                        set_bit(STRIPE_SYNCING, &sh->state);
                        clear_bit(STRIPE_INSYNC, &sh->state);
+                       clear_bit(STRIPE_REPLACED, &sh->state);
                }
                spin_unlock(&sh->stripe_lock);
        }
@@ -3607,19 +3608,23 @@ static void handle_stripe(struct stripe_head *sh)
                        handle_parity_checks5(conf, sh, &s, disks);
        }
 
-       if (s.replacing && s.locked == 0
-           && !test_bit(STRIPE_INSYNC, &sh->state)) {
+       if ((s.replacing || s.syncing) && s.locked == 0
+           && !test_bit(STRIPE_COMPUTE_RUN, &sh->state)
+           && !test_bit(STRIPE_REPLACED, &sh->state)) {
                /* Write out to replacement devices where possible */
                for (i = 0; i < conf->raid_disks; i++)
-                       if (test_bit(R5_UPTODATE, &sh->dev[i].flags) &&
-                           test_bit(R5_NeedReplace, &sh->dev[i].flags)) {
+                       if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) {
+                               WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags));
                                set_bit(R5_WantReplace, &sh->dev[i].flags);
                                set_bit(R5_LOCKED, &sh->dev[i].flags);
                                s.locked++;
                        }
-               set_bit(STRIPE_INSYNC, &sh->state);
+               if (s.replacing)
+                       set_bit(STRIPE_INSYNC, &sh->state);
+               set_bit(STRIPE_REPLACED, &sh->state);
        }
        if ((s.syncing || s.replacing) && s.locked == 0 &&
+           !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
            test_bit(STRIPE_INSYNC, &sh->state)) {
                md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
                clear_bit(STRIPE_SYNCING, &sh->state);
index b0b663b119a8f324ba7fa3b58953b42c81030fa4..70c49329ca9a23fbbad73061477ac0a0cfe80e6e 100644 (file)
@@ -306,6 +306,7 @@ enum {
        STRIPE_SYNC_REQUESTED,
        STRIPE_SYNCING,
        STRIPE_INSYNC,
+       STRIPE_REPLACED,
        STRIPE_PREREAD_ACTIVE,
        STRIPE_DELAYED,
        STRIPE_DEGRADED,
index f7b90661e3213668310274b445f51d972ef4b9a5..e068a76a5f6f268bf183097007efb3aa571acb89 100644 (file)
@@ -66,14 +66,19 @@ EXPORT_SYMBOL(ssc_request);
 
 void ssc_free(struct ssc_device *ssc)
 {
+       bool disable_clk = true;
+
        spin_lock(&user_lock);
-       if (ssc->user) {
+       if (ssc->user)
                ssc->user--;
-               clk_disable_unprepare(ssc->clk);
-       } else {
+       else {
+               disable_clk = false;
                dev_dbg(&ssc->pdev->dev, "device already free\n");
        }
        spin_unlock(&user_lock);
+
+       if (disable_clk)
+               clk_disable_unprepare(ssc->clk);
 }
 EXPORT_SYMBOL(ssc_free);
 
index f9296abcf02ad88dd2b2fd6675c8c27bd7e0eda7..6127ab64bb399323e57e418e9742c2f5b8a9d86d 100644 (file)
@@ -167,7 +167,7 @@ int mei_hbm_start_req(struct mei_device *dev)
 
        dev->hbm_state = MEI_HBM_IDLE;
        if (mei_write_message(dev, mei_hdr, dev->wr_msg.data)) {
-               dev_err(&dev->pdev->dev, "version message writet failed\n");
+               dev_err(&dev->pdev->dev, "version message write failed\n");
                dev->dev_state = MEI_DEV_RESETTING;
                mei_reset(dev, 1);
                return -ENODEV;
index e4f8dec4dc3ccf134317abbd70594f503e9a5344..b22c7e2472254b89dbacc20c20772dfd297d575e 100644 (file)
@@ -239,14 +239,18 @@ static int mei_me_hw_ready_wait(struct mei_device *dev)
        if (mei_me_hw_is_ready(dev))
                return 0;
 
+       dev->recvd_hw_ready = false;
        mutex_unlock(&dev->device_lock);
        err = wait_event_interruptible_timeout(dev->wait_hw_ready,
-                       dev->recvd_hw_ready, MEI_INTEROP_TIMEOUT);
+                       dev->recvd_hw_ready,
+                       mei_secs_to_jiffies(MEI_INTEROP_TIMEOUT));
        mutex_lock(&dev->device_lock);
        if (!err && !dev->recvd_hw_ready) {
+               if (!err)
+                       err = -ETIMEDOUT;
                dev_err(&dev->pdev->dev,
-                       "wait hw ready failed. status = 0x%x\n", err);
-               return -ETIMEDOUT;
+                       "wait hw ready failed. status = %d\n", err);
+               return err;
        }
 
        dev->recvd_hw_ready = false;
@@ -483,7 +487,9 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
        /* check if ME wants a reset */
        if (!mei_hw_is_ready(dev) &&
            dev->dev_state != MEI_DEV_RESETTING &&
-           dev->dev_state != MEI_DEV_INITIALIZING) {
+           dev->dev_state != MEI_DEV_INITIALIZING &&
+           dev->dev_state != MEI_DEV_POWER_DOWN &&
+           dev->dev_state != MEI_DEV_POWER_UP) {
                dev_dbg(&dev->pdev->dev, "FW not ready.\n");
                mei_reset(dev, 1);
                mutex_unlock(&dev->device_lock);
index ed1d75203af6e702de1e7a5c3fbdb6b6e8a765d6..e6f16f83ecdee479bfd679f169e078866bd3354a 100644 (file)
@@ -148,7 +148,8 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled)
 
        dev->hbm_state = MEI_HBM_IDLE;
 
-       if (dev->dev_state != MEI_DEV_INITIALIZING) {
+       if (dev->dev_state != MEI_DEV_INITIALIZING &&
+           dev->dev_state != MEI_DEV_POWER_UP) {
                if (dev->dev_state != MEI_DEV_DISABLED &&
                    dev->dev_state != MEI_DEV_POWER_DOWN)
                        dev->dev_state = MEI_DEV_RESETTING;
index a3c1c5aae6a9eaa5de0ecd0dd0335771ac8f7fab..1264923ade0f2c7973528080d3fa75167b6be0b4 100644 (file)
@@ -345,6 +345,7 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
        if (r && irq) {
                const char *name = NULL;
 
+               memset(r, 0, sizeof(*r));
                /*
                 * Get optional "interrupts-names" property to add a name
                 * to the resource.
@@ -482,8 +483,9 @@ void __init of_irq_init(const struct of_device_id *matches)
                }
 
                /* Get the next pending parent that might have children */
-               desc = list_first_entry(&intc_parent_list, typeof(*desc), list);
-               if (list_empty(&intc_parent_list) || !desc) {
+               desc = list_first_entry_or_null(&intc_parent_list,
+                                               typeof(*desc), list);
+               if (!desc) {
                        pr_err("of_irq_init: children remain, but no parents\n");
                        break;
                }
index b29e20b7862f168673b3ad980200dd3c80fff175..bb7af78e4eedd359a08db0fab40682a82abac5e1 100644 (file)
@@ -388,7 +388,6 @@ int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn)
        /* Remove the EADS bridge device itself */
        BUG_ON(!bus->self);
        pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self));
-       eeh_remove_bus_device(bus->self, true);
        pci_stop_and_remove_bus_device(bus->self);
 
        return 0;
index 080abf2faf972ab3b999ac58f923c03dbec7e9b8..a8c344422a77d5cce6c34fcadfd7d61781d660bc 100644 (file)
@@ -469,7 +469,7 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
                         unsigned long nr_segs, loff_t ppos)
 {
        struct logger_log *log = file_get_log(iocb->ki_filp);
-       size_t orig = log->w_off;
+       size_t orig;
        struct logger_entry header;
        struct timespec now;
        ssize_t ret = 0;
@@ -490,6 +490,8 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
        mutex_lock(&log->mutex);
 
+       orig = log->w_off;
+
        /*
         * Fix up any readers, pulling them forward to the first readable
         * entry after (what will be) the new write offset. We do this now
index b10f739b7e3e1fcbe16acaf7d66705b59e534441..fa8da9aada3079b6a18fc0189d4fdca36af753c6 100644 (file)
@@ -9,4 +9,4 @@ TODO:
 Please send patches to Greg Kroah-Hartman <greg@kroah.com> and
 copy:
        Ian Abbott <abbotti@mev.co.uk>
-       Frank Mori Hess <fmhess@users.sourceforge.net>
+       H Hartley Sweeten <hsweeten@visionengravers.com>
index 8647518259f6dd5ab3379babc218d5ef6cf24831..f4a197b2d1fd951f8879b78cd96e6a2cce1f24d3 100644 (file)
@@ -1413,22 +1413,19 @@ static int do_cmd_ioctl(struct comedi_device *dev,
                DPRINTK("subdevice busy\n");
                return -EBUSY;
        }
-       s->busy = file;
 
        /* make sure channel/gain list isn't too long */
        if (cmd.chanlist_len > s->len_chanlist) {
                DPRINTK("channel/gain list too long %u > %d\n",
                        cmd.chanlist_len, s->len_chanlist);
-               ret = -EINVAL;
-               goto cleanup;
+               return -EINVAL;
        }
 
        /* make sure channel/gain list isn't too short */
        if (cmd.chanlist_len < 1) {
                DPRINTK("channel/gain list too short %u < 1\n",
                        cmd.chanlist_len);
-               ret = -EINVAL;
-               goto cleanup;
+               return -EINVAL;
        }
 
        async->cmd = cmd;
@@ -1438,8 +1435,7 @@ static int do_cmd_ioctl(struct comedi_device *dev,
            kmalloc(async->cmd.chanlist_len * sizeof(int), GFP_KERNEL);
        if (!async->cmd.chanlist) {
                DPRINTK("allocation failed\n");
-               ret = -ENOMEM;
-               goto cleanup;
+               return -ENOMEM;
        }
 
        if (copy_from_user(async->cmd.chanlist, user_chanlist,
@@ -1491,6 +1487,9 @@ static int do_cmd_ioctl(struct comedi_device *dev,
 
        comedi_set_subdevice_runflags(s, ~0, SRF_USER | SRF_RUNNING);
 
+       /* set s->busy _after_ setting SRF_RUNNING flag to avoid race with
+        * comedi_read() or comedi_write() */
+       s->busy = file;
        ret = s->do_cmd(dev, s);
        if (ret == 0)
                return 0;
@@ -1705,6 +1704,7 @@ static int do_cancel_ioctl(struct comedi_device *dev, unsigned int arg,
                           void *file)
 {
        struct comedi_subdevice *s;
+       int ret;
 
        if (arg >= dev->n_subdevices)
                return -EINVAL;
@@ -1721,7 +1721,11 @@ static int do_cancel_ioctl(struct comedi_device *dev, unsigned int arg,
        if (s->busy != file)
                return -EBUSY;
 
-       return do_cancel(dev, s);
+       ret = do_cancel(dev, s);
+       if (comedi_get_subdevice_runflags(s) & SRF_USER)
+               wake_up_interruptible(&s->async->wait_head);
+
+       return ret;
 }
 
 /*
@@ -2053,11 +2057,13 @@ static ssize_t comedi_write(struct file *file, const char __user *buf,
 
                if (!comedi_is_subdevice_running(s)) {
                        if (count == 0) {
+                               mutex_lock(&dev->mutex);
                                if (comedi_is_subdevice_in_error(s))
                                        retval = -EPIPE;
                                else
                                        retval = 0;
                                do_become_nonbusy(dev, s);
+                               mutex_unlock(&dev->mutex);
                        }
                        break;
                }
@@ -2156,11 +2162,13 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes,
 
                if (n == 0) {
                        if (!comedi_is_subdevice_running(s)) {
+                               mutex_lock(&dev->mutex);
                                do_become_nonbusy(dev, s);
                                if (comedi_is_subdevice_in_error(s))
                                        retval = -EPIPE;
                                else
                                        retval = 0;
+                               mutex_unlock(&dev->mutex);
                                break;
                        }
                        if (file->f_flags & O_NONBLOCK) {
@@ -2198,9 +2206,11 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes,
                buf += n;
                break;          /* makes device work like a pipe */
        }
-       if (comedi_is_subdevice_idle(s) &&
-           async->buf_read_count - async->buf_write_count == 0) {
-               do_become_nonbusy(dev, s);
+       if (comedi_is_subdevice_idle(s)) {
+               mutex_lock(&dev->mutex);
+               if (async->buf_read_count - async->buf_write_count == 0)
+                       do_become_nonbusy(dev, s);
+               mutex_unlock(&dev->mutex);
        }
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&async->wait_head, &wait);
index 5590ebf1da15218c50561fcbd01b0ebc907ce693..817f837b240d656ccf4379bd3f68be4bbf49b867 100644 (file)
@@ -827,11 +827,11 @@ static void usb_alphatrack_disconnect(struct usb_interface *intf)
                mutex_unlock(&dev->mtx);
                usb_alphatrack_delete(dev);
        } else {
+               atomic_set(&dev->writes_pending, 0);
                dev->intf = NULL;
                mutex_unlock(&dev->mtx);
        }
 
-       atomic_set(&dev->writes_pending, 0);
        mutex_unlock(&disconnect_mutex);
 
        dev_info(&intf->dev, "Alphatrack Surface #%d now disconnected\n",
index b795353e8348a3fcab6d1a01d0472a7168c370a1..cc3692439a5c95fedc0f2c584a52b3d03c0bf391 100644 (file)
@@ -250,8 +250,8 @@ static void send_qos_list(struct nic *nic, struct list_head *head)
 
        list_for_each_entry_safe(entry, n, head, list) {
                list_del(&entry->list);
-               free_qos_entry(entry);
                gdm_wimax_send_tx(entry->skb, entry->dev);
+               free_qos_entry(entry);
        }
 }
 
index 22339059837ff34a15919ca319ede0e0bb1bbf9f..bd0f2fd01db4be91e4f16709c1d788ab1ec2336f 100644 (file)
@@ -33,7 +33,6 @@ config DRM_IMX_TVE
 config DRM_IMX_LDB
        tristate "Support for LVDS displays"
        depends on DRM_IMX
-       select OF_VIDEOMODE
        help
          Choose this to enable the internal LVDS Display Bridge (LDB)
          found on i.MX53 and i.MX6 processors.
index c191ae20356583a48aaba72526a64c92261c073b..41e88abe47af1f518ddf7f0ac955d8ca0812203e 100644 (file)
@@ -1120,8 +1120,11 @@ static int dbll_rmm_alloc(struct dynamic_loader_allocate *this,
           or DYN_EXTERNAL, then mem granularity information is present
           within the section name - only process if there are at least three
           tokens within the section name (just a minor optimization) */
-       if (count >= 3)
-               strict_strtol(sz_last_token, 10, (long *)&req);
+       if (count >= 3) {
+               status = kstrtos32(sz_last_token, 10, &req);
+               if (status)
+                       goto func_cont;
+       }
 
        if ((req == 0) || (req == 1)) {
                if (strcmp(sz_sec_last_token, "DYN_DARAM") == 0) {
index 82c7202fd5cc963668dc8e74caf7dfce11e8f66a..e77fb6ea40c905f3cb72571ef3ca424b44b37440 100644 (file)
@@ -527,8 +527,11 @@ static void zram_reset_device(struct zram *zram)
        size_t index;
        struct zram_meta *meta;
 
-       if (!zram->init_done)
+       down_write(&zram->init_lock);
+       if (!zram->init_done) {
+               up_write(&zram->init_lock);
                return;
+       }
 
        meta = zram->meta;
        zram->init_done = 0;
@@ -549,6 +552,7 @@ static void zram_reset_device(struct zram *zram)
 
        zram->disksize = 0;
        set_capacity(zram->disk, 0);
+       up_write(&zram->init_lock);
 }
 
 static void zram_init_device(struct zram *zram, struct zram_meta *meta)
index 5de56f671a9dd207faa954cf1868e7ef21284747..f36950e4134f55deb02e3a6eb172e9481828ea18 100644 (file)
@@ -54,6 +54,8 @@ MODULE_PARM_DESC(notify_delay_ms,
 * is some wrong values returned by cpuid for number of thresholds.
 */
 #define MAX_NUMBER_OF_TRIPS    2
+/* Limit number of package temp zones */
+#define MAX_PKG_TEMP_ZONE_IDS  256
 
 struct phy_dev_entry {
        struct list_head list;
@@ -394,12 +396,16 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
        char buffer[30];
        int thres_count;
        u32 eax, ebx, ecx, edx;
+       u8 *temp;
 
        cpuid(6, &eax, &ebx, &ecx, &edx);
        thres_count = ebx & 0x07;
        if (!thres_count)
                return -ENODEV;
 
+       if (topology_physical_package_id(cpu) > MAX_PKG_TEMP_ZONE_IDS)
+               return -ENODEV;
+
        thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
 
        err = get_tj_max(cpu, &tj_max);
@@ -417,13 +423,14 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
        spin_lock(&pkg_work_lock);
        if (topology_physical_package_id(cpu) > max_phy_id)
                max_phy_id = topology_physical_package_id(cpu);
-       pkg_work_scheduled = krealloc(pkg_work_scheduled,
-                               (max_phy_id+1) * sizeof(u8), GFP_ATOMIC);
-       if (!pkg_work_scheduled) {
+       temp = krealloc(pkg_work_scheduled,
+                       (max_phy_id+1) * sizeof(u8), GFP_ATOMIC);
+       if (!temp) {
                spin_unlock(&pkg_work_lock);
                err = -ENOMEM;
                goto err_ret_free;
        }
+       pkg_work_scheduled = temp;
        pkg_work_scheduled[topology_physical_package_id(cpu)] = 0;
        spin_unlock(&pkg_work_lock);
 
@@ -511,7 +518,7 @@ static int get_core_online(unsigned int cpu)
 
        /* Check if there is already an instance for this package */
        if (!phdev) {
-               if (!cpu_has(c, X86_FEATURE_DTHERM) &&
+               if (!cpu_has(c, X86_FEATURE_DTHERM) ||
                                        !cpu_has(c, X86_FEATURE_PTS))
                        return -ENODEV;
                if (pkg_temp_thermal_device_add(cpu))
@@ -562,7 +569,7 @@ static struct notifier_block pkg_temp_thermal_notifier __refdata = {
 };
 
 static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
-       { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM },
+       { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS },
        {}
 };
 MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
@@ -592,7 +599,6 @@ static int __init pkg_temp_thermal_init(void)
        return 0;
 
 err_ret:
-       get_online_cpus();
        for_each_online_cpu(i)
                put_core_offline(i);
        put_online_cpus();
index 721904f8efa92b8816665410822b355619679f47..946ddd2b3a541990277aabad488843a429e3e6f5 100644 (file)
@@ -193,7 +193,8 @@ static int __init parse_options(struct early_serial8250_device *device,
        if (options) {
                options++;
                device->baud = simple_strtoul(options, NULL, 0);
-               length = min(strcspn(options, " "), sizeof(device->options));
+               length = min(strcspn(options, " ") + 1,
+                            sizeof(device->options));
                strlcpy(device->options, options, length);
        } else {
                device->baud = probe_baud(port);
index 5e3d68917ffed2529c3cd0181d40d7ae95f8ebcd..1456673bcca09b58cc4d1e7c27fd31569b8439bf 100644 (file)
@@ -277,7 +277,7 @@ config SERIAL_TEGRA
        select SERIAL_CORE
        help
          Support for the on-chip UARTs on the NVIDIA Tegra series SOCs
-         providing /dev/ttyHS0, 1, 2, 3 and 4 (note, some machines may not
+         providing /dev/ttyTHS0, 1, 2, 3 and 4 (note, some machines may not
          provide all of these ports, depending on how the serial port
          are enabled). This driver uses the APB DMA to achieve higher baudrate
          and better performance.
index ff171384ea5256cabcf018c3a3dc839279f71392..dc6e96996ead4796eace1ba3bb4b61232f132302 100644 (file)
@@ -3478,7 +3478,7 @@ static int alloc_buf_list(SLMP_INFO *info)
        for ( i = 0; i < info->rx_buf_count; i++ ) {
                /* calculate and store physical address of this buffer entry */
                info->rx_buf_list_ex[i].phys_entry =
-                       info->buffer_list_phys + (i * sizeof(SCABUFSIZE));
+                       info->buffer_list_phys + (i * SCABUFSIZE);
 
                /* calculate and store physical address of */
                /* next entry in cirular list of entries */
index 4191db32f12c2ccf4901287c6ccb1a357c9b552a..4a8a1d68002c0fa1695078c6566c616bf764fa35 100644 (file)
@@ -668,6 +668,15 @@ resubmit:
 static inline int
 hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt)
 {
+       /* Need to clear both directions for control ep */
+       if (((devinfo >> 11) & USB_ENDPOINT_XFERTYPE_MASK) ==
+                       USB_ENDPOINT_XFER_CONTROL) {
+               int status = usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0),
+                               HUB_CLEAR_TT_BUFFER, USB_RT_PORT,
+                               devinfo ^ 0x8000, tt, NULL, 0, 1000);
+               if (status)
+                       return status;
+       }
        return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0),
                               HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo,
                               tt, NULL, 0, 1000);
@@ -2848,6 +2857,15 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev)
                                USB_CTRL_SET_TIMEOUT);
 }
 
+/* Count of wakeup-enabled devices at or below udev */
+static unsigned wakeup_enabled_descendants(struct usb_device *udev)
+{
+       struct usb_hub *hub = usb_hub_to_struct_hub(udev);
+
+       return udev->do_remote_wakeup +
+                       (hub ? hub->wakeup_enabled_descendants : 0);
+}
+
 /*
  * usb_port_suspend - suspend a usb device's upstream port
  * @udev: device that's no longer in active use, not a root hub
@@ -2888,8 +2906,8 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev)
  * Linux (2.6) currently has NO mechanisms to initiate that:  no khubd
  * timer, no SRP, no requests through sysfs.
  *
- * If Runtime PM isn't enabled or used, non-SuperSpeed devices really get
- * suspended only when their bus goes into global suspend (i.e., the root
+ * If Runtime PM isn't enabled or used, non-SuperSpeed devices may not get
+ * suspended until their bus goes into global suspend (i.e., the root
  * hub is suspended).  Nevertheless, we change @udev->state to
  * USB_STATE_SUSPENDED as this is the device's "logical" state.  The actual
  * upstream port setting is stored in @udev->port_is_suspended.
@@ -2960,15 +2978,21 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg)
        /* see 7.1.7.6 */
        if (hub_is_superspeed(hub->hdev))
                status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U3);
-       else if (PMSG_IS_AUTO(msg))
-               status = set_port_feature(hub->hdev, port1,
-                                               USB_PORT_FEAT_SUSPEND);
+
        /*
         * For system suspend, we do not need to enable the suspend feature
         * on individual USB-2 ports.  The devices will automatically go
         * into suspend a few ms after the root hub stops sending packets.
         * The USB 2.0 spec calls this "global suspend".
+        *
+        * However, many USB hubs have a bug: They don't relay wakeup requests
+        * from a downstream port if the port's suspend feature isn't on.
+        * Therefore we will turn on the suspend feature if udev or any of its
+        * descendants is enabled for remote wakeup.
         */
+       else if (PMSG_IS_AUTO(msg) || wakeup_enabled_descendants(udev) > 0)
+               status = set_port_feature(hub->hdev, port1,
+                               USB_PORT_FEAT_SUSPEND);
        else {
                really_suspend = false;
                status = 0;
@@ -3003,15 +3027,16 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg)
                if (!PMSG_IS_AUTO(msg))
                        status = 0;
        } else {
-               /* device has up to 10 msec to fully suspend */
                dev_dbg(&udev->dev, "usb %ssuspend, wakeup %d\n",
                                (PMSG_IS_AUTO(msg) ? "auto-" : ""),
                                udev->do_remote_wakeup);
-               usb_set_device_state(udev, USB_STATE_SUSPENDED);
                if (really_suspend) {
                        udev->port_is_suspended = 1;
+
+                       /* device has up to 10 msec to fully suspend */
                        msleep(10);
                }
+               usb_set_device_state(udev, USB_STATE_SUSPENDED);
        }
 
        /*
@@ -3293,7 +3318,11 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg)
        unsigned                port1;
        int                     status;
 
-       /* Warn if children aren't already suspended */
+       /*
+        * Warn if children aren't already suspended.
+        * Also, add up the number of wakeup-enabled descendants.
+        */
+       hub->wakeup_enabled_descendants = 0;
        for (port1 = 1; port1 <= hdev->maxchild; port1++) {
                struct usb_device       *udev;
 
@@ -3303,6 +3332,9 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg)
                        if (PMSG_IS_AUTO(msg))
                                return -EBUSY;
                }
+               if (udev)
+                       hub->wakeup_enabled_descendants +=
+                                       wakeup_enabled_descendants(udev);
        }
 
        if (hdev->do_remote_wakeup && hub->quirk_check_port_auto_suspend) {
index 6508e02b3dac91718c31dda7e90b6984fed0c898..4e4790dea3439fc10a57b9fac30f73d77245fdf8 100644 (file)
@@ -59,6 +59,9 @@ struct usb_hub {
        struct usb_tt           tt;             /* Transaction Translator */
 
        unsigned                mA_per_port;    /* current for each child */
+#ifdef CONFIG_PM
+       unsigned                wakeup_enabled_descendants;
+#endif
 
        unsigned                limited_power:1;
        unsigned                quiescing:1;
index 757aa18027d05ca0eb198d97d92f1710e8e58def..2378958ea63e01426edbda87183674256a4334ff 100644 (file)
@@ -1,6 +1,6 @@
 config USB_DWC3
        tristate "DesignWare USB3 DRD Core Support"
-       depends on (USB || USB_GADGET) && GENERIC_HARDIRQS
+       depends on (USB || USB_GADGET) && GENERIC_HARDIRQS && HAS_DMA
        select USB_XHCI_PLATFORM if USB_SUPPORT && USB_XHCI_HCD
        help
          Say Y or M here if your system has a Dual Role SuperSpeed
index c35d49d39b7615f83e7e71a9f009643e33c8d760..358375e0b291098d8be40d65bdfbbff7f97f0826 100644 (file)
@@ -450,7 +450,7 @@ static int dwc3_probe(struct platform_device *pdev)
        }
 
        if (IS_ERR(dwc->usb3_phy)) {
-               ret = PTR_ERR(dwc->usb2_phy);
+               ret = PTR_ERR(dwc->usb3_phy);
 
                /*
                 * if -ENXIO is returned, it means PHY layer wasn't
index b69d322e3cab51c4bcdb784d8f0e3eeda66028e2..27dad993b00706ef5a14a257c40bd353ca6ee560 100644 (file)
@@ -759,8 +759,8 @@ struct dwc3 {
 
 struct dwc3_event_type {
        u32     is_devspec:1;
-       u32     type:6;
-       u32     reserved8_31:25;
+       u32     type:7;
+       u32     reserved8_31:24;
 } __packed;
 
 #define DWC3_DEPEVT_XFERCOMPLETE       0x01
index b5e5b35df49c8ff49ca5817ced0838e79dad9ccf..f77083fedc68d0d1bfc7afb579e6aae96edb1d63 100644 (file)
@@ -1584,6 +1584,7 @@ err1:
        __dwc3_gadget_ep_disable(dwc->eps[0]);
 
 err0:
+       dwc->gadget_driver = NULL;
        spin_unlock_irqrestore(&dwc->lock, flags);
 
        return ret;
index 62f6802f6e0fd98bec3aa03abe3d0fff4a1ee191..8e9368330b103a60cc7fb53be21991315c489fd8 100644 (file)
@@ -193,6 +193,7 @@ config USB_FUSB300
           Faraday usb device controller FUSB300 driver
 
 config USB_FOTG210_UDC
+       depends on HAS_DMA
        tristate "Faraday FOTG210 USB Peripheral Controller"
        help
           Faraday USB2.0 OTG controller which can be configured as
@@ -328,13 +329,14 @@ config USB_S3C_HSUDC
 
 config USB_MV_UDC
        tristate "Marvell USB2.0 Device Controller"
-       depends on GENERIC_HARDIRQS
+       depends on GENERIC_HARDIRQS && HAS_DMA
        help
          Marvell Socs (including PXA and MMP series) include a high speed
          USB2.0 OTG controller, which can be configured as high speed or
          full speed USB peripheral.
 
 config USB_MV_U3D
+       depends on HAS_DMA
        tristate "MARVELL PXA2128 USB 3.0 controller"
        help
          MARVELL PXA2128 Processor series include a super speed USB3.0 device
@@ -639,6 +641,7 @@ config USB_CONFIGFS_RNDIS
        depends on USB_CONFIGFS
        depends on NET
        select USB_U_ETHER
+       select USB_U_RNDIS
        select USB_F_RNDIS
        help
           Microsoft Windows XP bundles the "Remote NDIS" (RNDIS) protocol,
index 073b938f913546793d24443a89dc928982eecaaa..d9a6add0c8526e06778addb733ce3e53da78a291 100644 (file)
@@ -870,8 +870,8 @@ static void clk_on(struct at91_udc *udc)
        if (udc->clocked)
                return;
        udc->clocked = 1;
-       clk_enable(udc->iclk);
-       clk_enable(udc->fclk);
+       clk_prepare_enable(udc->iclk);
+       clk_prepare_enable(udc->fclk);
 }
 
 static void clk_off(struct at91_udc *udc)
@@ -880,8 +880,8 @@ static void clk_off(struct at91_udc *udc)
                return;
        udc->clocked = 0;
        udc->gadget.speed = USB_SPEED_UNKNOWN;
-       clk_disable(udc->fclk);
-       clk_disable(udc->iclk);
+       clk_disable_unprepare(udc->fclk);
+       clk_disable_unprepare(udc->iclk);
 }
 
 /*
@@ -1725,7 +1725,7 @@ static int at91udc_probe(struct platform_device *pdev)
        /* init software state */
        udc = &controller;
        udc->gadget.dev.parent = dev;
-       if (pdev->dev.of_node)
+       if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node)
                at91udc_of_init(udc, pdev->dev.of_node);
        else
                memcpy(&udc->board, dev->platform_data,
@@ -1782,12 +1782,14 @@ static int at91udc_probe(struct platform_device *pdev)
        }
 
        /* don't do anything until we have both gadget driver and VBUS */
-       clk_enable(udc->iclk);
+       retval = clk_prepare_enable(udc->iclk);
+       if (retval)
+               goto fail1;
        at91_udp_write(udc, AT91_UDP_TXVC, AT91_UDP_TXVC_TXVDIS);
        at91_udp_write(udc, AT91_UDP_IDR, 0xffffffff);
        /* Clear all pending interrupts - UDP may be used by bootloader. */
        at91_udp_write(udc, AT91_UDP_ICR, 0xffffffff);
-       clk_disable(udc->iclk);
+       clk_disable_unprepare(udc->iclk);
 
        /* request UDC and maybe VBUS irqs */
        udc->udp_irq = platform_get_irq(pdev, 0);
index 5d3561ea1c1595ea30f07bc3ad35253647701d97..edab45da37417e16960b75033a46a8453475b77b 100644 (file)
@@ -959,8 +959,11 @@ static struct usb_function_instance *ecm_alloc_inst(void)
        mutex_init(&opts->lock);
        opts->func_inst.free_func_inst = ecm_free_inst;
        opts->net = gether_setup_default();
-       if (IS_ERR(opts->net))
-               return ERR_PTR(PTR_ERR(opts->net));
+       if (IS_ERR(opts->net)) {
+               struct net_device *net = opts->net;
+               kfree(opts);
+               return ERR_CAST(net);
+       }
 
        config_group_init_type_name(&opts->func_inst.group, "", &ecm_func_type);
 
index 90ee8022e8d80c29fbff9a1298d62c13ec869b42..d00392d879db3aa3e75d093d4e26041fa42bcb54 100644 (file)
@@ -593,8 +593,11 @@ static struct usb_function_instance *eem_alloc_inst(void)
        mutex_init(&opts->lock);
        opts->func_inst.free_func_inst = eem_free_inst;
        opts->net = gether_setup_default();
-       if (IS_ERR(opts->net))
-               return ERR_CAST(opts->net);
+       if (IS_ERR(opts->net)) {
+               struct net_device *net = opts->net;
+               kfree(opts);
+               return ERR_CAST(net);
+       }
 
        config_group_init_type_name(&opts->func_inst.group, "", &eem_func_type);
 
index 952177f7eb9b40e8045bfa85730239ab22efd97c..1c28fe13328a13935a3976ecc409cba113a81221 100644 (file)
@@ -1350,8 +1350,11 @@ static struct usb_function_instance *ncm_alloc_inst(void)
        mutex_init(&opts->lock);
        opts->func_inst.free_func_inst = ncm_free_inst;
        opts->net = gether_setup_default();
-       if (IS_ERR(opts->net))
-               return ERR_PTR(PTR_ERR(opts->net));
+       if (IS_ERR(opts->net)) {
+               struct net_device *net = opts->net;
+               kfree(opts);
+               return ERR_CAST(net);
+       }
 
        config_group_init_type_name(&opts->func_inst.group, "", &ncm_func_type);
 
index 7944fb0efe3b6a67471e7379a03a5d8ebc9b23fb..1bf26e9f38cd3c4573331ff77d6a9caa55a0be7e 100644 (file)
@@ -656,8 +656,11 @@ static struct usb_function_instance *phonet_alloc_inst(void)
 
        opts->func_inst.free_func_inst = phonet_free_inst;
        opts->net = gphonet_setup_default();
-       if (IS_ERR(opts->net))
-               return ERR_PTR(PTR_ERR(opts->net));
+       if (IS_ERR(opts->net)) {
+               struct net_device *net = opts->net;
+               kfree(opts);
+               return ERR_CAST(net);
+       }
 
        config_group_init_type_name(&opts->func_inst.group, "",
                        &phonet_func_type);
index 191df35ae69d04e31c6fc70ac7d76f2b4ff83be3..717ed7f956395412bf5d6da1d605921c71b82532 100644 (file)
@@ -963,8 +963,11 @@ static struct usb_function_instance *rndis_alloc_inst(void)
        mutex_init(&opts->lock);
        opts->func_inst.free_func_inst = rndis_free_inst;
        opts->net = gether_setup_default();
-       if (IS_ERR(opts->net))
-               return ERR_CAST(opts->net);
+       if (IS_ERR(opts->net)) {
+               struct net_device *net = opts->net;
+               kfree(opts);
+               return ERR_CAST(net);
+       }
 
        config_group_init_type_name(&opts->func_inst.group, "",
                                    &rndis_func_type);
index 5601e1d96c4fabd7f8a44bc80703080495fba56a..7c8674fa7e8094074aa794fe9407e2adeab7548a 100644 (file)
@@ -505,8 +505,11 @@ static struct usb_function_instance *geth_alloc_inst(void)
        mutex_init(&opts->lock);
        opts->func_inst.free_func_inst = geth_free_inst;
        opts->net = gether_setup_default();
-       if (IS_ERR(opts->net))
-               return ERR_CAST(opts->net);
+       if (IS_ERR(opts->net)) {
+               struct net_device *net = opts->net;
+               kfree(opts);
+               return ERR_CAST(net);
+       }
 
        config_group_init_type_name(&opts->func_inst.group, "",
                                    &gether_func_type);
index cce5535b1dc6744f86a2a3c88d35c756ef4335cd..10cd18ddd0d40207f2d33bd94e41dff040c842f2 100644 (file)
@@ -1074,7 +1074,7 @@ static struct usb_gadget_ops fotg210_gadget_ops = {
        .udc_stop               = fotg210_udc_stop,
 };
 
-static int __exit fotg210_udc_remove(struct platform_device *pdev)
+static int fotg210_udc_remove(struct platform_device *pdev)
 {
        struct fotg210_udc *fotg210 = dev_get_drvdata(&pdev->dev);
 
@@ -1088,7 +1088,7 @@ static int __exit fotg210_udc_remove(struct platform_device *pdev)
        return 0;
 }
 
-static int __init fotg210_udc_probe(struct platform_device *pdev)
+static int fotg210_udc_probe(struct platform_device *pdev)
 {
        struct resource *res, *ires;
        struct fotg210_udc *fotg210 = NULL;
index 07fdb3eaf48a4fd1be386e346e6e9fccc53e78ce..ec6a2d2903988ffb7a23aa12b63b0f95fd4f601a 100644 (file)
@@ -1776,7 +1776,7 @@ static int mv_u3d_remove(struct platform_device *dev)
        kfree(u3d->eps);
 
        if (u3d->irq)
-               free_irq(u3d->irq, &dev->dev);
+               free_irq(u3d->irq, u3d);
 
        if (u3d->cap_regs)
                iounmap(u3d->cap_regs);
@@ -1974,7 +1974,7 @@ static int mv_u3d_probe(struct platform_device *dev)
        return 0;
 
 err_unregister:
-       free_irq(u3d->irq, &dev->dev);
+       free_irq(u3d->irq, u3d);
 err_request_irq:
 err_get_irq:
        kfree(u3d->status_req);
index ffd8fa5411015f823bc5701777e20cd928848f56..c28ac9872030ab1487a4dd74ffb5ee54af950d67 100644 (file)
@@ -50,6 +50,8 @@ static DEFINE_MUTEX(udc_lock);
 
 /* ------------------------------------------------------------------------- */
 
+#ifdef CONFIG_HAS_DMA
+
 int usb_gadget_map_request(struct usb_gadget *gadget,
                struct usb_request *req, int is_in)
 {
@@ -99,6 +101,8 @@ void usb_gadget_unmap_request(struct usb_gadget *gadget,
 }
 EXPORT_SYMBOL_GPL(usb_gadget_unmap_request);
 
+#endif /* CONFIG_HAS_DMA */
+
 /* ------------------------------------------------------------------------- */
 
 void usb_gadget_set_state(struct usb_gadget *gadget,
@@ -194,9 +198,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
        dev_set_name(&gadget->dev, "gadget");
        gadget->dev.parent = parent;
 
+#ifdef CONFIG_HAS_DMA
        dma_set_coherent_mask(&gadget->dev, parent->coherent_dma_mask);
        gadget->dev.dma_parms = parent->dma_parms;
        gadget->dev.dma_mask = parent->dma_mask;
+#endif
 
        if (release)
                gadget->dev.release = release;
index 2b702772d04d3353cc570c340f04c9701fa11210..6dce37555c4f6303b5efa657af88096db2b2140e 100644 (file)
@@ -874,6 +874,7 @@ static int ehci_hub_control (
                                ehci->reset_done[wIndex] = jiffies
                                                + msecs_to_jiffies(20);
                                usb_hcd_start_port_resume(&hcd->self, wIndex);
+                               set_bit(wIndex, &ehci->resuming_ports);
                                /* check the port again */
                                mod_timer(&ehci_to_hcd(ehci)->rh_timer,
                                                ehci->reset_done[wIndex]);
index 4b8a2092432f8911b5d73053e34b135041fd0e4d..978c849f9c9a1ef9631d86fa80ccea93371f61ea 100644 (file)
@@ -13,6 +13,7 @@ void usb_enable_xhci_ports(struct pci_dev *xhci_pdev);
 void usb_disable_xhci_ports(struct pci_dev *xhci_pdev);
 void sb800_prefetch(struct device *dev, int on);
 #else
+struct pci_dev;
 static inline void usb_amd_quirk_pll_disable(void) {}
 static inline void usb_amd_quirk_pll_enable(void) {}
 static inline void usb_amd_dev_put(void) {}
index cc24e39b97d5b8b2bdf7faada7f7767b4f8013c2..f00cb203faea569b2e87e79e8371455a7abd77b4 100644 (file)
@@ -93,7 +93,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
        }
        if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
                        pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) {
-               xhci->quirks |= XHCI_SPURIOUS_SUCCESS;
                xhci->quirks |= XHCI_EP_LIMIT_QUIRK;
                xhci->limit_active_eps = 64;
                xhci->quirks |= XHCI_SW_BW_CHECKING;
index 1e57eafa69101aaee797b7890020c66865ef4c16..5b08cd85f8e7fe9646228da946166dfb188c10d2 100644 (file)
@@ -434,7 +434,7 @@ static void ring_doorbell_for_active_rings(struct xhci_hcd *xhci,
 
        /* A ring has pending URBs if its TD list is not empty */
        if (!(ep->ep_state & EP_HAS_STREAMS)) {
-               if (!(list_empty(&ep->ring->td_list)))
+               if (ep->ring && !(list_empty(&ep->ring->td_list)))
                        xhci_ring_ep_doorbell(xhci, slot_id, ep_index, 0);
                return;
        }
index 2c49f00260ca2ad9cd0e79bb3078dbd25bb864d6..41eb4fc33453525422b95218b6d51b34aaa617b5 100644 (file)
@@ -329,7 +329,7 @@ static void xhci_cleanup_msix(struct xhci_hcd *xhci)
        return;
 }
 
-static void xhci_msix_sync_irqs(struct xhci_hcd *xhci)
+static void __maybe_unused xhci_msix_sync_irqs(struct xhci_hcd *xhci)
 {
        int i;
 
@@ -1181,9 +1181,6 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
        }
 
        xhci = hcd_to_xhci(hcd);
-       if (xhci->xhc_state & XHCI_STATE_HALTED)
-               return -ENODEV;
-
        if (check_virt_dev) {
                if (!udev->slot_id || !xhci->devs[udev->slot_id]) {
                        printk(KERN_DEBUG "xHCI %s called with unaddressed "
@@ -1199,6 +1196,9 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
                }
        }
 
+       if (xhci->xhc_state & XHCI_STATE_HALTED)
+               return -ENODEV;
+
        return 1;
 }
 
@@ -3898,7 +3898,7 @@ int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1)
  * Issue an Evaluate Context command to change the Maximum Exit Latency in the
  * slot context.  If that succeeds, store the new MEL in the xhci_virt_device.
  */
-static int xhci_change_max_exit_latency(struct xhci_hcd *xhci,
+static int __maybe_unused xhci_change_max_exit_latency(struct xhci_hcd *xhci,
                        struct usb_device *udev, u16 max_exit_latency)
 {
        struct xhci_virt_device *virt_dev;
@@ -4892,6 +4892,13 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks)
 
        get_quirks(dev, xhci);
 
+       /* In xhci controllers which follow xhci 1.0 spec gives a spurious
+        * success event after a short transfer. This quirk will ignore such
+        * spurious event.
+        */
+       if (xhci->hci_version > 0x96)
+               xhci->quirks |= XHCI_SPURIOUS_SUCCESS;
+
        /* Make sure the HC is halted. */
        retval = xhci_halt(xhci);
        if (retval)
index c21386ec5d35d142b7d0d84abe2e3f3cb5be7125..de98906f786d08ebac625b3cc8213de5d79cf68e 100644 (file)
@@ -3247,6 +3247,7 @@ static const struct usb_device_id sisusb_table[] = {
        { USB_DEVICE(0x0711, 0x0903) },
        { USB_DEVICE(0x0711, 0x0918) },
        { USB_DEVICE(0x0711, 0x0920) },
+       { USB_DEVICE(0x0711, 0x0950) },
        { USB_DEVICE(0x182d, 0x021c) },
        { USB_DEVICE(0x182d, 0x0269) },
        { }
index efe6e1464f45b59fb831a9ca5c05361593b3b7a5..a2fb30bbb971c41f76fc8ad1eb358449f0117657 100644 (file)
@@ -71,9 +71,9 @@ static struct usb_dpll_params omap_usb3_dpll_params[NUM_SYS_CLKS] = {
        {1250, 5, 4, 20, 0},            /* 12 MHz */
        {3125, 20, 4, 20, 0},           /* 16.8 MHz */
        {1172, 8, 4, 20, 65537},        /* 19.2 MHz */
+       {1000, 7, 4, 10, 0},            /* 20 MHz */
        {1250, 12, 4, 20, 0},           /* 26 MHz */
        {3125, 47, 4, 20, 92843},       /* 38.4 MHz */
-       {1000, 7, 4, 10, 0},            /* 20 MHz */
 
 };
 
index 1011c16ade7e4b2de81db46d0ab8ddfc599b24bc..758b86d0fcb386af39561d2d25e8ad9983c97fbf 100644 (file)
@@ -388,7 +388,7 @@ static int samsung_usb2phy_probe(struct platform_device *pdev)
                clk = devm_clk_get(dev, "otg");
 
        if (IS_ERR(clk)) {
-               dev_err(dev, "Failed to get otg clock\n");
+               dev_err(dev, "Failed to get usbhost/otg clock\n");
                return PTR_ERR(clk);
        }
 
index ed4949faa70d14502a6669a3fb35b6fb814b35c6..805940c373532bd4d548b5cf4d868ce4d75024ad 100644 (file)
@@ -855,10 +855,6 @@ static int usbhsg_gadget_stop(struct usb_gadget *gadget,
        struct usbhsg_gpriv *gpriv = usbhsg_gadget_to_gpriv(gadget);
        struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv);
 
-       if (!driver             ||
-           !driver->unbind)
-               return -EINVAL;
-
        usbhsg_try_stop(priv, USBHSG_STATUS_REGISTERD);
        gpriv->driver = NULL;
 
index d6ef2f8da37dca2f7206f26db7d6269bdf0d13e6..0eae4ba3760ed87f633b0d449a833f1e5109107d 100644 (file)
@@ -53,6 +53,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
        { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
        { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */
+       { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */
        { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */
        { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */
        { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */
@@ -118,6 +119,8 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
        { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */
        { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */
+       { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
+       { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */
        { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
        { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
        { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */
@@ -148,6 +151,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
        { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
        { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
+       { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
        { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */
        { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */
        { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */
index 0a818b238508e26ab4b1932a844b740ba25c5d3c..603fb70dde8005ddfcf9743e2f3f8d0acd616bb5 100644 (file)
@@ -905,20 +905,20 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port)
        status = mos7840_get_reg_sync(port, mos7840_port->SpRegOffset, &Data);
        if (status < 0) {
                dev_dbg(&port->dev, "Reading Spreg failed\n");
-               return -1;
+               goto err;
        }
        Data |= 0x80;
        status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data);
        if (status < 0) {
                dev_dbg(&port->dev, "writing Spreg failed\n");
-               return -1;
+               goto err;
        }
 
        Data &= ~0x80;
        status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data);
        if (status < 0) {
                dev_dbg(&port->dev, "writing Spreg failed\n");
-               return -1;
+               goto err;
        }
        /* End of block to be checked */
 
@@ -927,7 +927,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port)
                                                                        &Data);
        if (status < 0) {
                dev_dbg(&port->dev, "Reading Controlreg failed\n");
-               return -1;
+               goto err;
        }
        Data |= 0x08;           /* Driver done bit */
        Data |= 0x20;           /* rx_disable */
@@ -935,7 +935,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port)
                                mos7840_port->ControlRegOffset, Data);
        if (status < 0) {
                dev_dbg(&port->dev, "writing Controlreg failed\n");
-               return -1;
+               goto err;
        }
        /* do register settings here */
        /* Set all regs to the device default values. */
@@ -946,21 +946,21 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port)
        status = mos7840_set_uart_reg(port, INTERRUPT_ENABLE_REGISTER, Data);
        if (status < 0) {
                dev_dbg(&port->dev, "disabling interrupts failed\n");
-               return -1;
+               goto err;
        }
        /* Set FIFO_CONTROL_REGISTER to the default value */
        Data = 0x00;
        status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data);
        if (status < 0) {
                dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER  failed\n");
-               return -1;
+               goto err;
        }
 
        Data = 0xcf;
        status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data);
        if (status < 0) {
                dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER  failed\n");
-               return -1;
+               goto err;
        }
 
        Data = 0x03;
@@ -1103,6 +1103,15 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port)
        /* mos7840_change_port_settings(mos7840_port,old_termios); */
 
        return 0;
+err:
+       for (j = 0; j < NUM_URBS; ++j) {
+               urb = mos7840_port->write_urb_pool[j];
+               if (!urb)
+                       continue;
+               kfree(urb->transfer_buffer);
+               usb_free_urb(urb);
+       }
+       return status;
 }
 
 /*****************************************************************************
index 5dd857de05b04f5ecf4cefe7a370e6013db67118..1cf6f125f5f078624b41bd1256686d35f28ee4b7 100644 (file)
@@ -341,17 +341,12 @@ static void option_instat_callback(struct urb *urb);
 #define OLIVETTI_VENDOR_ID                     0x0b3c
 #define OLIVETTI_PRODUCT_OLICARD100            0xc000
 #define OLIVETTI_PRODUCT_OLICARD145            0xc003
+#define OLIVETTI_PRODUCT_OLICARD200            0xc005
 
 /* Celot products */
 #define CELOT_VENDOR_ID                                0x211f
 #define CELOT_PRODUCT_CT680M                   0x6801
 
-/* ONDA Communication vendor id */
-#define ONDA_VENDOR_ID       0x1ee8
-
-/* ONDA MT825UP HSDPA 14.2 modem */
-#define ONDA_MT825UP         0x000b
-
 /* Samsung products */
 #define SAMSUNG_VENDOR_ID                       0x04e8
 #define SAMSUNG_PRODUCT_GT_B3730                0x6889
@@ -444,7 +439,8 @@ static void option_instat_callback(struct urb *urb);
 
 /* Hyundai Petatel Inc. products */
 #define PETATEL_VENDOR_ID                      0x1ff4
-#define PETATEL_PRODUCT_NP10T                  0x600e
+#define PETATEL_PRODUCT_NP10T_600A             0x600a
+#define PETATEL_PRODUCT_NP10T_600E             0x600e
 
 /* TP-LINK Incorporated products */
 #define TPLINK_VENDOR_ID                       0x2357
@@ -782,6 +778,7 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) },
        { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) },
        { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
+       { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
        { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
        { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6280) }, /* BP3-USB & BP3-EXT HSDPA */
        { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6008) },
@@ -817,7 +814,8 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0017, 0xff, 0xff, 0xff),
                .driver_info = (kernel_ulong_t)&net_intf3_blacklist },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0018, 0xff, 0xff, 0xff) },
-       { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff) },
+       { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff),
+               .driver_info = (kernel_ulong_t)&net_intf3_blacklist },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0020, 0xff, 0xff, 0xff) },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0021, 0xff, 0xff, 0xff),
                .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
@@ -1256,8 +1254,8 @@ static const struct usb_device_id option_ids[] = {
 
        { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100) },
        { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) },
+       { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) },
        { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */
-       { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */
        { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/
        { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) },
        { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) },
@@ -1329,9 +1327,12 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x02, 0x01) },
        { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) },
        { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) },
-       { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T) },
+       { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) },
+       { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) },
        { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180),
          .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+       { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000),                                 /* TP-Link MA260 */
+         .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
        { USB_DEVICE(CHANGHONG_VENDOR_ID, CHANGHONG_PRODUCT_CH690) },
        { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x02, 0x01) },    /* D-Link DWM-156 (variant) */
        { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x00, 0x00) },    /* D-Link DWM-156 (variant) */
@@ -1339,6 +1340,8 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) },
        { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) },
        { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) },
+       { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
+       { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
        { } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);
index 7182bb774b7958f70f50efb823b084c5ea9284d5..375b5a400b6fb8a63b0aaa83272299a15aef6e83 100644 (file)
@@ -371,7 +371,7 @@ static int ti_startup(struct usb_serial *serial)
        usb_set_serial_data(serial, tdev);
 
        /* determine device type */
-       if (usb_match_id(serial->interface, ti_id_table_3410))
+       if (serial->type == &ti_1port_device)
                tdev->td_is_3410 = 1;
        dev_dbg(&dev->dev, "%s - device type is %s\n", __func__,
                tdev->td_is_3410 ? "3410" : "5052");
index 1799335288bd5e60522a60def1c311f07df62286..c015f2c16729c5b4030fc829fae9a3cfb90acde6 100644 (file)
@@ -665,6 +665,13 @@ UNUSUAL_DEV(  0x054c, 0x016a, 0x0000, 0x9999,
                USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                US_FL_FIX_INQUIRY ),
 
+/* Submitted by Ren Bigcren <bigcren.ren@sonymobile.com> */
+UNUSUAL_DEV(  0x054c, 0x02a5, 0x0100, 0x0100,
+               "Sony Corp.",
+               "MicroVault Flash Drive",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_NO_READ_CAPACITY_16 ),
+
 /* floppy reports multiple luns */
 UNUSUAL_DEV(  0x055d, 0x2020, 0x0000, 0x0210,
                "SAMSUNG",
index 027be91db139c43eaadd5e12727c2b18c1a65bcc..969a85960e9f6bf09a5bb9a6b1ea828d9f5ae9fb 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
-#include <linux/rcupdate.h>
 #include <linux/file.h>
 #include <linux/slab.h>
 
@@ -346,12 +345,11 @@ static void handle_tx(struct vhost_net *net)
        struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
        bool zcopy, zcopy_used;
 
-       /* TODO: check that we are running from vhost_worker? */
-       sock = rcu_dereference_check(vq->private_data, 1);
+       mutex_lock(&vq->mutex);
+       sock = vq->private_data;
        if (!sock)
-               return;
+               goto out;
 
-       mutex_lock(&vq->mutex);
        vhost_disable_notify(&net->dev, vq);
 
        hdr_size = nvq->vhost_hlen;
@@ -461,7 +459,7 @@ static void handle_tx(struct vhost_net *net)
                        break;
                }
        }
-
+out:
        mutex_unlock(&vq->mutex);
 }
 
@@ -570,14 +568,14 @@ static void handle_rx(struct vhost_net *net)
        s16 headcount;
        size_t vhost_hlen, sock_hlen;
        size_t vhost_len, sock_len;
-       /* TODO: check that we are running from vhost_worker? */
-       struct socket *sock = rcu_dereference_check(vq->private_data, 1);
-
-       if (!sock)
-               return;
+       struct socket *sock;
 
        mutex_lock(&vq->mutex);
+       sock = vq->private_data;
+       if (!sock)
+               goto out;
        vhost_disable_notify(&net->dev, vq);
+
        vhost_hlen = nvq->vhost_hlen;
        sock_hlen = nvq->sock_hlen;
 
@@ -652,7 +650,7 @@ static void handle_rx(struct vhost_net *net)
                        break;
                }
        }
-
+out:
        mutex_unlock(&vq->mutex);
 }
 
@@ -750,8 +748,7 @@ static int vhost_net_enable_vq(struct vhost_net *n,
        struct vhost_poll *poll = n->poll + (nvq - n->vqs);
        struct socket *sock;
 
-       sock = rcu_dereference_protected(vq->private_data,
-                                        lockdep_is_held(&vq->mutex));
+       sock = vq->private_data;
        if (!sock)
                return 0;
 
@@ -764,10 +761,9 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
        struct socket *sock;
 
        mutex_lock(&vq->mutex);
-       sock = rcu_dereference_protected(vq->private_data,
-                                        lockdep_is_held(&vq->mutex));
+       sock = vq->private_data;
        vhost_net_disable_vq(n, vq);
-       rcu_assign_pointer(vq->private_data, NULL);
+       vq->private_data = NULL;
        mutex_unlock(&vq->mutex);
        return sock;
 }
@@ -923,8 +919,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
        }
 
        /* start polling new socket */
-       oldsock = rcu_dereference_protected(vq->private_data,
-                                           lockdep_is_held(&vq->mutex));
+       oldsock = vq->private_data;
        if (sock != oldsock) {
                ubufs = vhost_net_ubuf_alloc(vq,
                                             sock && vhost_sock_zcopy(sock));
@@ -934,7 +929,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
                }
 
                vhost_net_disable_vq(n, vq);
-               rcu_assign_pointer(vq->private_data, sock);
+               vq->private_data = sock;
                r = vhost_init_used(vq);
                if (r)
                        goto err_used;
@@ -968,7 +963,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
        return 0;
 
 err_used:
-       rcu_assign_pointer(vq->private_data, oldsock);
+       vq->private_data = oldsock;
        vhost_net_enable_vq(n, vq);
        if (ubufs)
                vhost_net_ubuf_put_wait_and_free(ubufs);
index 06adf31a92484a6f32f01ab71c2dd76c6553c920..0c27c7df1b093e6ad64292a5211387a3103543e2 100644 (file)
@@ -902,19 +902,15 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
        int head, ret;
        u8 target;
 
+       mutex_lock(&vq->mutex);
        /*
         * We can handle the vq only after the endpoint is setup by calling the
         * VHOST_SCSI_SET_ENDPOINT ioctl.
-        *
-        * TODO: Check that we are running from vhost_worker which acts
-        * as read-side critical section for vhost kind of RCU.
-        * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
         */
-       vs_tpg = rcu_dereference_check(vq->private_data, 1);
+       vs_tpg = vq->private_data;
        if (!vs_tpg)
-               return;
+               goto out;
 
-       mutex_lock(&vq->mutex);
        vhost_disable_notify(&vs->dev, vq);
 
        for (;;) {
@@ -1064,6 +1060,7 @@ err_free:
        vhost_scsi_free_cmd(cmd);
 err_cmd:
        vhost_scsi_send_bad_target(vs, vq, head, out);
+out:
        mutex_unlock(&vq->mutex);
 }
 
@@ -1232,9 +1229,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
                       sizeof(vs->vs_vhost_wwpn));
                for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
                        vq = &vs->vqs[i].vq;
-                       /* Flushing the vhost_work acts as synchronize_rcu */
                        mutex_lock(&vq->mutex);
-                       rcu_assign_pointer(vq->private_data, vs_tpg);
+                       vq->private_data = vs_tpg;
                        vhost_init_used(vq);
                        mutex_unlock(&vq->mutex);
                }
@@ -1313,9 +1309,8 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
        if (match) {
                for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
                        vq = &vs->vqs[i].vq;
-                       /* Flushing the vhost_work acts as synchronize_rcu */
                        mutex_lock(&vq->mutex);
-                       rcu_assign_pointer(vq->private_data, NULL);
+                       vq->private_data = NULL;
                        mutex_unlock(&vq->mutex);
                }
        }
index a73ea217f24dc79a0f92a95cca39c416e828a6b9..339eae85859a58afbd6ea563b4b9fae3f667100c 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
-#include <linux/rcupdate.h>
 #include <linux/file.h>
 #include <linux/slab.h>
 
@@ -200,9 +199,8 @@ static long vhost_test_run(struct vhost_test *n, int test)
                priv = test ? n : NULL;
 
                /* start polling new socket */
-               oldpriv = rcu_dereference_protected(vq->private_data,
-                                                   lockdep_is_held(&vq->mutex));
-               rcu_assign_pointer(vq->private_data, priv);
+               oldpriv = vq->private_data;
+               vq->private_data = priv;
 
                r = vhost_init_used(&n->vqs[index]);
 
index 42298cd23c73842e6a4ec7f8434428b56e700098..4465ed5f316d6e9b36e1f43c05df1c980aa908ae 100644 (file)
@@ -103,14 +103,8 @@ struct vhost_virtqueue {
        struct iovec iov[UIO_MAXIOV];
        struct iovec *indirect;
        struct vring_used_elem *heads;
-       /* We use a kind of RCU to access private pointer.
-        * All readers access it from worker, which makes it possible to
-        * flush the vhost_work instead of synchronize_rcu. Therefore readers do
-        * not need to call rcu_read_lock/rcu_read_unlock: the beginning of
-        * vhost_work execution acts instead of rcu_read_lock() and the end of
-        * vhost_work execution acts instead of rcu_read_unlock().
-        * Writers use virtqueue mutex. */
-       void __rcu *private_data;
+       /* Protected by virtqueue mutex. */
+       void *private_data;
        /* Log write descriptors */
        void __user *log_base;
        struct vhost_log *log;
index 0eda52738ec4d0fc6cac460fab9d4baddde4ff4b..72a5d5b04494ded2468bc4a2f5f30e30c9ca12f7 100644 (file)
@@ -1223,30 +1223,46 @@ static int fuse_direntplus_link(struct file *file,
                if (name.name[1] == '.' && name.len == 2)
                        return 0;
        }
+
+       if (invalid_nodeid(o->nodeid))
+               return -EIO;
+       if (!fuse_valid_type(o->attr.mode))
+               return -EIO;
+
        fc = get_fuse_conn(dir);
 
        name.hash = full_name_hash(name.name, name.len);
        dentry = d_lookup(parent, &name);
-       if (dentry && dentry->d_inode) {
+       if (dentry) {
                inode = dentry->d_inode;
-               if (get_node_id(inode) == o->nodeid) {
+               if (!inode) {
+                       d_drop(dentry);
+               } else if (get_node_id(inode) != o->nodeid ||
+                          ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
+                       err = d_invalidate(dentry);
+                       if (err)
+                               goto out;
+               } else if (is_bad_inode(inode)) {
+                       err = -EIO;
+                       goto out;
+               } else {
                        struct fuse_inode *fi;
                        fi = get_fuse_inode(inode);
                        spin_lock(&fc->lock);
                        fi->nlookup++;
                        spin_unlock(&fc->lock);
 
+                       fuse_change_attributes(inode, &o->attr,
+                                              entry_attr_timeout(o),
+                                              attr_version);
+
                        /*
                         * The other branch to 'found' comes via fuse_iget()
                         * which bumps nlookup inside
                         */
                        goto found;
                }
-               err = d_invalidate(dentry);
-               if (err)
-                       goto out;
                dput(dentry);
-               dentry = NULL;
        }
 
        dentry = d_alloc(parent, &name);
@@ -1259,25 +1275,30 @@ static int fuse_direntplus_link(struct file *file,
        if (!inode)
                goto out;
 
-       alias = d_materialise_unique(dentry, inode);
-       err = PTR_ERR(alias);
-       if (IS_ERR(alias))
-               goto out;
+       if (S_ISDIR(inode->i_mode)) {
+               mutex_lock(&fc->inst_mutex);
+               alias = fuse_d_add_directory(dentry, inode);
+               mutex_unlock(&fc->inst_mutex);
+               err = PTR_ERR(alias);
+               if (IS_ERR(alias)) {
+                       iput(inode);
+                       goto out;
+               }
+       } else {
+               alias = d_splice_alias(inode, dentry);
+       }
+
        if (alias) {
                dput(dentry);
                dentry = alias;
        }
 
 found:
-       fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
-                              attr_version);
-
        fuse_change_entry_timeout(dentry, o);
 
        err = 0;
 out:
-       if (dentry)
-               dput(dentry);
+       dput(dentry);
        return err;
 }
 
index c74d6168db99b917ad202c874a9f161f29e920e5..3850b018815f2d07e4740fdd3ff8200523b9fe92 100644 (file)
@@ -1118,11 +1118,11 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
                                len, ((char *)p - (char *)q) + 4);
                BUG();
        }
-       len = (char *)p - (char *)q - (bmval_len << 2);
        *q++ = htonl(bmval0);
        *q++ = htonl(bmval1);
        if (bmval_len == 3)
                *q++ = htonl(bmval2);
+       len = (char *)p - (char *)(q + 1);
        *q = htonl(len);
 
 /* out: */
index 8ff6a0019b0b9cc6016da02c3210ae6840a20dfa..c827acb0e943bdab08fd077e56228b170a6a9549 100644 (file)
@@ -830,9 +830,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
                        flags = O_WRONLY|O_LARGEFILE;
        }
        *filp = dentry_open(&path, flags, current_cred());
-       if (IS_ERR(*filp))
+       if (IS_ERR(*filp)) {
                host_err = PTR_ERR(*filp);
-       else {
+               *filp = NULL;
+       } else {
                host_err = ima_file_check(*filp, may_flags);
 
                if (may_flags & NFSD_MAY_64BIT_COOKIE)
index 07d735a80a0f777c342c4323ba9393556afe182b..e5869b50dc41acd1788123e0cd49ea2670101e3f 100644 (file)
@@ -39,6 +39,9 @@ typedef struct xfs_timestamp {
  * There is a very similar struct icdinode in xfs_inode which matches the
  * layout of the first 96 bytes of this structure, but is kept in native
  * format instead of big endian.
+ *
+ * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed
+ * padding field for v3 inodes.
  */
 typedef struct xfs_dinode {
        __be16          di_magic;       /* inode magic # = XFS_DINODE_MAGIC */
index b78481f99d9d6a683f0d18815432fce69d8d2838..bb262c25c8de463276e9282d64b299c975eceb7b 100644 (file)
@@ -896,7 +896,6 @@ xfs_dinode_to_disk(
        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
-       to->di_flushiter = cpu_to_be16(from->di_flushiter);
        to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
        to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
        to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
@@ -924,6 +923,9 @@ xfs_dinode_to_disk(
                to->di_lsn = cpu_to_be64(from->di_lsn);
                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
                uuid_copy(&to->di_uuid, &from->di_uuid);
+               to->di_flushiter = 0;
+       } else {
+               to->di_flushiter = cpu_to_be16(from->di_flushiter);
        }
 }
 
@@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc(
 /*
  * Read the disk inode attributes into the in-core inode structure.
  *
- * If we are initialising a new inode and we are not utilising the
- * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core
- * with a random generation number. If we are keeping inodes around, we need to
- * read the inode cluster to get the existing generation number off disk.
+ * For version 5 superblocks, if we are initialising a new inode and we are not
+ * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
+ * inode core with a random generation number. If we are keeping inodes around,
+ * we need to read the inode cluster to get the existing generation number off
+ * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
+ * format) then log recovery is dependent on the di_flushiter field being
+ * initialised from the current on-disk value and hence we must also read the
+ * inode off disk.
  */
 int
 xfs_iread(
@@ -1054,6 +1060,7 @@ xfs_iread(
 
        /* shortcut IO on inode allocation if possible */
        if ((iget_flags & XFS_IGET_CREATE) &&
+           xfs_sb_version_hascrc(&mp->m_sb) &&
            !(mp->m_flags & XFS_MOUNT_IKEEP)) {
                /* initialise the on-disk inode core */
                memset(&ip->i_d, 0, sizeof(ip->i_d));
@@ -2882,12 +2889,18 @@ xfs_iflush_int(
                        __func__, ip->i_ino, ip->i_d.di_forkoff, ip);
                goto corrupt_out;
        }
+
        /*
-        * bump the flush iteration count, used to detect flushes which
-        * postdate a log record during recovery. This is redundant as we now
-        * log every change and hence this can't happen. Still, it doesn't hurt.
+        * Inode item log recovery for v1/v2 inodes are dependent on the
+        * di_flushiter count for correct sequencing. We bump the flush
+        * iteration count so we can detect flushes which postdate a log record
+        * during recovery. This is redundant as we now log every change and
+        * hence this can't happen but we need to still do it to ensure
+        * backwards compatibility with old kernels that predate logging all
+        * inode changes.
         */
-       ip->i_d.di_flushiter++;
+       if (ip->i_d.di_version < 3)
+               ip->i_d.di_flushiter++;
 
        /*
         * Copy the dirty parts of the inode into the on-disk
index 6fcc910a50b9d7e3fbd3c15851aa0ee0cfc1b6af..7681b19aa5dc565a9807005ff3f1d6428a22f016 100644 (file)
@@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2(
                goto error;
        }
 
-       /* Skip replay when the on disk inode is newer than the log one */
-       if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+       /*
+        * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
+        * are transactional and if ordering is necessary we can determine that
+        * more accurately by the LSN field in the V3 inode core. Don't trust
+        * the inode versions we might be changing them here - use the
+        * superblock flag to determine whether we need to look at di_flushiter
+        * to skip replay when the on disk inode is newer than the log one
+        */
+       if (!xfs_sb_version_hascrc(&mp->m_sb) &&
+           dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
                /*
                 * Deal with the wrap case, DI_MAX_FLUSH is less
                 * than smaller numbers
@@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2(
                        goto error;
                }
        }
+
        /* Take the opportunity to reset the flush iteration count */
        dicp->di_flushiter = 0;
 
index 297462b9f41acb7c95607daa36516b017e37be53..e9ac882868c0353f8f6bb7f2baba2a73c8602c41 100644 (file)
@@ -542,8 +542,7 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
 
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
-int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
-                                   char *buf, size_t buflen);
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
 
index 6e7ec64b69ab4b7aa5ee7aacef569f633f0de296..b613ffd402d1bdd38747e45e26260b28a4355768 100644 (file)
@@ -1,86 +1,55 @@
-/* Add subsystem definitions of the form SUBSYS(<name>) in this
- * file. Surround each one by a line of comment markers so that
- * patches don't collide
+/*
+ * List of cgroup subsystems.
+ *
+ * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
  */
-
-/* */
-
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CPUSETS)
 SUBSYS(cpuset)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEBUG)
 SUBSYS(debug)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_SCHED)
 SUBSYS(cpu_cgroup)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_CPUACCT)
 SUBSYS(cpuacct)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_MEMCG)
 SUBSYS(mem_cgroup)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEVICE)
 SUBSYS(devices)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FREEZER)
 SUBSYS(freezer)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP)
 SUBSYS(net_cls)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_BLK_CGROUP)
 SUBSYS(blkio)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF)
 SUBSYS(perf)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP)
 SUBSYS(net_prio)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
 SUBSYS(hugetlb)
 #endif
-
-/* */
-
-#ifdef CONFIG_CGROUP_BCACHE
-SUBSYS(bcache)
-#endif
-
-/* */
+/*
+ * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
+ */
index b3cb71f0d3b0d19b8c08e70348dee1bb5e5ba6ad..a9c96d865ee7144b577afc3752c0a1a94cd95e11 100644 (file)
@@ -3,10 +3,6 @@
 
 #include <linux/types.h>
 
-#define CRC_T10DIF_DIGEST_SIZE 2
-#define CRC_T10DIF_BLOCK_SIZE 1
-
-__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len);
 __u16 crc_t10dif(unsigned char const *, size_t);
 
 #endif
index 1b4d4ee1168ffb3b8ac90c42aaad073753573b1f..de7d74ab3de6df5237c5b117fa2dced482779842 100644 (file)
@@ -177,7 +177,11 @@ enum drbd_ret_code {
        ERR_NEED_APV_100        = 163,
        ERR_NEED_ALLOW_TWO_PRI  = 164,
        ERR_MD_UNCLEAN          = 165,
-
+       ERR_MD_LAYOUT_CONNECTED = 166,
+       ERR_MD_LAYOUT_TOO_BIG   = 167,
+       ERR_MD_LAYOUT_TOO_SMALL = 168,
+       ERR_MD_LAYOUT_NO_FIT    = 169,
+       ERR_IMPLICIT_SHRINK     = 170,
        /* insert new ones above this line */
        AFTER_LAST_ERR_CODE
 };
index d0d8fac8a6e4ffa77141b038f898537f31ed28f0..e8c44572b8cba2d244d87c528d3e683173d5fd1e 100644 (file)
@@ -181,6 +181,8 @@ GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
        __u64_field(1, DRBD_GENLA_F_MANDATORY,  resize_size)
        __flg_field(2, DRBD_GENLA_F_MANDATORY,  resize_force)
        __flg_field(3, DRBD_GENLA_F_MANDATORY,  no_resync)
+       __u32_field_def(4, 0 /* OPTIONAL */, al_stripes, DRBD_AL_STRIPES_DEF)
+       __u32_field_def(5, 0 /* OPTIONAL */, al_stripe_size, DRBD_AL_STRIPE_SIZE_DEF)
 )
 
 GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
index 1fedf2b17cc801805581a185638fe97184be27b3..17e50bb00521362f0840eae9e79da52e2b34d139 100644 (file)
 #define DRBD_ALWAYS_ASBP_DEF   0
 #define DRBD_USE_RLE_DEF       1
 
+#define DRBD_AL_STRIPES_MIN     1
+#define DRBD_AL_STRIPES_MAX     1024
+#define DRBD_AL_STRIPES_DEF     1
+#define DRBD_AL_STRIPES_SCALE   '1'
+
+#define DRBD_AL_STRIPE_SIZE_MIN   4
+#define DRBD_AL_STRIPE_SIZE_MAX   16777216
+#define DRBD_AL_STRIPE_SIZE_DEF   32
+#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */
 #endif
index 0b763276f6199a255d304b572e0c414c90252056..5c6d7fbaf89e00dbed2d9cc418318980f7de3d2b 100644 (file)
@@ -622,7 +622,7 @@ struct edac_raw_error_desc {
  */
 struct mem_ctl_info {
        struct device                   dev;
-       struct bus_type                 bus;
+       struct bus_type                 *bus;
 
        struct list_head link;  /* for global list of mem_ctl_info structs */
 
@@ -742,4 +742,9 @@ struct mem_ctl_info {
 #endif
 };
 
+/*
+ * Maximum number of memory controllers in the coherent fabric.
+ */
+#define EDAC_MAX_MCS   16
+
 #endif
index b1521e82fecf0c6562da7d2af779ea932b1640be..b6bdcd66c07d2e758ba677f9c9c1796ada542bc8 100644 (file)
 #define IMX6Q_GPR13_CAN2_STOP_REQ              BIT(29)
 #define IMX6Q_GPR13_CAN1_STOP_REQ              BIT(28)
 #define IMX6Q_GPR13_ENET_STOP_REQ              BIT(27)
-#define IMX6Q_GPR13_SATA_PHY_8_MASK            (0x7 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_0_5_DB          (0x0 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_1_0_DB          (0x1 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_1_5_DB          (0x2 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_2_0_DB          (0x3 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_2_5_DB          (0x4 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_3_0_DB          (0x5 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_3_5_DB          (0x6 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_4_0_DB          (0x7 << 24)
-#define IMX6Q_GPR13_SATA_PHY_7_MASK            (0x1f << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1I          (0x10 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1M          (0x10 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1X          (0x1a << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2I          (0x12 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2M          (0x12 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2X          (0x1a << 19)
-#define IMX6Q_GPR13_SATA_PHY_6_MASK            (0x7 << 16)
-#define IMX6Q_GPR13_SATA_SPEED_MASK            BIT(15)
-#define IMX6Q_GPR13_SATA_SPEED_1P5G            0x0
-#define IMX6Q_GPR13_SATA_SPEED_3P0G            BIT(15)
-#define IMX6Q_GPR13_SATA_PHY_5                 BIT(14)
-#define IMX6Q_GPR13_SATA_PHY_4_MASK            (0x7 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_16_16           (0x0 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_14_16           (0x1 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_12_16           (0x2 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_10_16           (0x3 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_9_16            (0x4 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_8_16            (0x5 << 11)
-#define IMX6Q_GPR13_SATA_PHY_3_MASK            (0xf << 7)
-#define IMX6Q_GPR13_SATA_PHY_3_OFF             0x7
-#define IMX6Q_GPR13_SATA_PHY_2_MASK            (0x1f << 2)
-#define IMX6Q_GPR13_SATA_PHY_2_OFF             0x2
-#define IMX6Q_GPR13_SATA_PHY_1_MASK            (0x3 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_FAST            (0x0 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_MED             (0x1 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_SLOW            (0x2 << 0)
-
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK                (0x7 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_0_5_DB      (0x0 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_0_DB      (0x1 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_5_DB      (0x2 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_0_DB      (0x3 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_5_DB      (0x4 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB      (0x5 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_5_DB      (0x6 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_4_0_DB      (0x7 << 24)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK       (0x1f << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1I     (0x10 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1M     (0x10 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1X     (0x1a << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2I     (0x12 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M     (0x12 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2X     (0x1a << 19)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK     (0x7 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_1F    (0x0 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_2F    (0x1 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_4F    (0x2 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F    (0x3 << 16)
+#define IMX6Q_GPR13_SATA_SPD_MODE_MASK         BIT(15)
+#define IMX6Q_GPR13_SATA_SPD_MODE_1P5G         0x0
+#define IMX6Q_GPR13_SATA_SPD_MODE_3P0G         BIT(15)
+#define IMX6Q_GPR13_SATA_MPLL_SS_EN            BIT(14)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_MASK         (0x7 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_16_16                (0x0 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_14_16                (0x1 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_12_16                (0x2 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_10_16                (0x3 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_9_16         (0x4 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_8_16         (0x5 << 11)
+#define IMX6Q_GPR13_SATA_TX_BOOST_MASK         (0xf << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_00_DB      (0x0 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_37_DB      (0x1 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_74_DB      (0x2 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_11_DB      (0x3 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_48_DB      (0x4 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_85_DB      (0x5 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_22_DB      (0x6 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_59_DB      (0x7 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_96_DB      (0x8 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB      (0x9 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_3_70_DB      (0xa << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_07_DB      (0xb << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_44_DB      (0xc << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_81_DB      (0xd << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_5_28_DB      (0xe << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_5_75_DB      (0xf << 7)
+#define IMX6Q_GPR13_SATA_TX_LVL_MASK           (0x1f << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_937_V                (0x00 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_947_V                (0x01 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_957_V                (0x02 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_966_V                (0x03 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_976_V                (0x04 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_986_V                (0x05 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_996_V                (0x06 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_005_V                (0x07 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_015_V                (0x08 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_025_V                (0x09 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_035_V                (0x0a << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_045_V                (0x0b << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_054_V                (0x0c << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_064_V                (0x0d << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_074_V                (0x0e << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_084_V                (0x0f << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_094_V                (0x10 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_104_V                (0x11 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_113_V                (0x12 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_123_V                (0x13 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_133_V                (0x14 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_143_V                (0x15 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_152_V                (0x16 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_162_V                (0x17 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_172_V                (0x18 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_182_V                (0x19 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_191_V                (0x1a << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_201_V                (0x1b << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_211_V                (0x1c << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_221_V                (0x1d << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_230_V                (0x1e << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_240_V                (0x1f << 2)
+#define IMX6Q_GPR13_SATA_MPLL_CLK_EN           BIT(1)
+#define IMX6Q_GPR13_SATA_TX_EDGE_RATE          BIT(0)
 #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */
index a232b7ece1f63c687a2f50da506c9e70ec33555e..0eec2689b9556bee0c3aae530a3035ccbbe29d2a 100644 (file)
@@ -367,17 +367,6 @@ struct usb_bus {
 
 /* ----------------------------------------------------------------------- */
 
-/* This is arbitrary.
- * From USB 2.0 spec Table 11-13, offset 7, a hub can
- * have up to 255 ports. The most yet reported is 10.
- *
- * Current Wireless USB host hardware (Intel i1480 for example) allows
- * up to 22 devices to connect. Upcoming hardware might raise that
- * limit. Because the arrays need to add a bit for hub status data, we
- * do 31, so plus one evens out to four bytes.
- */
-#define USB_MAXCHILDREN                (31)
-
 struct usb_tt;
 
 enum usb_device_removable {
index 3cc5a0b278c30c2045152d358254d11f109c6a0d..5ebda976ea93bd52c6ca80194d723e49e94c1558 100644 (file)
@@ -9,9 +9,7 @@
 struct search;
 
 DECLARE_EVENT_CLASS(bcache_request,
-
        TP_PROTO(struct search *s, struct bio *bio),
-
        TP_ARGS(s, bio),
 
        TP_STRUCT__entry(
@@ -22,7 +20,6 @@ DECLARE_EVENT_CLASS(bcache_request,
                __field(dev_t,          orig_sector             )
                __field(unsigned int,   nr_sector               )
                __array(char,           rwbs,   6               )
-               __array(char,           comm,   TASK_COMM_LEN   )
        ),
 
        TP_fast_assign(
@@ -33,36 +30,66 @@ DECLARE_EVENT_CLASS(bcache_request,
                __entry->orig_sector    = bio->bi_sector - 16;
                __entry->nr_sector      = bio->bi_size >> 9;
                blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
 
-       TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)",
+       TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->rwbs,
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->comm,
-                 __entry->orig_major, __entry->orig_minor,
+                 __entry->rwbs, (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->orig_major, __entry->orig_minor,
                  (unsigned long long)__entry->orig_sector)
 );
 
-DEFINE_EVENT(bcache_request, bcache_request_start,
+DECLARE_EVENT_CLASS(bkey,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k),
 
-       TP_PROTO(struct search *s, struct bio *bio),
+       TP_STRUCT__entry(
+               __field(u32,    size                            )
+               __field(u32,    inode                           )
+               __field(u64,    offset                          )
+               __field(bool,   dirty                           )
+       ),
 
-       TP_ARGS(s, bio)
+       TP_fast_assign(
+               __entry->inode  = KEY_INODE(k);
+               __entry->offset = KEY_OFFSET(k);
+               __entry->size   = KEY_SIZE(k);
+               __entry->dirty  = KEY_DIRTY(k);
+       ),
+
+       TP_printk("%u:%llu len %u dirty %u", __entry->inode,
+                 __entry->offset, __entry->size, __entry->dirty)
 );
 
-DEFINE_EVENT(bcache_request, bcache_request_end,
+DECLARE_EVENT_CLASS(btree_node,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b),
+
+       TP_STRUCT__entry(
+               __field(size_t,         bucket                  )
+       ),
 
+       TP_fast_assign(
+               __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
+       ),
+
+       TP_printk("bucket %zu", __entry->bucket)
+);
+
+/* request.c */
+
+DEFINE_EVENT(bcache_request, bcache_request_start,
        TP_PROTO(struct search *s, struct bio *bio),
+       TP_ARGS(s, bio)
+);
 
+DEFINE_EVENT(bcache_request, bcache_request_end,
+       TP_PROTO(struct search *s, struct bio *bio),
        TP_ARGS(s, bio)
 );
 
 DECLARE_EVENT_CLASS(bcache_bio,
-
        TP_PROTO(struct bio *bio),
-
        TP_ARGS(bio),
 
        TP_STRUCT__entry(
@@ -70,7 +97,6 @@ DECLARE_EVENT_CLASS(bcache_bio,
                __field(sector_t,       sector                  )
                __field(unsigned int,   nr_sector               )
                __array(char,           rwbs,   6               )
-               __array(char,           comm,   TASK_COMM_LEN   )
        ),
 
        TP_fast_assign(
@@ -78,191 +104,328 @@ DECLARE_EVENT_CLASS(bcache_bio,
                __entry->sector         = bio->bi_sector;
                __entry->nr_sector      = bio->bi_size >> 9;
                blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
 
-       TP_printk("%d,%d  %s %llu + %u [%s]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->rwbs,
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->comm)
+       TP_printk("%d,%d  %s %llu + %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector, __entry->nr_sector)
 );
 
-
-DEFINE_EVENT(bcache_bio, bcache_passthrough,
-
+DEFINE_EVENT(bcache_bio, bcache_bypass_sequential,
        TP_PROTO(struct bio *bio),
+       TP_ARGS(bio)
+);
 
+DEFINE_EVENT(bcache_bio, bcache_bypass_congested,
+       TP_PROTO(struct bio *bio),
        TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_cache_hit,
+TRACE_EVENT(bcache_read,
+       TP_PROTO(struct bio *bio, bool hit, bool bypass),
+       TP_ARGS(bio, hit, bypass),
 
-       TP_PROTO(struct bio *bio),
+       TP_STRUCT__entry(
+               __field(dev_t,          dev                     )
+               __field(sector_t,       sector                  )
+               __field(unsigned int,   nr_sector               )
+               __array(char,           rwbs,   6               )
+               __field(bool,           cache_hit               )
+               __field(bool,           bypass                  )
+       ),
 
-       TP_ARGS(bio)
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               __entry->cache_hit = hit;
+               __entry->bypass = bypass;
+       ),
+
+       TP_printk("%d,%d  %s %llu + %u hit %u bypass %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->cache_hit, __entry->bypass)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_cache_miss,
+TRACE_EVENT(bcache_write,
+       TP_PROTO(struct bio *bio, bool writeback, bool bypass),
+       TP_ARGS(bio, writeback, bypass),
 
-       TP_PROTO(struct bio *bio),
+       TP_STRUCT__entry(
+               __field(dev_t,          dev                     )
+               __field(sector_t,       sector                  )
+               __field(unsigned int,   nr_sector               )
+               __array(char,           rwbs,   6               )
+               __field(bool,           writeback               )
+               __field(bool,           bypass                  )
+       ),
 
-       TP_ARGS(bio)
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               __entry->writeback = writeback;
+               __entry->bypass = bypass;
+       ),
+
+       TP_printk("%d,%d  %s %llu + %u hit %u bypass %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->writeback, __entry->bypass)
 );
 
 DEFINE_EVENT(bcache_bio, bcache_read_retry,
-
        TP_PROTO(struct bio *bio),
-
        TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_writethrough,
+DEFINE_EVENT(bkey, bcache_cache_insert,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
+);
 
-       TP_PROTO(struct bio *bio),
+/* Journal */
 
-       TP_ARGS(bio)
-);
+DECLARE_EVENT_CLASS(cache_set,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c),
 
-DEFINE_EVENT(bcache_bio, bcache_writeback,
+       TP_STRUCT__entry(
+               __array(char,           uuid,   16 )
+       ),
 
-       TP_PROTO(struct bio *bio),
+       TP_fast_assign(
+               memcpy(__entry->uuid, c->sb.set_uuid, 16);
+       ),
 
-       TP_ARGS(bio)
+       TP_printk("%pU", __entry->uuid)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_write_skip,
-
-       TP_PROTO(struct bio *bio),
+DEFINE_EVENT(bkey, bcache_journal_replay_key,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
+);
 
-       TP_ARGS(bio)
+DEFINE_EVENT(cache_set, bcache_journal_full,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_btree_read,
+DEFINE_EVENT(cache_set, bcache_journal_entry_full,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c)
+);
 
+DEFINE_EVENT(bcache_bio, bcache_journal_write,
        TP_PROTO(struct bio *bio),
-
        TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_btree_write,
+/* Btree */
 
-       TP_PROTO(struct bio *bio),
+DEFINE_EVENT(cache_set, bcache_btree_cache_cannibalize,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c)
+);
 
-       TP_ARGS(bio)
+DEFINE_EVENT(btree_node, bcache_btree_read,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_write_dirty,
+TRACE_EVENT(bcache_btree_write,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b),
 
-       TP_PROTO(struct bio *bio),
+       TP_STRUCT__entry(
+               __field(size_t,         bucket                  )
+               __field(unsigned,       block                   )
+               __field(unsigned,       keys                    )
+       ),
 
-       TP_ARGS(bio)
+       TP_fast_assign(
+               __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
+               __entry->block  = b->written;
+               __entry->keys   = b->sets[b->nsets].data->keys;
+       ),
+
+       TP_printk("bucket %zu", __entry->bucket)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_read_dirty,
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b)
+);
 
-       TP_PROTO(struct bio *bio),
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b)
+);
 
-       TP_ARGS(bio)
+DEFINE_EVENT(btree_node, bcache_btree_node_free,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_write_moving,
+TRACE_EVENT(bcache_btree_gc_coalesce,
+       TP_PROTO(unsigned nodes),
+       TP_ARGS(nodes),
 
-       TP_PROTO(struct bio *bio),
+       TP_STRUCT__entry(
+               __field(unsigned,       nodes                   )
+       ),
 
-       TP_ARGS(bio)
+       TP_fast_assign(
+               __entry->nodes  = nodes;
+       ),
+
+       TP_printk("coalesced %u nodes", __entry->nodes)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_read_moving,
+DEFINE_EVENT(cache_set, bcache_gc_start,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c)
+);
 
-       TP_PROTO(struct bio *bio),
+DEFINE_EVENT(cache_set, bcache_gc_end,
+       TP_PROTO(struct cache_set *c),
+       TP_ARGS(c)
+);
 
-       TP_ARGS(bio)
+DEFINE_EVENT(bkey, bcache_gc_copy,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_journal_write,
+DEFINE_EVENT(bkey, bcache_gc_copy_collision,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
+);
 
-       TP_PROTO(struct bio *bio),
+TRACE_EVENT(bcache_btree_insert_key,
+       TP_PROTO(struct btree *b, struct bkey *k, unsigned op, unsigned status),
+       TP_ARGS(b, k, op, status),
 
-       TP_ARGS(bio)
-);
+       TP_STRUCT__entry(
+               __field(u64,    btree_node                      )
+               __field(u32,    btree_level                     )
+               __field(u32,    inode                           )
+               __field(u64,    offset                          )
+               __field(u32,    size                            )
+               __field(u8,     dirty                           )
+               __field(u8,     op                              )
+               __field(u8,     status                          )
+       ),
 
-DECLARE_EVENT_CLASS(bcache_cache_bio,
+       TP_fast_assign(
+               __entry->btree_node = PTR_BUCKET_NR(b->c, &b->key, 0);
+               __entry->btree_level = b->level;
+               __entry->inode  = KEY_INODE(k);
+               __entry->offset = KEY_OFFSET(k);
+               __entry->size   = KEY_SIZE(k);
+               __entry->dirty  = KEY_DIRTY(k);
+               __entry->op = op;
+               __entry->status = status;
+       ),
 
-       TP_PROTO(struct bio *bio,
-                sector_t orig_sector,
-                struct block_device* orig_bdev),
+       TP_printk("%u for %u at %llu(%u): %u:%llu len %u dirty %u",
+                 __entry->status, __entry->op,
+                 __entry->btree_node, __entry->btree_level,
+                 __entry->inode, __entry->offset,
+                 __entry->size, __entry->dirty)
+);
 
-       TP_ARGS(bio, orig_sector, orig_bdev),
+DECLARE_EVENT_CLASS(btree_split,
+       TP_PROTO(struct btree *b, unsigned keys),
+       TP_ARGS(b, keys),
 
        TP_STRUCT__entry(
-               __field(dev_t,          dev                     )
-               __field(dev_t,          orig_dev                )
-               __field(sector_t,       sector                  )
-               __field(sector_t,       orig_sector             )
-               __field(unsigned int,   nr_sector               )
-               __array(char,           rwbs,   6               )
-               __array(char,           comm,   TASK_COMM_LEN   )
+               __field(size_t,         bucket                  )
+               __field(unsigned,       keys                    )
        ),
 
        TP_fast_assign(
-               __entry->dev            = bio->bi_bdev->bd_dev;
-               __entry->orig_dev       = orig_bdev->bd_dev;
-               __entry->sector         = bio->bi_sector;
-               __entry->orig_sector    = orig_sector;
-               __entry->nr_sector      = bio->bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+               __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0);
+               __entry->keys   = keys;
        ),
 
-       TP_printk("%d,%d  %s %llu + %u [%s] (from %d,%d %llu)",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->rwbs,
-                 (unsigned long long)__entry->sector,
-                 __entry->nr_sector, __entry->comm,
-                 MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev),
-                 (unsigned long long)__entry->orig_sector)
+       TP_printk("bucket %zu keys %u", __entry->bucket, __entry->keys)
 );
 
-DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert,
-
-       TP_PROTO(struct bio *bio,
-                sector_t orig_sector,
-                struct block_device *orig_bdev),
+DEFINE_EVENT(btree_split, bcache_btree_node_split,
+       TP_PROTO(struct btree *b, unsigned keys),
+       TP_ARGS(b, keys)
+);
 
-       TP_ARGS(bio, orig_sector, orig_bdev)
+DEFINE_EVENT(btree_split, bcache_btree_node_compact,
+       TP_PROTO(struct btree *b, unsigned keys),
+       TP_ARGS(b, keys)
 );
 
-DECLARE_EVENT_CLASS(bcache_gc,
+DEFINE_EVENT(btree_node, bcache_btree_set_root,
+       TP_PROTO(struct btree *b),
+       TP_ARGS(b)
+);
 
-       TP_PROTO(uint8_t *uuid),
+/* Allocator */
 
-       TP_ARGS(uuid),
+TRACE_EVENT(bcache_alloc_invalidate,
+       TP_PROTO(struct cache *ca),
+       TP_ARGS(ca),
 
        TP_STRUCT__entry(
-               __field(uint8_t *,      uuid)
+               __field(unsigned,       free                    )
+               __field(unsigned,       free_inc                )
+               __field(unsigned,       free_inc_size           )
+               __field(unsigned,       unused                  )
        ),
 
        TP_fast_assign(
-               __entry->uuid           = uuid;
+               __entry->free           = fifo_used(&ca->free);
+               __entry->free_inc       = fifo_used(&ca->free_inc);
+               __entry->free_inc_size  = ca->free_inc.size;
+               __entry->unused         = fifo_used(&ca->unused);
        ),
 
-       TP_printk("%pU", __entry->uuid)
+       TP_printk("free %u free_inc %u/%u unused %u", __entry->free,
+                 __entry->free_inc, __entry->free_inc_size, __entry->unused)
 );
 
+TRACE_EVENT(bcache_alloc_fail,
+       TP_PROTO(struct cache *ca),
+       TP_ARGS(ca),
 
-DEFINE_EVENT(bcache_gc, bcache_gc_start,
+       TP_STRUCT__entry(
+               __field(unsigned,       free                    )
+               __field(unsigned,       free_inc                )
+               __field(unsigned,       unused                  )
+               __field(unsigned,       blocked                 )
+       ),
 
-            TP_PROTO(uint8_t *uuid),
+       TP_fast_assign(
+               __entry->free           = fifo_used(&ca->free);
+               __entry->free_inc       = fifo_used(&ca->free_inc);
+               __entry->unused         = fifo_used(&ca->unused);
+               __entry->blocked        = atomic_read(&ca->set->prio_blocked);
+       ),
 
-            TP_ARGS(uuid)
+       TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free,
+                 __entry->free_inc, __entry->unused, __entry->blocked)
 );
 
-DEFINE_EVENT(bcache_gc, bcache_gc_end,
+/* Background writeback */
 
-            TP_PROTO(uint8_t *uuid),
+DEFINE_EVENT(bkey, bcache_writeback,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
+);
 
-            TP_ARGS(uuid)
+DEFINE_EVENT(bkey, bcache_writeback_collision,
+       TP_PROTO(struct bkey *k),
+       TP_ARGS(k)
 );
 
 #endif /* _TRACE_BCACHE_H */
index d615f78cc6b6945d3fa73609056aa1f0bc577870..41a6643e21361cd3993a12e5135bf3e32db79163 100644 (file)
@@ -670,10 +670,6 @@ perf_trace_##call(void *__data, proto)                                     \
                             sizeof(u64));                              \
        __entry_size -= sizeof(u32);                                    \
                                                                        \
-       if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE,               \
-                     "profile buffer not large enough"))               \
-               return;                                                 \
-                                                                       \
        entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare(     \
                __entry_size, event_call->event.type, &__regs, &rctx);  \
        if (!entry)                                                     \
index 7692dc69ccf7aeeb16ebc682b57d519c56642ba1..331499d597fa86cd941d99d5ad8ab9ec2120cb86 100644 (file)
 
 #include <linux/types.h>       /* __u8 etc */
 
+/* This is arbitrary.
+ * From USB 2.0 spec Table 11-13, offset 7, a hub can
+ * have up to 255 ports. The most yet reported is 10.
+ *
+ * Current Wireless USB host hardware (Intel i1480 for example) allows
+ * up to 22 devices to connect. Upcoming hardware might raise that
+ * limit. Because the arrays need to add a bit for hub status data, we
+ * use 31, so plus one evens out to four bytes.
+ */
+#define USB_MAXCHILDREN                31
+
 /*
  * Hub request types
  */
index ffd4652de91ca390fff537f18f524300bf470246..65e12099ef89d9f3b5706858d956d8aabe427348 100644 (file)
@@ -102,6 +102,30 @@ typedef uint64_t blkif_sector_t;
  */
 #define BLKIF_OP_DISCARD           5
 
+/*
+ * Recognized if "feature-max-indirect-segments" in present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * This pages are filled with an array of blkif_request_segment_aligned
+ * that hold the information about the segments. The number of indirect
+ * pages to use is determined by the maximum number of segments
+ * a indirect request contains. Every indirect page can contain a maximum
+ * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)),
+ * so to calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments/512).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT          6
+
 /*
  * Maximum scatter/gather segments per request.
  * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
@@ -109,6 +133,16 @@ typedef uint64_t blkif_sector_t;
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
 
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+struct blkif_request_segment_aligned {
+       grant_ref_t gref;        /* reference to I/O buffer frame        */
+       /* @first_sect: first sector in frame to transfer (inclusive).   */
+       /* @last_sect: last sector in frame to transfer (inclusive).     */
+       uint8_t     first_sect, last_sect;
+       uint16_t    _pad; /* padding to make it 8 bytes, so it's cache-aligned */
+} __attribute__((__packed__));
+
 struct blkif_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
@@ -147,12 +181,31 @@ struct blkif_request_other {
        uint64_t     id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_request_indirect {
+       uint8_t        indirect_op;
+       uint16_t       nr_segments;
+#ifdef CONFIG_X86_64
+       uint32_t       _pad1;        /* offsetof(blkif_...,u.indirect.id) == 8 */
+#endif
+       uint64_t       id;
+       blkif_sector_t sector_number;
+       blkif_vdev_t   handle;
+       uint16_t       _pad2;
+       grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef CONFIG_X86_64
+       uint32_t      _pad3;         /* make it 64 byte aligned */
+#else
+       uint64_t      _pad3;         /* make it 64 byte aligned */
+#endif
+} __attribute__((__packed__));
+
 struct blkif_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_request_rw rw;
                struct blkif_request_discard discard;
                struct blkif_request_other other;
+               struct blkif_request_indirect indirect;
        } u;
 } __attribute__((__packed__));
 
index 75271b9a8f61bedcec5a18465b5a9ca8e84a38c7..7d28aff605c7eb600c4c3bfe93b8b5f1edbaebfe 100644 (file)
@@ -188,6 +188,11 @@ struct __name##_back_ring {                                                \
 #define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                          \
     (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
 
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)               \
+    (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+
 #define RING_PUSH_REQUESTS(_r) do {                                    \
     wmb(); /* back sees requests /before/ updated producer index */    \
     (_r)->sring->req_prod = (_r)->req_prod_pvt;                                \
index 0e0b20b8c5db172e8dc07fe4de10358c19939d60..789ec4683db3b73f83e455891bdb319124215f97 100644 (file)
@@ -1845,36 +1845,43 @@ out:
 EXPORT_SYMBOL_GPL(cgroup_path);
 
 /**
- * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy
+ * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
  * @task: target task
- * @hierarchy_id: the hierarchy to look up @task's cgroup from
  * @buf: the buffer to write the path into
  * @buflen: the length of the buffer
  *
- * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and
- * copy its path into @buf.  This function grabs cgroup_mutex and shouldn't
- * be used inside locks used by cgroup controller callbacks.
+ * Determine @task's cgroup on the first (the one with the lowest non-zero
+ * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
+ * function grabs cgroup_mutex and shouldn't be used inside locks used by
+ * cgroup controller callbacks.
+ *
+ * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short.
  */
-int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
-                                   char *buf, size_t buflen)
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 {
        struct cgroupfs_root *root;
-       struct cgroup *cgrp = NULL;
-       int ret = -ENOENT;
+       struct cgroup *cgrp;
+       int hierarchy_id = 1, ret = 0;
+
+       if (buflen < 2)
+               return -ENAMETOOLONG;
 
        mutex_lock(&cgroup_mutex);
 
-       root = idr_find(&cgroup_hierarchy_idr, hierarchy_id);
+       root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
+
        if (root) {
                cgrp = task_cgroup_from_root(task, root);
                ret = cgroup_path(cgrp, buf, buflen);
+       } else {
+               /* if no hierarchy exists, everyone is in "/" */
+               memcpy(buf, "/", 2);
        }
 
        mutex_unlock(&cgroup_mutex);
-
        return ret;
 }
-EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy);
+EXPORT_SYMBOL_GPL(task_cgroup_path);
 
 /*
  * Control Group taskset
index e444ff88f0a425a00a4921291b46678b6a7efe21..cc2f66f68dc579c7e93967e4306821df29d7adce 100644 (file)
@@ -36,11 +36,11 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 {
        int ret;
 
-       ret = trace_seq_printf(s, "# compressed entry header\n");
-       ret = trace_seq_printf(s, "\ttype_len    :    5 bits\n");
-       ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
-       ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
-       ret = trace_seq_printf(s, "\n");
+       ret = trace_seq_puts(s, "# compressed entry header\n");
+       ret = trace_seq_puts(s, "\ttype_len    :    5 bits\n");
+       ret = trace_seq_puts(s, "\ttime_delta  :   27 bits\n");
+       ret = trace_seq_puts(s, "\tarray       :   32 bits\n");
+       ret = trace_seq_putc(s, '\n');
        ret = trace_seq_printf(s, "\tpadding     : type == %d\n",
                               RINGBUF_TYPE_PADDING);
        ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
@@ -1066,7 +1066,7 @@ static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
 }
 
 /**
- * check_pages - integrity check of buffer pages
+ * rb_check_pages - integrity check of buffer pages
  * @cpu_buffer: CPU buffer with pages to test
  *
  * As a safety measure we check to make sure the data pages have not
@@ -1258,7 +1258,7 @@ static int rb_cpu_notify(struct notifier_block *self,
 #endif
 
 /**
- * ring_buffer_alloc - allocate a new ring_buffer
+ * __ring_buffer_alloc - allocate a new ring_buffer
  * @size: the size in bytes per cpu that is needed.
  * @flags: attributes to set for the ring buffer.
  *
@@ -1607,6 +1607,7 @@ static void update_pages_handler(struct work_struct *work)
  * ring_buffer_resize - resize the ring buffer
  * @buffer: the buffer to resize.
  * @size: the new size.
+ * @cpu_id: the cpu buffer to resize
  *
  * Minimum size is 2 * BUF_PAGE_SIZE.
  *
@@ -3956,11 +3957,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume);
  * expected.
  *
  * After a sequence of ring_buffer_read_prepare calls, the user is
- * expected to make at least one call to ring_buffer_prepare_sync.
+ * expected to make at least one call to ring_buffer_read_prepare_sync.
  * Afterwards, ring_buffer_read_start is invoked to get things going
  * for real.
  *
- * This overall must be paired with ring_buffer_finish.
+ * This overall must be paired with ring_buffer_read_finish.
  */
 struct ring_buffer_iter *
 ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
@@ -4009,7 +4010,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
  * an intervening ring_buffer_read_prepare_sync must have been
  * performed.
  *
- * Must be paired with ring_buffer_finish.
+ * Must be paired with ring_buffer_read_finish.
  */
 void
 ring_buffer_read_start(struct ring_buffer_iter *iter)
@@ -4031,7 +4032,7 @@ ring_buffer_read_start(struct ring_buffer_iter *iter)
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
 /**
- * ring_buffer_finish - finish reading the iterator of the buffer
+ * ring_buffer_read_finish - finish reading the iterator of the buffer
  * @iter: The iterator retrieved by ring_buffer_start
  *
  * This re-enables the recording to the buffer, and frees the
@@ -4346,6 +4347,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
 /**
  * ring_buffer_alloc_read_page - allocate a page to read from buffer
  * @buffer: the buffer to allocate for.
+ * @cpu: the cpu buffer to allocate.
  *
  * This function is used in conjunction with ring_buffer_read_page.
  * When reading a full page from the ring buffer, these functions
@@ -4403,7 +4405,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
  * to swap with a page in the ring buffer.
  *
  * for example:
- *     rpage = ring_buffer_alloc_read_page(buffer);
+ *     rpage = ring_buffer_alloc_read_page(buffer, cpu);
  *     if (!rpage)
  *             return error;
  *     ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
index 0cd500bffd9b0a26fc9d5f7107f51e8037e1d4bd..3f2477713acaa9ea17ed54be7e79a4e0794d4c17 100644 (file)
@@ -3008,7 +3008,6 @@ static int tracing_release(struct inode *inode, struct file *file)
 
        iter = m->private;
        tr = iter->tr;
-       trace_array_put(tr);
 
        mutex_lock(&trace_types_lock);
 
@@ -3023,6 +3022,9 @@ static int tracing_release(struct inode *inode, struct file *file)
        if (!iter->snapshot)
                /* reenable tracing if it was previously enabled */
                tracing_start_tr(tr);
+
+       __trace_array_put(tr);
+
        mutex_unlock(&trace_types_lock);
 
        mutex_destroy(&iter->mutex);
@@ -3447,6 +3449,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
 static int tracing_trace_options_open(struct inode *inode, struct file *file)
 {
        struct trace_array *tr = inode->i_private;
+       int ret;
 
        if (tracing_disabled)
                return -ENODEV;
@@ -3454,7 +3457,11 @@ static int tracing_trace_options_open(struct inode *inode, struct file *file)
        if (trace_array_get(tr) < 0)
                return -ENODEV;
 
-       return single_open(file, tracing_trace_options_show, inode->i_private);
+       ret = single_open(file, tracing_trace_options_show, inode->i_private);
+       if (ret < 0)
+               trace_array_put(tr);
+
+       return ret;
 }
 
 static const struct file_operations tracing_iter_fops = {
@@ -3537,14 +3544,14 @@ static const char readme_msg[] =
        "\n  snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n"
        "\t\t\t  Read the contents for more information\n"
 #endif
-#ifdef CONFIG_STACKTRACE
+#ifdef CONFIG_STACK_TRACER
        "  stack_trace\t\t- Shows the max stack trace when active\n"
        "  stack_max_size\t- Shows current max stack size that was traced\n"
        "\t\t\t  Write into this file to reset the max size (trigger a new trace)\n"
 #ifdef CONFIG_DYNAMIC_FTRACE
        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n"
 #endif
-#endif /* CONFIG_STACKTRACE */
+#endif /* CONFIG_STACK_TRACER */
 ;
 
 static ssize_t
@@ -3958,6 +3965,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
        if (!iter) {
                ret = -ENOMEM;
+               __trace_array_put(tr);
                goto out;
        }
 
@@ -4704,21 +4712,24 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file)
                        ret = PTR_ERR(iter);
        } else {
                /* Writes still need the seq_file to hold the private data */
+               ret = -ENOMEM;
                m = kzalloc(sizeof(*m), GFP_KERNEL);
                if (!m)
-                       return -ENOMEM;
+                       goto out;
                iter = kzalloc(sizeof(*iter), GFP_KERNEL);
                if (!iter) {
                        kfree(m);
-                       return -ENOMEM;
+                       goto out;
                }
+               ret = 0;
+
                iter->tr = tr;
                iter->trace_buffer = &tc->tr->max_buffer;
                iter->cpu_file = tc->cpu;
                m->private = iter;
                file->private_data = m;
        }
-
+out:
        if (ret < 0)
                trace_array_put(tr);
 
@@ -4948,8 +4959,6 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
 
        mutex_lock(&trace_types_lock);
 
-       tr->ref++;
-
        info->iter.tr           = tr;
        info->iter.cpu_file     = tc->cpu;
        info->iter.trace        = tr->current_trace;
@@ -5328,9 +5337,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 }
 
 static const struct file_operations tracing_stats_fops = {
-       .open           = tracing_open_generic,
+       .open           = tracing_open_generic_tc,
        .read           = tracing_stats_read,
        .llseek         = generic_file_llseek,
+       .release        = tracing_release_generic_tc,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -5973,8 +5983,10 @@ static int new_instance_create(const char *name)
                goto out_free_tr;
 
        ret = event_trace_add_tracer(tr->dir, tr);
-       if (ret)
+       if (ret) {
+               debugfs_remove_recursive(tr->dir);
                goto out_free_tr;
+       }
 
        init_tracer_debugfs(tr, tr->dir);
 
index 4a4f6e1828b633aa8a2ba8a7dc6db70ac87db6a3..e7d643b8a907d6dbd027c723115b3d5a21dc9c9e 100644 (file)
@@ -214,7 +214,6 @@ struct trace_array {
        struct dentry           *event_dir;
        struct list_head        systems;
        struct list_head        events;
-       struct task_struct      *waiter;
        int                     ref;
 };
 
@@ -680,6 +679,15 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
                                               struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
                                         struct trace_array *tr);
+/*
+ * Tracer data references selftest functions that only occur
+ * on boot up. These can be __init functions. Thus, when selftests
+ * are enabled, then the tracers need to reference __init functions.
+ */
+#define __tracer_data          __refdata
+#else
+/* Tracers are seldom changed. Optimize when selftests are disabled. */
+#define __tracer_data          __read_mostly
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
index 84b1e045faba836583fb371d285f49e79bfa6bd0..80c36bcf66e8e8d089776ff14b780f3f85efa418 100644 (file)
@@ -236,6 +236,10 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
 
        BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
 
+       if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
+                       "perf buffer not large enough"))
+               return NULL;
+
        pc = preempt_count();
 
        *rctxp = perf_swevent_get_recursion_context();
@@ -266,6 +270,10 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
        struct pt_regs regs;
        int rctx;
 
+       head = this_cpu_ptr(event_function.perf_events);
+       if (hlist_empty(head))
+               return;
+
 #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
                    sizeof(u64)) - sizeof(u32))
 
@@ -279,8 +287,6 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
 
        entry->ip = ip;
        entry->parent_ip = parent_ip;
-
-       head = this_cpu_ptr(event_function.perf_events);
        perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
                              1, &regs, head, NULL);
 
index 7d854290bf817ef8c6b116b031830dc060ae5dc7..898f868833f2d4df0d80838f9461385b5eedcd77 100644 (file)
@@ -826,59 +826,33 @@ enum {
 static void *f_next(struct seq_file *m, void *v, loff_t *pos)
 {
        struct ftrace_event_call *call = m->private;
-       struct ftrace_event_field *field;
        struct list_head *common_head = &ftrace_common_fields;
        struct list_head *head = trace_get_fields(call);
+       struct list_head *node = v;
 
        (*pos)++;
 
        switch ((unsigned long)v) {
        case FORMAT_HEADER:
-               if (unlikely(list_empty(common_head)))
-                       return NULL;
-
-               field = list_entry(common_head->prev,
-                                  struct ftrace_event_field, link);
-               return field;
+               node = common_head;
+               break;
 
        case FORMAT_FIELD_SEPERATOR:
-               if (unlikely(list_empty(head)))
-                       return NULL;
-
-               field = list_entry(head->prev, struct ftrace_event_field, link);
-               return field;
+               node = head;
+               break;
 
        case FORMAT_PRINTFMT:
                /* all done */
                return NULL;
        }
 
-       field = v;
-       if (field->link.prev == common_head)
+       node = node->prev;
+       if (node == common_head)
                return (void *)FORMAT_FIELD_SEPERATOR;
-       else if (field->link.prev == head)
+       else if (node == head)
                return (void *)FORMAT_PRINTFMT;
-
-       field = list_entry(field->link.prev, struct ftrace_event_field, link);
-
-       return field;
-}
-
-static void *f_start(struct seq_file *m, loff_t *pos)
-{
-       loff_t l = 0;
-       void *p;
-
-       /* Start by showing the header */
-       if (!*pos)
-               return (void *)FORMAT_HEADER;
-
-       p = (void *)FORMAT_HEADER;
-       do {
-               p = f_next(m, p, &l);
-       } while (p && l < *pos);
-
-       return p;
+       else
+               return node;
 }
 
 static int f_show(struct seq_file *m, void *v)
@@ -904,8 +878,7 @@ static int f_show(struct seq_file *m, void *v)
                return 0;
        }
 
-       field = v;
-
+       field = list_entry(v, struct ftrace_event_field, link);
        /*
         * Smartly shows the array type(except dynamic array).
         * Normal:
@@ -932,6 +905,17 @@ static int f_show(struct seq_file *m, void *v)
        return 0;
 }
 
+static void *f_start(struct seq_file *m, loff_t *pos)
+{
+       void *p = (void *)FORMAT_HEADER;
+       loff_t l = 0;
+
+       while (l < *pos && p)
+               p = f_next(m, p, &l);
+
+       return p;
+}
+
 static void f_stop(struct seq_file *m, void *p)
 {
 }
@@ -963,23 +947,14 @@ static ssize_t
 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 {
        struct ftrace_event_call *call = filp->private_data;
-       struct trace_seq *s;
-       int r;
+       char buf[32];
+       int len;
 
        if (*ppos)
                return 0;
 
-       s = kmalloc(sizeof(*s), GFP_KERNEL);
-       if (!s)
-               return -ENOMEM;
-
-       trace_seq_init(s);
-       trace_seq_printf(s, "%d\n", call->event.type);
-
-       r = simple_read_from_buffer(ubuf, cnt, ppos,
-                                   s->buffer, s->len);
-       kfree(s);
-       return r;
+       len = sprintf(buf, "%d\n", call->event.type);
+       return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
 }
 
 static ssize_t
@@ -1218,6 +1193,7 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 
 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
 static int ftrace_event_set_open(struct inode *inode, struct file *file);
+static int ftrace_event_release(struct inode *inode, struct file *file);
 
 static const struct seq_operations show_event_seq_ops = {
        .start = t_start,
@@ -1245,7 +1221,7 @@ static const struct file_operations ftrace_set_event_fops = {
        .read = seq_read,
        .write = ftrace_event_write,
        .llseek = seq_lseek,
-       .release = seq_release,
+       .release = ftrace_event_release,
 };
 
 static const struct file_operations ftrace_enable_fops = {
@@ -1323,6 +1299,15 @@ ftrace_event_open(struct inode *inode, struct file *file,
        return ret;
 }
 
+static int ftrace_event_release(struct inode *inode, struct file *file)
+{
+       struct trace_array *tr = inode->i_private;
+
+       trace_array_put(tr);
+
+       return seq_release(inode, file);
+}
+
 static int
 ftrace_event_avail_open(struct inode *inode, struct file *file)
 {
@@ -1336,12 +1321,19 @@ ftrace_event_set_open(struct inode *inode, struct file *file)
 {
        const struct seq_operations *seq_ops = &show_set_event_seq_ops;
        struct trace_array *tr = inode->i_private;
+       int ret;
+
+       if (trace_array_get(tr) < 0)
+               return -ENODEV;
 
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC))
                ftrace_clear_events(tr);
 
-       return ftrace_event_open(inode, file, seq_ops);
+       ret = ftrace_event_open(inode, file, seq_ops);
+       if (ret < 0)
+               trace_array_put(tr);
+       return ret;
 }
 
 static struct event_subsystem *
index 0d883dc057d68fe92bc36f0510298cff725256bb..0c7b75a8acc8f9ccad53d75bc6a3fe0da425ae9f 100644 (file)
@@ -646,7 +646,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
        if (filter && filter->filter_string)
                trace_seq_printf(s, "%s\n", filter->filter_string);
        else
-               trace_seq_printf(s, "none\n");
+               trace_seq_puts(s, "none\n");
        mutex_unlock(&event_mutex);
 }
 
@@ -660,7 +660,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
        if (filter && filter->filter_string)
                trace_seq_printf(s, "%s\n", filter->filter_string);
        else
-               trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
+               trace_seq_puts(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
        mutex_unlock(&event_mutex);
 }
 
index b863f93b30f35caae4239629ea56d05b58b38639..38fe1483c50817cc4b1b6a33807985cda37d9b6a 100644 (file)
@@ -199,7 +199,7 @@ static int func_set_flag(u32 old_flags, u32 bit, int set)
        return 0;
 }
 
-static struct tracer function_trace __read_mostly =
+static struct tracer function_trace __tracer_data =
 {
        .name           = "function",
        .init           = function_trace_init,
index 8388bc99f2eee165efacbef7ddde4f7f21d49ea3..b5c09242683d8cd1c989d2706702d4d42e182d73 100644 (file)
@@ -446,7 +446,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
 
        /* First spaces to align center */
        for (i = 0; i < spaces / 2; i++) {
-               ret = trace_seq_printf(s, " ");
+               ret = trace_seq_putc(s, ' ');
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
@@ -457,7 +457,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
 
        /* Last spaces to align center */
        for (i = 0; i < spaces - (spaces / 2); i++) {
-               ret = trace_seq_printf(s, " ");
+               ret = trace_seq_putc(s, ' ');
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
@@ -503,7 +503,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
  ------------------------------------------
 
  */
-       ret = trace_seq_printf(s,
+       ret = trace_seq_puts(s,
                " ------------------------------------------\n");
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
@@ -516,7 +516,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
        if (ret == TRACE_TYPE_PARTIAL_LINE)
                return TRACE_TYPE_PARTIAL_LINE;
 
-       ret = trace_seq_printf(s, " => ");
+       ret = trace_seq_puts(s, " => ");
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
@@ -524,7 +524,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
        if (ret == TRACE_TYPE_PARTIAL_LINE)
                return TRACE_TYPE_PARTIAL_LINE;
 
-       ret = trace_seq_printf(s,
+       ret = trace_seq_puts(s,
                "\n ------------------------------------------\n\n");
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
@@ -645,7 +645,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
                        ret = print_graph_proc(s, pid);
                        if (ret == TRACE_TYPE_PARTIAL_LINE)
                                return TRACE_TYPE_PARTIAL_LINE;
-                       ret = trace_seq_printf(s, " | ");
+                       ret = trace_seq_puts(s, " | ");
                        if (!ret)
                                return TRACE_TYPE_PARTIAL_LINE;
                }
@@ -657,9 +657,9 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
                return ret;
 
        if (type == TRACE_GRAPH_ENT)
-               ret = trace_seq_printf(s, "==========>");
+               ret = trace_seq_puts(s, "==========>");
        else
-               ret = trace_seq_printf(s, "<==========");
+               ret = trace_seq_puts(s, "<==========");
 
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
@@ -668,7 +668,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
        if (ret != TRACE_TYPE_HANDLED)
                return ret;
 
-       ret = trace_seq_printf(s, "\n");
+       ret = trace_seq_putc(s, '\n');
 
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
@@ -705,13 +705,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
                len += strlen(nsecs_str);
        }
 
-       ret = trace_seq_printf(s, " us ");
+       ret = trace_seq_puts(s, " us ");
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
        /* Print remaining spaces to fit the row's width */
        for (i = len; i < 7; i++) {
-               ret = trace_seq_printf(s, " ");
+               ret = trace_seq_putc(s, ' ');
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
@@ -731,13 +731,13 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
        /* No real adata, just filling the column with spaces */
        switch (duration) {
        case DURATION_FILL_FULL:
-               ret = trace_seq_printf(s, "              |  ");
+               ret = trace_seq_puts(s, "              |  ");
                return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
        case DURATION_FILL_START:
-               ret = trace_seq_printf(s, "  ");
+               ret = trace_seq_puts(s, "  ");
                return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
        case DURATION_FILL_END:
-               ret = trace_seq_printf(s, " |");
+               ret = trace_seq_puts(s, " |");
                return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
        }
 
@@ -745,10 +745,10 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
        if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
                /* Duration exceeded 100 msecs */
                if (duration > 100000ULL)
-                       ret = trace_seq_printf(s, "! ");
+                       ret = trace_seq_puts(s, "! ");
                /* Duration exceeded 10 msecs */
                else if (duration > 10000ULL)
-                       ret = trace_seq_printf(s, "+ ");
+                       ret = trace_seq_puts(s, "+ ");
        }
 
        /*
@@ -757,7 +757,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
         * to fill out the space.
         */
        if (ret == -1)
-               ret = trace_seq_printf(s, "  ");
+               ret = trace_seq_puts(s, "  ");
 
        /* Catching here any failure happenned above */
        if (!ret)
@@ -767,7 +767,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
        if (ret != TRACE_TYPE_HANDLED)
                return ret;
 
-       ret = trace_seq_printf(s, "|  ");
+       ret = trace_seq_puts(s, "|  ");
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
@@ -817,7 +817,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
        /* Function */
        for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
-               ret = trace_seq_printf(s, " ");
+               ret = trace_seq_putc(s, ' ');
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
@@ -858,7 +858,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
 
        /* Function */
        for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
-               ret = trace_seq_printf(s, " ");
+               ret = trace_seq_putc(s, ' ');
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
@@ -917,7 +917,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
                if (ret == TRACE_TYPE_PARTIAL_LINE)
                        return TRACE_TYPE_PARTIAL_LINE;
 
-               ret = trace_seq_printf(s, " | ");
+               ret = trace_seq_puts(s, " | ");
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
@@ -1117,7 +1117,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 
        /* Closing brace */
        for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
-               ret = trace_seq_printf(s, " ");
+               ret = trace_seq_putc(s, ' ');
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
@@ -1129,7 +1129,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
         * belongs to, write out the function name.
         */
        if (func_match) {
-               ret = trace_seq_printf(s, "}\n");
+               ret = trace_seq_puts(s, "}\n");
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        } else {
@@ -1179,13 +1179,13 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
        /* Indentation */
        if (depth > 0)
                for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) {
-                       ret = trace_seq_printf(s, " ");
+                       ret = trace_seq_putc(s, ' ');
                        if (!ret)
                                return TRACE_TYPE_PARTIAL_LINE;
                }
 
        /* The comment */
-       ret = trace_seq_printf(s, "/* ");
+       ret = trace_seq_puts(s, "/* ");
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
@@ -1216,7 +1216,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
                s->len--;
        }
 
-       ret = trace_seq_printf(s, " */\n");
+       ret = trace_seq_puts(s, " */\n");
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
@@ -1448,7 +1448,7 @@ static struct trace_event graph_trace_ret_event = {
        .funcs          = &graph_functions
 };
 
-static struct tracer graph_trace __read_mostly = {
+static struct tracer graph_trace __tracer_data = {
        .name           = "function_graph",
        .open           = graph_trace_open,
        .pipe_open      = graph_trace_open,
index 7ed6976493c87ec2d31637a60a285bc39b4925b8..3811487e7a7a1d58f23b6e04b49fda0dbf06c9b0 100644 (file)
@@ -243,11 +243,11 @@ find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
 static int
 disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
 {
+       struct event_file_link *link = NULL;
+       int wait = 0;
        int ret = 0;
 
        if (file) {
-               struct event_file_link *link;
-
                link = find_event_file_link(tp, file);
                if (!link) {
                        ret = -EINVAL;
@@ -255,10 +255,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
                }
 
                list_del_rcu(&link->list);
-               /* synchronize with kprobe_trace_func/kretprobe_trace_func */
-               synchronize_sched();
-               kfree(link);
-
+               wait = 1;
                if (!list_empty(&tp->files))
                        goto out;
 
@@ -271,8 +268,22 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
                        disable_kretprobe(&tp->rp);
                else
                        disable_kprobe(&tp->rp.kp);
+               wait = 1;
        }
  out:
+       if (wait) {
+               /*
+                * Synchronize with kprobe_trace_func/kretprobe_trace_func
+                * to ensure disabled (all running handlers are finished).
+                * This is not only for kfree(), but also the caller,
+                * trace_remove_event_call() supposes it for releasing
+                * event_call related objects, which will be accessed in
+                * the kprobe_trace_func/kretprobe_trace_func.
+                */
+               synchronize_sched();
+               kfree(link);    /* Ignored if link == NULL */
+       }
+
        return ret;
 }
 
@@ -1087,9 +1098,6 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
        __size = sizeof(*entry) + tp->size + dsize;
        size = ALIGN(__size + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);
-       if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-                    "profile buffer not large enough"))
-               return;
 
        entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
        if (!entry)
@@ -1120,9 +1128,6 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
        __size = sizeof(*entry) + tp->size + dsize;
        size = ALIGN(__size + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);
-       if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-                    "profile buffer not large enough"))
-               return;
 
        entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
        if (!entry)
index a5e8f4878bfa3015ebb3372c8d09c2aa641b5bc1..b3dcfb2f0fef4e1bc9e9815021adc3e8dfe7d32a 100644 (file)
@@ -90,7 +90,7 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
        if (drv)
                ret += trace_seq_printf(s, " %s\n", drv->name);
        else
-               ret += trace_seq_printf(s, " \n");
+               ret += trace_seq_puts(s, " \n");
        return ret;
 }
 
@@ -107,7 +107,7 @@ static void mmio_pipe_open(struct trace_iterator *iter)
        struct header_iter *hiter;
        struct trace_seq *s = &iter->seq;
 
-       trace_seq_printf(s, "VERSION 20070824\n");
+       trace_seq_puts(s, "VERSION 20070824\n");
 
        hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
        if (!hiter)
@@ -209,7 +209,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
                        (rw->value >> 0) & 0xff, rw->pc, 0);
                break;
        default:
-               ret = trace_seq_printf(s, "rw what?\n");
+               ret = trace_seq_puts(s, "rw what?\n");
                break;
        }
        if (ret)
@@ -245,7 +245,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
                        secs, usec_rem, m->map_id, 0UL, 0);
                break;
        default:
-               ret = trace_seq_printf(s, "map what?\n");
+               ret = trace_seq_puts(s, "map what?\n");
                break;
        }
        if (ret)
index bb922d9ee51ba7b51171db284f6cd20576b012b2..34e7cbac0c9c17f8de2a0c8c137552deb9498b60 100644 (file)
@@ -78,7 +78,7 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
 
        trace_assign_type(field, entry);
 
-       ret = trace_seq_printf(s, "%s", field->buf);
+       ret = trace_seq_puts(s, field->buf);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
@@ -558,14 +558,14 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
                        if (ret)
                                ret = trace_seq_puts(s, "??");
                        if (ret)
-                               ret = trace_seq_puts(s, "\n");
+                               ret = trace_seq_putc(s, '\n');
                        continue;
                }
                if (!ret)
                        break;
                if (ret)
                        ret = seq_print_user_ip(s, mm, ip, sym_flags);
-               ret = trace_seq_puts(s, "\n");
+               ret = trace_seq_putc(s, '\n');
        }
 
        if (mm)
@@ -579,7 +579,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
        int ret;
 
        if (!ip)
-               return trace_seq_printf(s, "0");
+               return trace_seq_putc(s, '0');
 
        if (sym_flags & TRACE_ITER_SYM_OFFSET)
                ret = seq_print_sym_offset(s, "%s", ip);
@@ -964,14 +964,14 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
                goto partial;
 
        if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
-               if (!trace_seq_printf(s, " <-"))
+               if (!trace_seq_puts(s, " <-"))
                        goto partial;
                if (!seq_print_ip_sym(s,
                                      field->parent_ip,
                                      flags))
                        goto partial;
        }
-       if (!trace_seq_printf(s, "\n"))
+       if (!trace_seq_putc(s, '\n'))
                goto partial;
 
        return TRACE_TYPE_HANDLED;
@@ -1210,7 +1210,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 
                if (!seq_print_ip_sym(s, *p, flags))
                        goto partial;
-               if (!trace_seq_puts(s, "\n"))
+               if (!trace_seq_putc(s, '\n'))
                        goto partial;
        }
 
index 322e16461072575e6d8439208891b5730bd41438..8fd03657bc7d1f684279c46681519e16a1812f87 100644 (file)
@@ -175,7 +175,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
        entry = syscall_nr_to_meta(syscall);
 
        if (!entry) {
-               trace_seq_printf(s, "\n");
+               trace_seq_putc(s, '\n');
                return TRACE_TYPE_HANDLED;
        }
 
@@ -566,15 +566,15 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
        if (!sys_data)
                return;
 
+       head = this_cpu_ptr(sys_data->enter_event->perf_events);
+       if (hlist_empty(head))
+               return;
+
        /* get the size after alignment with the u32 buffer size field */
        size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
        size = ALIGN(size + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);
 
-       if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-                     "perf buffer not large enough"))
-               return;
-
        rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
                                sys_data->enter_event->event.type, regs, &rctx);
        if (!rec)
@@ -583,8 +583,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
        rec->nr = syscall_nr;
        syscall_get_arguments(current, regs, 0, sys_data->nb_args,
                               (unsigned long *)&rec->args);
-
-       head = this_cpu_ptr(sys_data->enter_event->perf_events);
        perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
 }
 
@@ -642,18 +640,14 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
        if (!sys_data)
                return;
 
+       head = this_cpu_ptr(sys_data->exit_event->perf_events);
+       if (hlist_empty(head))
+               return;
+
        /* We can probably do that at build time */
        size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
        size -= sizeof(u32);
 
-       /*
-        * Impossible, but be paranoid with the future
-        * How to put this check outside runtime?
-        */
-       if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-               "exit event has grown above perf buffer size"))
-               return;
-
        rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
                                sys_data->exit_event->event.type, regs, &rctx);
        if (!rec)
@@ -661,8 +655,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 
        rec->nr = syscall_nr;
        rec->ret = syscall_get_return_value(current, regs);
-
-       head = this_cpu_ptr(sys_data->exit_event->perf_events);
        perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
 }
 
index d5d0cd368a5635737f749324b61d1037c361c968..a23d2d71188e61f6ad3b706730b2a70d8c773103 100644 (file)
@@ -818,8 +818,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
 
        size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
        size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
-       if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-               return;
 
        preempt_disable();
        head = this_cpu_ptr(call->perf_events);
index ce0daa320a261f1aebfc5e16d7d7b3e7caf71c37..dec68bd4e9d8e996404faa065b67354f1defa8eb 100644 (file)
@@ -333,7 +333,8 @@ int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
                prepare_to_wait(wq, &q->wait, mode);
                val = q->key.flags;
                if (atomic_read(val) == 0)
-                       ret = (*action)(val);
+                       break;
+               ret = (*action)(val);
        } while (!ret && atomic_read(val) != 0);
        finish_wait(wq, &q->wait);
        return ret;
index 35da51359d404180c8214c007195e97ca89563dd..71d9f81f6eed17e7623ce9d0b55934acac54358c 100644 (file)
@@ -66,8 +66,6 @@ config CRC16
 
 config CRC_T10DIF
        tristate "CRC calculation for the T10 Data Integrity Field"
-       select CRYPTO
-       select CRYPTO_CRCT10DIF
        help
          This option is only needed if a module that's not in the
          kernel tree needs to calculate CRC checks for use with the
index fe3428c07b47fb605f53dc1f4d9a48f32fce54c1..fbbd66ed86cde5ed7724ed8f17986cdf1a37edaa 100644 (file)
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/crc-t10dif.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <crypto/hash.h>
 
-static struct crypto_shash *crct10dif_tfm;
+/* Table generated using the following polynomium:
+ * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
+ * gt: 0x8bb7
+ */
+static const __u16 t10_dif_crc_table[256] = {
+       0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+       0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+       0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+       0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+       0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+       0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+       0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+       0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+       0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+       0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+       0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+       0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+       0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+       0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+       0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+       0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+       0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+       0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+       0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+       0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+       0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+       0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+       0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+       0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+       0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+       0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+       0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+       0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+       0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+       0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+       0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+       0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
 
 __u16 crc_t10dif(const unsigned char *buffer, size_t len)
 {
-       struct {
-               struct shash_desc shash;
-               char ctx[2];
-       } desc;
-       int err;
-
-       desc.shash.tfm = crct10dif_tfm;
-       desc.shash.flags = 0;
-       *(__u16 *)desc.ctx = 0;
+       __u16 crc = 0;
+       unsigned int i;
 
-       err = crypto_shash_update(&desc.shash, buffer, len);
-       BUG_ON(err);
+       for (i = 0 ; i < len ; i++)
+               crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
 
-       return *(__u16 *)desc.ctx;
+       return crc;
 }
 EXPORT_SYMBOL(crc_t10dif);
 
-static int __init crc_t10dif_mod_init(void)
-{
-       crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
-       return PTR_RET(crct10dif_tfm);
-}
-
-static void __exit crc_t10dif_mod_fini(void)
-{
-       crypto_free_shash(crct10dif_tfm);
-}
-
-module_init(crc_t10dif_mod_init);
-module_exit(crc_t10dif_mod_fini);
-
 MODULE_DESCRIPTION("T10 DIF CRC calculation");
 MODULE_LICENSE("GPL");
index d411355f238e2088247f0cbf51631c77ae78b10e..aac511417ad19af5d9e3472747a983be5ed3ee4b 100644 (file)
@@ -151,15 +151,12 @@ do { \
 #endif /* __a29k__ */
 
 #if defined(__alpha) && W_TYPE_SIZE == 64
-#define umul_ppmm(ph, pl, m0, m1) \
-do { \
-               UDItype __m0 = (m0), __m1 = (m1); \
-               __asm__ ("umulh %r1,%2,%0" \
-               : "=r" ((UDItype) ph) \
-               : "%rJ" (__m0), \
-                       "rI" (__m1)); \
-               (pl) = __m0 * __m1; \
-       } while (0)
+#define umul_ppmm(ph, pl, m0, m1)                      \
+do {                                                   \
+       UDItype __m0 = (m0), __m1 = (m1);               \
+       (ph) = __builtin_alpha_umulh(__m0, __m1);       \
+       (pl) = __m0 * __m1;                             \
+} while (0)
 #define UMUL_TIME 46
 #ifndef LONGLONG_STANDALONE
 #define udiv_qrnnd(q, r, n1, n0, d) \
@@ -167,7 +164,7 @@ do { UDItype __r; \
        (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
        (r) = __r; \
 } while (0)
-extern UDItype __udiv_qrnnd();
+extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
 #define UDIV_TIME 220
 #endif /* LONGLONG_STANDALONE */
 #endif /* __alpha */
index e2f83591161bbf3e7bf5f70ef732b8cd57b38587..92b9b4324372e8f9d15aba987c3a55a4cb6945c0 100644 (file)
@@ -417,9 +417,11 @@ static void stac_update_outputs(struct hda_codec *codec)
                        val &= ~spec->eapd_mask;
                else
                        val |= spec->eapd_mask;
-               if (spec->gpio_data != val)
+               if (spec->gpio_data != val) {
+                       spec->gpio_data = val;
                        stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir,
                                      val);
+               }
        }
 }
 
@@ -3231,7 +3233,7 @@ static const struct hda_fixup stac927x_fixups[] = {
                        /* configure the analog microphone on some laptops */
                        { 0x0c, 0x90a79130 },
                        /* correct the front output jack as a hp out */
-                       { 0x0f, 0x0227011f },
+                       { 0x0f, 0x0221101f },
                        /* correct the front input jack as a mic */
                        { 0x0e, 0x02a79130 },
                        {}
@@ -3612,20 +3614,18 @@ static int stac_parse_auto_config(struct hda_codec *codec)
 static int stac_init(struct hda_codec *codec)
 {
        struct sigmatel_spec *spec = codec->spec;
-       unsigned int gpio;
        int i;
 
        /* override some hints */
        stac_store_hints(codec);
 
        /* set up GPIO */
-       gpio = spec->gpio_data;
        /* turn on EAPD statically when spec->eapd_switch isn't set.
         * otherwise, unsol event will turn it on/off dynamically
         */
        if (!spec->eapd_switch)
-               gpio |= spec->eapd_mask;
-       stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, gpio);
+               spec->gpio_data |= spec->eapd_mask;
+       stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, spec->gpio_data);
 
        snd_hda_gen_init(codec);
 
@@ -3915,6 +3915,7 @@ static void stac_setup_gpio(struct hda_codec *codec)
 {
        struct sigmatel_spec *spec = codec->spec;
 
+       spec->gpio_mask |= spec->eapd_mask;
        if (spec->gpio_led) {
                if (!spec->vref_mute_led_nid) {
                        spec->gpio_mask |= spec->gpio_led;
index ac73c607410a73d92ff84a817c22bf6a99c25d5f..04491f0e8d1bde2636f8772e3b7d3bb65284d44a 100644 (file)
@@ -102,13 +102,13 @@ static struct ep93xx_ac97_info *ep93xx_ac97_info;
 
 static struct ep93xx_dma_data ep93xx_ac97_pcm_out = {
        .name           = "ac97-pcm-out",
-       .dma_port       = EP93XX_DMA_AAC1,
+       .port           = EP93XX_DMA_AAC1,
        .direction      = DMA_MEM_TO_DEV,
 };
 
 static struct ep93xx_dma_data ep93xx_ac97_pcm_in = {
        .name           = "ac97-pcm-in",
-       .dma_port       = EP93XX_DMA_AAC1,
+       .port           = EP93XX_DMA_AAC1,
        .direction      = DMA_DEV_TO_MEM,
 };
 
index 3eeada57e87de68b43394e3440805de03932d890..566a367c94fa0ce71ac04f86686923ed63ece58d 100644 (file)
@@ -1612,7 +1612,7 @@ static int max98088_dai2_digital_mute(struct snd_soc_dai *codec_dai, int mute)
 
 static void max98088_sync_cache(struct snd_soc_codec *codec)
 {
-       u16 *reg_cache = codec->reg_cache;
+       u8 *reg_cache = codec->reg_cache;
        int i;
 
        if (!codec->cache_sync)
index d659d3adcfb34e01a04fbc8dfff6fdb9468ba3ad..6c8a9e7bee25c837cd35dc260847e1c778a811b6 100644 (file)
@@ -1527,6 +1527,9 @@ static int sgtl5000_i2c_probe(struct i2c_client *client,
        if (IS_ERR(sgtl5000->mclk)) {
                ret = PTR_ERR(sgtl5000->mclk);
                dev_err(&client->dev, "Failed to get mclock: %d\n", ret);
+               /* Defer the probe to see if the clk will be provided later */
+               if (ret == -ENOENT)
+                       return -EPROBE_DEFER;
                return ret;
        }
 
index 0ec070cf7231ebcc39a057061af8995d7f7574ff..d82ee386eab564d352eccd4209dff767b222e6f3 100644 (file)
@@ -3908,10 +3908,8 @@ int snd_soc_add_platform(struct device *dev, struct snd_soc_platform *platform,
 {
        /* create platform component name */
        platform->name = fmt_single_name(dev, &platform->id);
-       if (platform->name == NULL) {
-               kfree(platform);
+       if (platform->name == NULL)
                return -ENOMEM;
-       }
 
        platform->dev = dev;
        platform->driver = platform_drv;
index e58233f7df616e6bb406f809cea23069c4059933..6c486625321bebb78b6f66370fda54a49a300fcb 100644 (file)
@@ -389,9 +389,9 @@ static int tegra20_ac97_platform_probe(struct platform_device *pdev)
        ac97->capture_dma_data.slave_id = of_dma[1];
 
        ac97->playback_dma_data.addr = mem->start + TEGRA20_AC97_FIFO_TX1;
-       ac97->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-       ac97->capture_dma_data.maxburst = 4;
-       ac97->capture_dma_data.slave_id = of_dma[0];
+       ac97->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       ac97->playback_dma_data.maxburst = 4;
+       ac97->playback_dma_data.slave_id = of_dma[1];
 
        ret = tegra_asoc_utils_init(&ac97->util_data, &pdev->dev);
        if (ret)
index 5eaa12cdc6ebd3bfcbd502beb1aff881c111d406..551b3c93ce932c77e9e05de4bcf01f5af63d1c94 100644 (file)
@@ -323,8 +323,8 @@ static int tegra20_spdif_platform_probe(struct platform_device *pdev)
        }
 
        spdif->playback_dma_data.addr = mem->start + TEGRA20_SPDIF_DATA_OUT;
-       spdif->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-       spdif->capture_dma_data.maxburst = 4;
+       spdif->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       spdif->playback_dma_data.maxburst = 4;
        spdif->playback_dma_data.slave_id = dmareq->start;
 
        pm_runtime_enable(&pdev->dev);
index 2aa4e13063a8d8d0ab3f5fea2c0849251739ff47..3d2551cc10f2f6fc191acea154ff2a2bc303b09e 100644 (file)
@@ -543,7 +543,7 @@ static snd_pcm_uframes_t usb6fire_pcm_pointer(
        snd_pcm_uframes_t ret;
 
        if (rt->panic || !sub)
-               return SNDRV_PCM_STATE_XRUN;
+               return SNDRV_PCM_POS_XRUN;
 
        spin_lock_irqsave(&sub->lock, flags);
        ret = sub->dma_off;
index 6430ed2a9f65d68d3d7741acd8475b5140c17f51..c21a3df9a0df1ef6900a468ee7af36ad7b57ab19 100644 (file)
@@ -503,7 +503,7 @@ static snd_pcm_uframes_t hiface_pcm_pointer(struct snd_pcm_substream *alsa_sub)
        snd_pcm_uframes_t dma_offset;
 
        if (rt->panic || !sub)
-               return SNDRV_PCM_STATE_XRUN;
+               return SNDRV_PCM_POS_XRUN;
 
        spin_lock_irqsave(&sub->lock, flags);
        dma_offset = sub->dma_off;
index ca9fa4d32e07e3421ce4446f68d9a0601e51ac28..07819bfa7dba30390fedad9e962813f0c95d38c9 100644 (file)
@@ -1026,9 +1026,10 @@ kvp_get_ip_info(int family, char *if_name, int op,
 
                                if (sn_offset == 0)
                                        strcpy(sn_str, cidr_mask);
-                               else
+                               else {
+                                       strcat((char *)ip_buffer->sub_net, ";");
                                        strcat(sn_str, cidr_mask);
-                               strcat((char *)ip_buffer->sub_net, ";");
+                               }
                                sn_offset += strlen(sn_str) + 1;
                        }